/*
 *  gfxaccel.cpp - Generic Native QuickDraw acceleration
 *
 *  SheepShaver (C) 1997-2005 Marc Hellwig and Christian Bauer
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
20 |
|
21 |
#include "sysdeps.h" |
22 |
|
23 |
#include "prefs.h" |
24 |
#include "video.h" |
25 |
#include "video_defs.h" |
26 |
|
27 |
#define DEBUG 0 |
28 |
#include "debug.h" |
29 |
|
30 |
|
31 |
/*
 *  Utility functions
 */
34 |
|
35 |
// Map a pixel depth in bits to the number of bytes one pixel occupies:
// 8 -> 1, 15/16 -> 2, 24/32 -> 4. Any other depth calls abort().
static inline int bytes_per_pixel(int depth)
{
	if (depth == 8)
		return 1;
	if (depth == 15 || depth == 16)
		return 2;
	if (depth == 24 || depth == 32)
		return 4;

	// Unsupported depth
	abort();
}
54 |
|
55 |
|
56 |
/*
 *  Rectangle inversion
 */
59 |
|
60 |
template< int bpp > |
61 |
static inline void do_invrect(uint8 *dest, uint32 length) |
62 |
{ |
63 |
#define INVERT_1(PTR, OFS) ((uint8 *)(PTR))[OFS] = ~((uint8 *)(PTR))[OFS] |
64 |
#define INVERT_2(PTR, OFS) ((uint16 *)(PTR))[OFS] = ~((uint16 *)(PTR))[OFS] |
65 |
#define INVERT_4(PTR, OFS) ((uint32 *)(PTR))[OFS] = ~((uint32 *)(PTR))[OFS] |
66 |
#define INVERT_8(PTR, OFS) ((uint64 *)(PTR))[OFS] = ~((uint64 *)(PTR))[OFS] |
67 |
|
68 |
#ifndef UNALIGNED_PROFITABLE |
69 |
// Align on 16-bit boundaries |
70 |
if (bpp < 16 && (((uintptr)dest) & 1)) { |
71 |
INVERT_1(dest, 0); |
72 |
dest += 1; length -= 1; |
73 |
} |
74 |
|
75 |
// Align on 32-bit boundaries |
76 |
if (bpp < 32 && (((uintptr)dest) & 2)) { |
77 |
INVERT_2(dest, 0); |
78 |
dest += 2; length -= 2; |
79 |
} |
80 |
#endif |
81 |
|
82 |
// Invert 8-byte words |
83 |
if (length >= 8) { |
84 |
const int r = (length / 8) % 8; |
85 |
dest += r * 8; |
86 |
|
87 |
int n = ((length / 8) + 7) / 8; |
88 |
switch (r) { |
89 |
case 0: do { |
90 |
dest += 64; |
91 |
INVERT_8(dest, -8); |
92 |
case 7: INVERT_8(dest, -7); |
93 |
case 6: INVERT_8(dest, -6); |
94 |
case 5: INVERT_8(dest, -5); |
95 |
case 4: INVERT_8(dest, -4); |
96 |
case 3: INVERT_8(dest, -3); |
97 |
case 2: INVERT_8(dest, -2); |
98 |
case 1: INVERT_8(dest, -1); |
99 |
} while (--n > 0); |
100 |
} |
101 |
} |
102 |
|
103 |
// 32-bit cell to invert? |
104 |
if (length & 4) { |
105 |
INVERT_4(dest, 0); |
106 |
if (bpp <= 16) |
107 |
dest += 4; |
108 |
} |
109 |
|
110 |
// 16-bit cell to invert? |
111 |
if (bpp <= 16 && (length & 2)) { |
112 |
INVERT_2(dest, 0); |
113 |
if (bpp <= 8) |
114 |
dest += 2; |
115 |
} |
116 |
|
117 |
// 8-bit cell to invert? |
118 |
if (bpp <= 8 && (length & 1)) |
119 |
INVERT_1(dest, 0); |
120 |
|
121 |
#undef INVERT_1 |
122 |
#undef INVERT_2 |
123 |
#undef INVERT_4 |
124 |
#undef INVERT_8 |
125 |
} |
126 |
|
127 |
// Native "invert rectangle" operation.
//
// "p" is the Mac address of the acceleration parameter record. The
// destination rectangle is taken relative to the destination bounds
// rectangle, and every pixel inside it is bitwise-inverted in place.
// Empty or inverted rectangles (width or height <= 0) are ignored.
void NQD_invrect(uint32 p)
{
	D(bug("accl_invrect %08x\n", p));

	// Get inversion parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width  = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d, bytes_per_row %d\n", width, height, (int32)ReadMacInt32(p + acclDestRowBytes)));

	//!!?? pen_mode == 14

	// Nothing to do for an empty rectangle; a negative width must never be
	// turned into an (unsigned) byte count
	if (width <= 0 || height <= 0)
		return;

	// And perform the inversion
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));
	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Bytes to invert per row. Computed in int: storing width * bpp back
	// into the 16-bit "width" would truncate rows of 32768 bytes or more.
	const uint32 row_length = (uint32)(width * bpp);

	switch (bpp) {
	case 1:
		for (int i = 0; i < height; i++) {
			do_invrect<8>(dest, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_invrect<16>(dest, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_invrect<32>(dest, row_length);
			dest += dest_row_bytes;
		}
		break;
	}
}
167 |
|
168 |
|
169 |
/*
 *  Rectangle filling
 */
172 |
|
173 |
template< int bpp > |
174 |
static inline void do_fillrect(uint8 *dest, uint32 color, uint32 length) |
175 |
{ |
176 |
#define FILL_1(PTR, OFS, VAL) ((uint8 *)(PTR))[OFS] = (VAL) |
177 |
#define FILL_2(PTR, OFS, VAL) ((uint16 *)(PTR))[OFS] = (VAL) |
178 |
#define FILL_4(PTR, OFS, VAL) ((uint32 *)(PTR))[OFS] = (VAL) |
179 |
#define FILL_8(PTR, OFS, VAL) ((uint64 *)(PTR))[OFS] = (VAL) |
180 |
|
181 |
#ifndef UNALIGNED_PROFITABLE |
182 |
// Align on 16-bit boundaries |
183 |
if (bpp < 16 && (((uintptr)dest) & 1)) { |
184 |
FILL_1(dest, 0, color); |
185 |
dest += 1; length -= 1; |
186 |
} |
187 |
|
188 |
// Align on 32-bit boundaries |
189 |
if (bpp < 32 && (((uintptr)dest) & 2)) { |
190 |
FILL_2(dest, 0, color); |
191 |
dest += 2; length -= 2; |
192 |
} |
193 |
#endif |
194 |
|
195 |
// Fill 8-byte words |
196 |
if (length >= 8) { |
197 |
const uint64 c = (((uint64)color) << 32) | color; |
198 |
const int r = (length / 8) % 8; |
199 |
dest += r * 8; |
200 |
|
201 |
int n = ((length / 8) + 7) / 8; |
202 |
switch (r) { |
203 |
case 0: do { |
204 |
dest += 64; |
205 |
FILL_8(dest, -8, c); |
206 |
case 7: FILL_8(dest, -7, c); |
207 |
case 6: FILL_8(dest, -6, c); |
208 |
case 5: FILL_8(dest, -5, c); |
209 |
case 4: FILL_8(dest, -4, c); |
210 |
case 3: FILL_8(dest, -3, c); |
211 |
case 2: FILL_8(dest, -2, c); |
212 |
case 1: FILL_8(dest, -1, c); |
213 |
} while (--n > 0); |
214 |
} |
215 |
} |
216 |
|
217 |
// 32-bit cell to fill? |
218 |
if (length & 4) { |
219 |
FILL_4(dest, 0, color); |
220 |
if (bpp <= 16) |
221 |
dest += 4; |
222 |
} |
223 |
|
224 |
// 16-bit cell to fill? |
225 |
if (bpp <= 16 && (length & 2)) { |
226 |
FILL_2(dest, 0, color); |
227 |
if (bpp <= 8) |
228 |
dest += 2; |
229 |
} |
230 |
|
231 |
// 8-bit cell to fill? |
232 |
if (bpp <= 8 && (length & 1)) |
233 |
FILL_1(dest, 0, color); |
234 |
|
235 |
#undef FILL_1 |
236 |
#undef FILL_2 |
237 |
#undef FILL_4 |
238 |
#undef FILL_8 |
239 |
} |
240 |
|
241 |
// Native "fill rectangle" operation.
//
// "p" is the Mac address of the acceleration parameter record. The
// destination rectangle is taken relative to the destination bounds
// rectangle and filled with the fore pen when the pen mode is 8, with the
// back pen otherwise. Empty or inverted rectangles (width or height <= 0)
// are ignored.
void NQD_fillrect(uint32 p)
{
	D(bug("accl_fillrect %08x\n", p));

	// Get filling parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width  = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	uint32 color = htonl(ReadMacInt32(p + acclPenMode) == 8 ? ReadMacInt32(p + acclForePen) : ReadMacInt32(p + acclBackPen));
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d\n", width, height));
	D(bug(" bytes_per_row %d color %08x\n", (int32)ReadMacInt32(p + acclDestRowBytes), color));

	// Nothing to do for an empty rectangle; a negative width must never be
	// turned into an (unsigned) byte count
	if (width <= 0 || height <= 0)
		return;

	// And perform the fill
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));
	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Bytes to fill per row. Computed in int: storing width * bpp back into
	// the 16-bit "width" would truncate rows of 32768 bytes or more.
	const int row_length = width * bpp;

	switch (bpp) {
	case 1:
		// memset() stores the low byte of "color"
		for (int i = 0; i < height; i++) {
			memset(dest, color, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_fillrect<16>(dest, color, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_fillrect<32>(dest, color, row_length);
			dest += dest_row_bytes;
		}
		break;
	}
}
281 |
|
282 |
// Hook deciding whether a rectangle operation can be handled natively.
//
// When the field at offset 0x284 of the record at "p" is non-zero and the
// destination pixel size is at least 8 bits, transfer mode 8 selects the
// native fill routine and transfer mode 10 the native invert routine; the
// chosen routine is stored in the record's draw proc field and true is
// returned. In every other case the record is left untouched and false is
// returned.
bool NQD_fillrect_hook(uint32 p)
{
	D(bug("accl_fillrect_hook %08x\n", p));

	// Check if we can accelerate this fillrect
	if (ReadMacInt32(p + 0x284) == 0 || ReadMacInt32(p + acclDestPixelSize) < 8)
		return false;

	switch (ReadMacInt32(p + acclTransferMode)) {
	case 8:
		// Fill
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_FILLRECT));
		return true;
	case 10:
		// Invert
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_INVRECT));
		return true;
	default:
		return false;
	}
}
302 |
|
303 |
|
304 |
/*
 *  Isomorphic rectangle blitting
 */
307 |
|
308 |
// TODO: optimize for VOSF and target pixmap == screen |
309 |
void NQD_bitblt(uint32 p) |
310 |
{ |
311 |
D(bug("accl_bitblt %08x\n", p)); |
312 |
|
313 |
// Get blitting parameters |
314 |
int16 src_X = (int16)ReadMacInt16(p + acclSrcRect + 2) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 2); |
315 |
int16 src_Y = (int16)ReadMacInt16(p + acclSrcRect + 0) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 0); |
316 |
int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2); |
317 |
int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0); |
318 |
int16 width = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2); |
319 |
int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0); |
320 |
D(bug(" src addr %08x, dest addr %08x\n", ReadMacInt32(p + acclSrcBaseAddr), ReadMacInt32(p + acclDestBaseAddr))); |
321 |
D(bug(" src X %d, src Y %d, dest X %d, dest Y %d\n", src_X, src_Y, dest_X, dest_Y)); |
322 |
D(bug(" width %d, height %d\n", width, height)); |
323 |
|
324 |
// And perform the blit |
325 |
const int bpp = bytes_per_pixel(ReadMacInt32(p + acclSrcPixelSize)); |
326 |
width *= bpp; |
327 |
if ((int32)ReadMacInt32(p + acclSrcRowBytes) > 0) { |
328 |
const int src_row_bytes = (int32)ReadMacInt32(p + acclSrcRowBytes); |
329 |
const int dst_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes); |
330 |
uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + (src_Y * src_row_bytes) + (src_X * bpp)); |
331 |
uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dst_row_bytes) + (dest_X * bpp)); |
332 |
for (int i = 0; i < height; i++) { |
333 |
memmove(dst, src, width); |
334 |
src += src_row_bytes; |
335 |
dst += dst_row_bytes; |
336 |
} |
337 |
} |
338 |
else { |
339 |
const int src_row_bytes = -(int32)ReadMacInt32(p + acclSrcRowBytes); |
340 |
const int dst_row_bytes = -(int32)ReadMacInt32(p + acclDestRowBytes); |
341 |
uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + ((src_Y + height - 1) * src_row_bytes) + (src_X * bpp)); |
342 |
uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + ((dest_Y + height - 1) * dst_row_bytes) + (dest_X * bpp)); |
343 |
for (int i = height - 1; i >= 0; i--) { |
344 |
memmove(dst, src, width); |
345 |
src -= src_row_bytes; |
346 |
dst -= dst_row_bytes; |
347 |
} |
348 |
} |
349 |
} |
350 |
|
351 |
/*
  BitBlt transfer modes:
  0 : srcCopy
  1 : srcOr
  2 : srcXor
  3 : srcBic
  4 : notSrcCopy
  5 : notSrcOr
  6 : notSrcXor
  7 : notSrcBic
  32 : blend
  33 : addPin
  34 : addOver
  35 : subPin
  36 : transparent
  37 : adMax
  38 : subOver
  39 : adMin
  50 : hilite
*/
371 |
|
372 |
// Hook deciding whether a bitblt can be handled natively.
//
// The blit is accelerated only when all of the following hold for the record
// at Mac address "p":
//  - the fields at offsets 0x018 and 0x128 sum to zero and the field at
//    offset 0x130 is zero (meaning of these fields is not visible here),
//  - source and destination have the same pixel size, at least 8 bits,
//  - source and destination row bytes have the same sign,
//  - the transfer mode is 0 (srcCopy),
//  - the field at offset 0x15c is greater than zero.
// In that case the native bitblt routine is stored in the record's draw proc
// field and true is returned; otherwise false is returned.
bool NQD_bitblt_hook(uint32 p)
{
	D(bug("accl_draw_hook %08x\n", p));

	// Check if we can accelerate this bitblt
	if (ReadMacInt32(p + 0x018) + ReadMacInt32(p + 0x128) == 0 &&
		ReadMacInt32(p + 0x130) == 0 &&
		ReadMacInt32(p + acclSrcPixelSize) >= 8 &&
		ReadMacInt32(p + acclSrcPixelSize) == ReadMacInt32(p + acclDestPixelSize) &&
		// The XOR has its top bit clear exactly when both values have the
		// same sign. The explicit int32 cast matters: on an unsigned value
		// ">= 0" would always be true and the check would do nothing.
		(int32)(ReadMacInt32(p + acclSrcRowBytes) ^ ReadMacInt32(p + acclDestRowBytes)) >= 0 &&	// same sign?
		ReadMacInt32(p + acclTransferMode) == 0 &&	// srcCopy?
		ReadMacInt32(p + 0x15c) > 0) {

		// Yes, set function pointer
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_BITBLT));
		return true;
	}
	return false;
}
391 |
|
392 |
// Wait for graphics operation to finish |
393 |
bool NQD_sync_hook(uint32 arg) |
394 |
{ |
395 |
D(bug("accl_sync_hook %08x\n", arg)); |
396 |
return true; |
397 |
} |
398 |
|
399 |
|
400 |
/*
 *  Install Native QuickDraw acceleration hooks
 */
403 |
|
404 |
void VideoInstallAccel(void) |
405 |
{ |
406 |
// Install acceleration hooks |
407 |
if (PrefsFindBool("gfxaccel")) { |
408 |
D(bug("Video: Installing acceleration hooks\n")); |
409 |
uint32 base; |
410 |
|
411 |
SheepVar bitblt_hook_info(sizeof(accl_hook_info)); |
412 |
base = bitblt_hook_info.addr(); |
413 |
WriteMacInt32(base + 0, NativeTVECT(NATIVE_BITBLT_HOOK)); |
414 |
WriteMacInt32(base + 4, NativeTVECT(NATIVE_SYNC_HOOK)); |
415 |
WriteMacInt32(base + 8, ACCL_BITBLT); |
416 |
NQDMisc(6, bitblt_hook_info.addr()); |
417 |
|
418 |
SheepVar fillrect_hook_info(sizeof(accl_hook_info)); |
419 |
base = fillrect_hook_info.addr(); |
420 |
WriteMacInt32(base + 0, NativeTVECT(NATIVE_FILLRECT_HOOK)); |
421 |
WriteMacInt32(base + 4, NativeTVECT(NATIVE_SYNC_HOOK)); |
422 |
WriteMacInt32(base + 8, ACCL_FILLRECT); |
423 |
NQDMisc(6, fillrect_hook_info.addr()); |
424 |
} |
425 |
} |