1 |
/*
 *  gfxaccel.cpp - Generic Native QuickDraw acceleration
 *
 *  SheepShaver (C) 1997-2005 Marc Hellwig and Christian Bauer
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
20 |
|
21 |
#include "sysdeps.h" |
22 |
|
23 |
#include "prefs.h" |
24 |
#include "video.h" |
25 |
#include "video_defs.h" |
26 |
|
27 |
#define DEBUG 0 |
28 |
#include "debug.h" |
29 |
|
30 |
|
31 |
/*
 *  Utility functions
 */
34 |
|
35 |
// Return bytes per pixel for requested depth
//
// `depth` is the pixel depth in bits: 8 maps to 1 byte, 15 and 16 map to
// 2 bytes, 24 and 32 map to 4 bytes. Any other depth is unsupported and
// terminates the process through abort().
static inline int bytes_per_pixel(int depth)
{
	int bpp;
	switch (depth) {
	case 8:
		bpp = 1;
		break;
	case 15: case 16:
		bpp = 2;
		break;
	case 24: case 32:
		bpp = 4;
		break;
	default:
		// Unsupported depth: there is no sensible value to return
		abort();
	}
	return bpp;
}
54 |
|
55 |
// Pass-through dirty areas to redraw functions |
56 |
static inline void NQD_set_dirty_area(uint32 p) |
57 |
{ |
58 |
if (ReadMacInt32(p + acclDestBaseAddr) == screen_base) { |
59 |
int16 x = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2); |
60 |
int16 y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0); |
61 |
int16 w = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2); |
62 |
int16 h = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0); |
63 |
video_set_dirty_area(x, y, w, h); |
64 |
} |
65 |
} |
66 |
|
67 |
|
68 |
/*
 *  Rectangle inversion
 */
71 |
|
72 |
template< int bpp > |
73 |
static inline void do_invrect(uint8 *dest, uint32 length) |
74 |
{ |
75 |
#define INVERT_1(PTR, OFS) ((uint8 *)(PTR))[OFS] = ~((uint8 *)(PTR))[OFS] |
76 |
#define INVERT_2(PTR, OFS) ((uint16 *)(PTR))[OFS] = ~((uint16 *)(PTR))[OFS] |
77 |
#define INVERT_4(PTR, OFS) ((uint32 *)(PTR))[OFS] = ~((uint32 *)(PTR))[OFS] |
78 |
#define INVERT_8(PTR, OFS) ((uint64 *)(PTR))[OFS] = ~((uint64 *)(PTR))[OFS] |
79 |
|
80 |
#ifndef UNALIGNED_PROFITABLE |
81 |
// Align on 16-bit boundaries |
82 |
if (bpp < 16 && (((uintptr)dest) & 1)) { |
83 |
INVERT_1(dest, 0); |
84 |
dest += 1; length -= 1; |
85 |
} |
86 |
|
87 |
// Align on 32-bit boundaries |
88 |
if (bpp < 32 && (((uintptr)dest) & 2)) { |
89 |
INVERT_2(dest, 0); |
90 |
dest += 2; length -= 2; |
91 |
} |
92 |
#endif |
93 |
|
94 |
// Invert 8-byte words |
95 |
if (length >= 8) { |
96 |
const int r = (length / 8) % 8; |
97 |
dest += r * 8; |
98 |
|
99 |
int n = ((length / 8) + 7) / 8; |
100 |
switch (r) { |
101 |
case 0: do { |
102 |
dest += 64; |
103 |
INVERT_8(dest, -8); |
104 |
case 7: INVERT_8(dest, -7); |
105 |
case 6: INVERT_8(dest, -6); |
106 |
case 5: INVERT_8(dest, -5); |
107 |
case 4: INVERT_8(dest, -4); |
108 |
case 3: INVERT_8(dest, -3); |
109 |
case 2: INVERT_8(dest, -2); |
110 |
case 1: INVERT_8(dest, -1); |
111 |
} while (--n > 0); |
112 |
} |
113 |
} |
114 |
|
115 |
// 32-bit cell to invert? |
116 |
if (length & 4) { |
117 |
INVERT_4(dest, 0); |
118 |
if (bpp <= 16) |
119 |
dest += 4; |
120 |
} |
121 |
|
122 |
// 16-bit cell to invert? |
123 |
if (bpp <= 16 && (length & 2)) { |
124 |
INVERT_2(dest, 0); |
125 |
if (bpp <= 8) |
126 |
dest += 2; |
127 |
} |
128 |
|
129 |
// 8-bit cell to invert? |
130 |
if (bpp <= 8 && (length & 1)) |
131 |
INVERT_1(dest, 0); |
132 |
|
133 |
#undef INVERT_1 |
134 |
#undef INVERT_2 |
135 |
#undef INVERT_4 |
136 |
#undef INVERT_8 |
137 |
} |
138 |
|
139 |
// Native rectangle inversion.
//
// `p` is the Mac address of the acceleration parameter block. The destination
// rectangle (relative to the destination bounds rectangle) is inverted row by
// row in the destination pixmap. Empty or inverted rectangles are ignored.
void NQD_invrect(uint32 p)
{
	D(bug("accl_invrect %08x\n", p));

	// Get inversion parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width  = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d, bytes_per_row %d\n", width, height, (int32)ReadMacInt32(p + acclDestRowBytes)));

	//!!?? pen_mode == 14

	// And perform the inversion
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));

	// Nothing to do for an empty rectangle. A negative width must never
	// reach do_invrect(), where it would be seen as a huge unsigned length.
	if (width <= 0 || height <= 0)
		return;

	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Row length in bytes, computed in 32 bits so that wide rows cannot
	// overflow the 16-bit pixel width
	const uint32 length = (uint32)width * bpp;
	switch (bpp) {
	case 1:
		for (int i = 0; i < height; i++) {
			do_invrect<8>(dest, length);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_invrect<16>(dest, length);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_invrect<32>(dest, length);
			dest += dest_row_bytes;
		}
		break;
	}
}
179 |
|
180 |
|
181 |
/*
 *  Rectangle filling
 */
184 |
|
185 |
template< int bpp > |
186 |
static inline void do_fillrect(uint8 *dest, uint32 color, uint32 length) |
187 |
{ |
188 |
#define FILL_1(PTR, OFS, VAL) ((uint8 *)(PTR))[OFS] = (VAL) |
189 |
#define FILL_2(PTR, OFS, VAL) ((uint16 *)(PTR))[OFS] = (VAL) |
190 |
#define FILL_4(PTR, OFS, VAL) ((uint32 *)(PTR))[OFS] = (VAL) |
191 |
#define FILL_8(PTR, OFS, VAL) ((uint64 *)(PTR))[OFS] = (VAL) |
192 |
|
193 |
#ifndef UNALIGNED_PROFITABLE |
194 |
// Align on 16-bit boundaries |
195 |
if (bpp < 16 && (((uintptr)dest) & 1)) { |
196 |
FILL_1(dest, 0, color); |
197 |
dest += 1; length -= 1; |
198 |
} |
199 |
|
200 |
// Align on 32-bit boundaries |
201 |
if (bpp < 32 && (((uintptr)dest) & 2)) { |
202 |
FILL_2(dest, 0, color); |
203 |
dest += 2; length -= 2; |
204 |
} |
205 |
#endif |
206 |
|
207 |
// Fill 8-byte words |
208 |
if (length >= 8) { |
209 |
const uint64 c = (((uint64)color) << 32) | color; |
210 |
const int r = (length / 8) % 8; |
211 |
dest += r * 8; |
212 |
|
213 |
int n = ((length / 8) + 7) / 8; |
214 |
switch (r) { |
215 |
case 0: do { |
216 |
dest += 64; |
217 |
FILL_8(dest, -8, c); |
218 |
case 7: FILL_8(dest, -7, c); |
219 |
case 6: FILL_8(dest, -6, c); |
220 |
case 5: FILL_8(dest, -5, c); |
221 |
case 4: FILL_8(dest, -4, c); |
222 |
case 3: FILL_8(dest, -3, c); |
223 |
case 2: FILL_8(dest, -2, c); |
224 |
case 1: FILL_8(dest, -1, c); |
225 |
} while (--n > 0); |
226 |
} |
227 |
} |
228 |
|
229 |
// 32-bit cell to fill? |
230 |
if (length & 4) { |
231 |
FILL_4(dest, 0, color); |
232 |
if (bpp <= 16) |
233 |
dest += 4; |
234 |
} |
235 |
|
236 |
// 16-bit cell to fill? |
237 |
if (bpp <= 16 && (length & 2)) { |
238 |
FILL_2(dest, 0, color); |
239 |
if (bpp <= 8) |
240 |
dest += 2; |
241 |
} |
242 |
|
243 |
// 8-bit cell to fill? |
244 |
if (bpp <= 8 && (length & 1)) |
245 |
FILL_1(dest, 0, color); |
246 |
|
247 |
#undef FILL_1 |
248 |
#undef FILL_2 |
249 |
#undef FILL_4 |
250 |
#undef FILL_8 |
251 |
} |
252 |
|
253 |
// Native rectangle fill.
//
// `p` is the Mac address of the acceleration parameter block. The destination
// rectangle (relative to the destination bounds rectangle) is filled row by
// row with the fore pen when the pen mode is 8, and with the back pen
// otherwise. Empty or inverted rectangles are ignored.
void NQD_fillrect(uint32 p)
{
	D(bug("accl_fillrect %08x\n", p));

	// Get filling parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width  = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	uint32 color = htonl(ReadMacInt32(p + acclPenMode) == 8 ? ReadMacInt32(p + acclForePen) : ReadMacInt32(p + acclBackPen));
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d\n", width, height));
	D(bug(" bytes_per_row %d color %08x\n", (int32)ReadMacInt32(p + acclDestRowBytes), color));

	// And perform the fill
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));

	// Nothing to do for an empty rectangle. A negative width must never
	// reach memset()/do_fillrect(), where it would be seen as a huge
	// unsigned length.
	if (width <= 0 || height <= 0)
		return;

	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Row length in bytes, computed in 32 bits so that wide rows cannot
	// overflow the 16-bit pixel width
	const uint32 length = (uint32)width * bpp;
	switch (bpp) {
	case 1:
		// memset() stores only the low byte of color
		for (int i = 0; i < height; i++) {
			memset(dest, color, length);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_fillrect<16>(dest, color, length);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_fillrect<32>(dest, color, length);
			dest += dest_row_bytes;
		}
		break;
	}
}
293 |
|
294 |
// Fill-rectangle acceleration hook.
//
// `p` is the Mac address of the acceleration parameter block. The destination
// area is always reported as dirty. If the operation can be handled natively
// (transfer mode 8 = fill, 10 = invert, at a depth of at least 8 bits), the
// matching native routine is stored in the block's draw procedure slot and
// true is returned; otherwise the block is left untouched and false is
// returned.
bool NQD_fillrect_hook(uint32 p)
{
	D(bug("accl_fillrect_hook %08x\n", p));
	NQD_set_dirty_area(p);

	// Check if we can accelerate this fillrect
	// NOTE(review): the field at offset 0x284 has no symbolic name here; the
	// code only requires it to be non-zero.
	if (ReadMacInt32(p + 0x284) != 0 && ReadMacInt32(p + acclDestPixelSize) >= 8) {
		const int transfer_mode = ReadMacInt32(p + acclTransferMode);
		if (transfer_mode == 8) {
			// Fill
			WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_NQD_FILLRECT));
			return true;
		}
		else if (transfer_mode == 10) {
			// Invert
			WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_NQD_INVRECT));
			return true;
		}
	}
	return false;
}
315 |
|
316 |
|
317 |
/*
 *  Isomorphic rectangle blitting
 */
320 |
|
321 |
// Native rectangle copy between two pixmaps of the same depth.
//
// `p` is the Mac address of the acceleration parameter block. Rows are copied
// with memmove(), top to bottom when the source row-bytes value is positive
// and bottom to top otherwise. Empty or inverted rectangles are ignored.
void NQD_bitblt(uint32 p)
{
	D(bug("accl_bitblt %08x\n", p));

	// Get blitting parameters
	int16 src_X  = (int16)ReadMacInt16(p + acclSrcRect + 2) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 2);
	int16 src_Y  = (int16)ReadMacInt16(p + acclSrcRect + 0) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 0);
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width  = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	D(bug(" src addr %08x, dest addr %08x\n", ReadMacInt32(p + acclSrcBaseAddr), ReadMacInt32(p + acclDestBaseAddr)));
	D(bug(" src X %d, src Y %d, dest X %d, dest Y %d\n", src_X, src_Y, dest_X, dest_Y));
	D(bug(" width %d, height %d\n", width, height));

	// And perform the blit
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclSrcPixelSize));

	// Nothing to do for an empty rectangle. A negative width must never
	// reach memmove(), where it would be seen as a huge unsigned length.
	if (width <= 0 || height <= 0)
		return;

	// Row length in bytes, computed in 32 bits so that wide rows cannot
	// overflow the 16-bit pixel width
	const uint32 length = (uint32)width * bpp;

	if ((int32)ReadMacInt32(p + acclSrcRowBytes) > 0) {
		// Positive pitch: copy from the first row downwards
		const int src_row_bytes = (int32)ReadMacInt32(p + acclSrcRowBytes);
		const int dst_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
		uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + (src_Y * src_row_bytes) + (src_X * bpp));
		uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dst_row_bytes) + (dest_X * bpp));
		for (int i = 0; i < height; i++) {
			memmove(dst, src, length);
			src += src_row_bytes;
			dst += dst_row_bytes;
		}
	}
	else {
		// Non-positive pitch: use its magnitude, start at the last row
		// and walk upwards
		const int src_row_bytes = -(int32)ReadMacInt32(p + acclSrcRowBytes);
		const int dst_row_bytes = -(int32)ReadMacInt32(p + acclDestRowBytes);
		uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + ((src_Y + height - 1) * src_row_bytes) + (src_X * bpp));
		uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + ((dest_Y + height - 1) * dst_row_bytes) + (dest_X * bpp));
		for (int i = height - 1; i >= 0; i--) {
			memmove(dst, src, length);
			src -= src_row_bytes;
			dst -= dst_row_bytes;
		}
	}
}
362 |
|
363 |
/*
  BitBlt transfer modes:
  0 : srcCopy
  1 : srcOr
  2 : srcXor
  3 : srcBic
  4 : notSrcCopy
  5 : notSrcOr
  6 : notSrcXor
  7 : notSrcBic
  32 : blend
  33 : addPin
  34 : addOver
  35 : subPin
  36 : transparent
  37 : adMax
  38 : subOver
  39 : adMin
  50 : hilite
*/
383 |
|
384 |
// Bit-blit acceleration hook.
//
// `p` is the Mac address of the acceleration parameter block. The destination
// area is always reported as dirty. The native blitter is installed in the
// block's draw procedure slot, and true returned, only when every condition
// below holds; otherwise false is returned.
//
// NOTE(review): the fields at offsets 0x018, 0x128, 0x130 and 0x15c have no
// symbolic names here; their meaning cannot be told from this file.
bool NQD_bitblt_hook(uint32 p)
{
	D(bug("accl_draw_hook %08x\n", p));
	NQD_set_dirty_area(p);

	// Check if we can accelerate this bitblt
	if (ReadMacInt32(p + 0x018) + ReadMacInt32(p + 0x128) == 0 &&
		ReadMacInt32(p + 0x130) == 0 &&
		ReadMacInt32(p + acclSrcPixelSize) >= 8 &&
		ReadMacInt32(p + acclSrcPixelSize) == ReadMacInt32(p + acclDestPixelSize) &&
		(int32)(ReadMacInt32(p + acclSrcRowBytes) ^ ReadMacInt32(p + acclDestRowBytes)) >= 0 &&	// same sign?
		ReadMacInt32(p + acclTransferMode) == 0 &&												// srcCopy?
		(int32)ReadMacInt32(p + 0x15c) > 0) {

		// Yes, set function pointer
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_NQD_BITBLT));
		return true;
	}
	return false;
}
404 |
|
405 |
// Unknown hook |
406 |
bool NQD_unknown_hook(uint32 arg) |
407 |
{ |
408 |
D(bug("accl_unknown_hook %08x\n", arg)); |
409 |
NQD_set_dirty_area(arg); |
410 |
|
411 |
return false; |
412 |
} |
413 |
|
414 |
// Wait for graphics operation to finish |
415 |
bool NQD_sync_hook(uint32 arg) |
416 |
{ |
417 |
D(bug("accl_sync_hook %08x\n", arg)); |
418 |
return true; |
419 |
} |
420 |
|
421 |
|
422 |
/*
 *  Install Native QuickDraw acceleration hooks
 */
425 |
|
426 |
void VideoInstallAccel(void) |
427 |
{ |
428 |
// Install acceleration hooks |
429 |
if (PrefsFindBool("gfxaccel")) { |
430 |
D(bug("Video: Installing acceleration hooks\n")); |
431 |
uint32 base; |
432 |
|
433 |
SheepVar bitblt_hook_info(sizeof(accl_hook_info)); |
434 |
base = bitblt_hook_info.addr(); |
435 |
WriteMacInt32(base + 0, NativeTVECT(NATIVE_NQD_BITBLT_HOOK)); |
436 |
WriteMacInt32(base + 4, NativeTVECT(NATIVE_NQD_SYNC_HOOK)); |
437 |
WriteMacInt32(base + 8, ACCL_BITBLT); |
438 |
NQDMisc(6, bitblt_hook_info.addr()); |
439 |
|
440 |
SheepVar fillrect_hook_info(sizeof(accl_hook_info)); |
441 |
base = fillrect_hook_info.addr(); |
442 |
WriteMacInt32(base + 0, NativeTVECT(NATIVE_NQD_FILLRECT_HOOK)); |
443 |
WriteMacInt32(base + 4, NativeTVECT(NATIVE_NQD_SYNC_HOOK)); |
444 |
WriteMacInt32(base + 8, ACCL_FILLRECT); |
445 |
NQDMisc(6, fillrect_hook_info.addr()); |
446 |
|
447 |
for (int op = 0; op < 8; op++) { |
448 |
switch (op) { |
449 |
case ACCL_BITBLT: |
450 |
case ACCL_FILLRECT: |
451 |
continue; |
452 |
} |
453 |
SheepVar unknown_hook_info(sizeof(accl_hook_info)); |
454 |
base = unknown_hook_info.addr(); |
455 |
WriteMacInt32(base + 0, NativeTVECT(NATIVE_NQD_UNKNOWN_HOOK)); |
456 |
WriteMacInt32(base + 4, NativeTVECT(NATIVE_NQD_SYNC_HOOK)); |
457 |
WriteMacInt32(base + 8, op); |
458 |
NQDMisc(6, unknown_hook_info.addr()); |
459 |
} |
460 |
} |
461 |
} |