ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
(Generate patch)

Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z vs.
Revision 1.20 by gbeauche, 2004-11-01T16:01:51Z

# Line 3 | Line 3
3   *
4   *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5   *
6 < *  Adaptation for Basilisk II and improvements, copyright 2000-2002
6 > *  Adaptation for Basilisk II and improvements, copyright 2000-2004
7   *    Gwenole Beauchesne
8   *
9 < *  Basilisk II (C) 1997-2002 Christian Bauer
9 > *  Basilisk II (C) 1997-2004 Christian Bauer
10 > *
11 > *  Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12   *  
13   *  This program is free software; you can redistribute it and/or modify
14   *  it under the terms of the GNU General Public License as published by
# Line 40 | Line 42
42   #define EBP_INDEX 5
43   #define ESI_INDEX 6
44   #define EDI_INDEX 7
45 + #if defined(__x86_64__)
46 + #define R8_INDEX  8
47 + #define R9_INDEX  9
48 + #define R10_INDEX 10
49 + #define R11_INDEX 11
50 + #define R12_INDEX 12
51 + #define R13_INDEX 13
52 + #define R14_INDEX 14
53 + #define R15_INDEX 15
54 + #endif
55  
56   /* The register in which subroutines return an integer return value */
57 < #define REG_RESULT 0
57 > #define REG_RESULT EAX_INDEX
58  
59   /* The registers subroutines take their first and second argument in */
60   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61   /* Handle the _fastcall parameters of ECX and EDX */
62 < #define REG_PAR1 1
63 < #define REG_PAR2 2
62 > #define REG_PAR1 ECX_INDEX
63 > #define REG_PAR2 EDX_INDEX
64 > #elif defined(__x86_64__)
65 > #define REG_PAR1 EDI_INDEX
66 > #define REG_PAR2 ESI_INDEX
67   #else
68 < #define REG_PAR1 0
69 < #define REG_PAR2 2
68 > #define REG_PAR1 EAX_INDEX
69 > #define REG_PAR2 EDX_INDEX
70   #endif
71  
72 < /* Three registers that are not used for any of the above */
58 < #define REG_NOPAR1 6
59 < #define REG_NOPAR2 5
60 < #define REG_NOPAR3 3
61 <
62 < #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
72 > #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 < #define REG_PC_TMP 0
74 > #define REG_PC_TMP EAX_INDEX
75   #else
76 < #define REG_PC_TMP 1 /* Another register that is not the above */
76 > #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77   #endif
78  
79 < #define SHIFTCOUNT_NREG 1  /* Register that can be used for shiftcount.
79 > #define SHIFTCOUNT_NREG ECX_INDEX  /* Register that can be used for shiftcount.
80                                -1 if any reg will do */
81 < #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
82 < #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
81 > #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82 > #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83  
84   uae_s8 always_used[]={4,-1};
85 + #if defined(__x86_64__)
86 + uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87 + uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88 + #else
89   uae_s8 can_byte[]={0,1,2,3,-1};
90   uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 + #endif
92  
93 + #if USE_OPTIMIZED_CALLS
94 + /* Make sure interpretive core does not use cpuopti */
95 + uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 + #error FIXME: code not ready
97 + #else
98   /* cpuopti mutate instruction handlers to assume registers are saved
99     by the caller */
100 < uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
100 > uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 > #endif
102  
103   /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
# Line 86 | Line 107 | uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
107     - Special registers (such like the stack pointer) should not be "preserved"
108       by pushing, even though they are "saved" across function calls
109   */
110 < uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
110 > uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1};
111  
112   /* Whether classes of instructions do or don't clobber the native flags */
113   #define CLOBBER_MOV
# Line 111 | Line 132 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
132   #define CLOBBER_TEST clobber_flags()
133   #define CLOBBER_CL16
134   #define CLOBBER_CL8  
135 + #define CLOBBER_SE32
136   #define CLOBBER_SE16
137   #define CLOBBER_SE8
138 + #define CLOBBER_ZE32
139   #define CLOBBER_ZE16
140   #define CLOBBER_ZE8
141   #define CLOBBER_SW16 clobber_flags()
# Line 122 | Line 145 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
145   #define CLOBBER_BT   clobber_flags()
146   #define CLOBBER_BSF  clobber_flags()
147  
148 + /* FIXME: new RTASM is enabled for x86-64 below but still needs proofreading.  */
149 + #if defined(__x86_64__)
150 + #define USE_NEW_RTASM 1
151 + #endif
152 +
153 + #if USE_NEW_RTASM
154 +
155 + #if defined(__x86_64__)
156 + #define X86_TARGET_64BIT                1
157 + #endif
158 + #define X86_FLAT_REGISTERS              0
159 + #define X86_OPTIMIZE_ALU                1
160 + #define X86_OPTIMIZE_ROTSHI             1
161 + #include "codegen_x86.h"
162 +
163 + #define x86_emit_byte(B)                emit_byte(B)
164 + #define x86_emit_word(W)                emit_word(W)
165 + #define x86_emit_long(L)                emit_long(L)
166 + #define x86_emit_quad(Q)                emit_quad(Q)
167 + #define x86_get_target()                get_target()
168 + #define x86_emit_failure(MSG)   jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
169 +
170 + static void jit_fail(const char *msg, const char *file, int line, const char *function)
171 + {
172 +        fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
173 +                        function, file, line, msg);
174 +        abort();
175 + }
176 +
177 + LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
178 + {
179 + #if defined(__x86_64__)
180 +        PUSHQr(r);
181 + #else
182 +        PUSHLr(r);
183 + #endif
184 + }
185 + LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
186 +
187 + LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
188 + {
189 + #if defined(__x86_64__)
190 +        POPQr(r);
191 + #else
192 +        POPLr(r);
193 + #endif
194 + }
195 + LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
196 +
197 + LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
198 + {
199 +        BTLir(i, r);
200 + }
201 + LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
202 +
203 + LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
204 + {
205 +        BTLrr(b, r);
206 + }
207 + LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
208 +
209 + LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
210 + {
211 +        BTCLir(i, r);
212 + }
213 + LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
214 +
215 + LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
216 + {
217 +        BTCLrr(b, r);
218 + }
219 + LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
220 +
221 + LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
222 + {
223 +        BTRLir(i, r);
224 + }
225 + LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
226 +
227 + LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
228 + {
229 +        BTRLrr(b, r);
230 + }
231 + LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
232 +
233 + LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
234 + {
235 +        BTSLir(i, r);
236 + }
237 + LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
238 +
239 + LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
240 + {
241 +        BTSLrr(b, r);
242 + }
243 + LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
244 +
245 + LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
246 + {
247 +        SUBWir(i, d);
248 + }
249 + LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
250 +
251 + LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
252 + {
253 +        MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
254 + }
255 + LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
256 +
257 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
258 + {
259 +        MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
260 + }
261 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
262 +
263 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
264 + {
265 +        MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
266 + }
267 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
268 +
269 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
270 + {
271 +        MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
272 + }
273 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
274 +
275 + LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
276 + {
277 +        ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
278 + }
279 + LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
280 +
281 + LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
282 + {
283 +        ROLBir(i, r);
284 + }
285 + LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
286 +
287 + LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
288 + {
289 +        ROLWir(i, r);
290 + }
291 + LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
292 +
293 + LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
294 + {
295 +        ROLLir(i, r);
296 + }
297 + LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
298 +
299 + LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
300 + {
301 +        ROLLrr(r, d);
302 + }
303 + LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
304 +
305 + LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
306 + {
307 +        ROLWrr(r, d);
308 + }
309 + LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
310 +
311 + LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
312 + {
313 +        ROLBrr(r, d);
314 + }
315 + LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
316 +
317 + LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
318 + {
319 +        SHLLrr(r, d);
320 + }
321 + LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
322 +
323 + LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
324 + {
325 +        SHLWrr(r, d);
326 + }
327 + LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
328 +
329 + LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
330 + {
331 +        SHLBrr(r, d);
332 + }
333 + LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
334 +
335 + LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
336 + {
337 +        RORBir(i, r);
338 + }
339 + LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
340 +
341 + LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
342 + {
343 +        RORWir(i, r);
344 + }
345 + LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
346 +
347 + LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
348 + {
349 +        ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
350 + }
351 + LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
352 +
353 + LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
354 + {
355 +        RORLir(i, r);
356 + }
357 + LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
358 +
359 + LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
360 + {
361 +        RORLrr(r, d);
362 + }
363 + LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
364 +
365 + LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
366 + {
367 +        RORWrr(r, d);
368 + }
369 + LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
370 +
371 + LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
372 + {
373 +        RORBrr(r, d);
374 + }
375 + LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
376 +
377 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
378 + {
379 +        SHRLrr(r, d);
380 + }
381 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
382 +
383 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
384 + {
385 +        SHRWrr(r, d);
386 + }
387 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
388 +
389 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
390 + {
391 +        SHRBrr(r, d);
392 + }
393 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
394 +
395 + LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
396 + {
397 +        SARLrr(r, d);
398 + }
399 + LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
400 +
401 + LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
402 + {
403 +        SARWrr(r, d);
404 + }
405 + LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
406 +
407 + LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
408 + {
409 +        SARBrr(r, d);
410 + }
411 + LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
412 +
413 + LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
414 + {
415 +        SHLLir(i, r);
416 + }
417 + LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
418 +
419 + LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
420 + {
421 +        SHLWir(i, r);
422 + }
423 + LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
424 +
425 + LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
426 + {
427 +        SHLBir(i, r);
428 + }
429 + LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
430 +
431 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
432 + {
433 +        SHRLir(i, r);
434 + }
435 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
436 +
437 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
438 + {
439 +        SHRWir(i, r);
440 + }
441 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
442 +
443 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
444 + {
445 +        SHRBir(i, r);
446 + }
447 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
448 +
449 + LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
450 + {
451 +        SARLir(i, r);
452 + }
453 + LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
454 +
455 + LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
456 + {
457 +        SARWir(i, r);
458 + }
459 + LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
460 +
461 + LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
462 + {
463 +        SARBir(i, r);
464 + }
465 + LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
466 +
467 + LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
468 + {
469 +        SAHF();
470 + }
471 + LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
472 +
473 + LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
474 + {
475 +        CPUID();
476 + }
477 + LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
478 +
479 + LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
480 + {
481 +        LAHF();
482 + }
483 + LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
484 +
485 + LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
486 + {
487 +        SETCCir(cc, d);
488 + }
489 + LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
490 +
491 + LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
492 + {
493 +        SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
494 + }
495 + LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
496 +
497 + LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
498 + {
499 +        if (have_cmov)
500 +                CMOVLrr(cc, s, d);
501 +        else { /* replacement using branch and mov */
502 + #if defined(__x86_64__)
503 +                write_log("x86-64 implementations are bound to have CMOV!\n");
504 +                abort();
505 + #endif
506 +                JCCSii(cc^1, 2);
507 +                MOVLrr(s, d);
508 +        }
509 + }
510 + LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
511 +
512 + LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
513 + {
514 +        BSFLrr(s, d);
515 + }
516 + LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
517 +
518 + LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
519 + {
520 +        MOVSLQrr(s, d);
521 + }
522 + LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
523 +
524 + LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
525 + {
526 +        MOVSWLrr(s, d);
527 + }
528 + LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
529 +
530 + LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
531 + {
532 +        MOVSBLrr(s, d);
533 + }
534 + LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
535 +
536 + LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
537 + {
538 +        MOVZWLrr(s, d);
539 + }
540 + LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
541 +
542 + LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
543 + {
544 +        MOVZBLrr(s, d);
545 + }
546 + LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
547 +
548 + LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
549 + {
550 +        IMULLrr(s, d);
551 + }
552 + LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
553 +
554 + LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
555 + {
556 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
557 +        write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
558 +        abort();
559 +        }
560 +        IMULLr(s);
561 + }
562 + LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
563 +
564 + LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
565 + {
566 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
567 +        write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
568 +        abort();
569 +        }
570 +        MULLr(s);
571 + }
572 + LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
573 +
574 + LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
575 + {
576 +        abort(); /* %^$&%^$%#^ x86! */
577 + }
578 + LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
579 +
580 + LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
581 + {
582 +        MOVBrr(s, d);
583 + }
584 + LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
585 +
586 + LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
587 + {
588 +        MOVWrr(s, d);
589 + }
590 + LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
591 +
592 + LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
593 + {
594 +        MOVLmr(0, baser, index, factor, d);
595 + }
596 + LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
597 +
598 + LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
599 + {
600 +        MOVWmr(0, baser, index, factor, d);
601 + }
602 + LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
603 +
604 + LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
605 + {
606 +        MOVBmr(0, baser, index, factor, d);
607 + }
608 + LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
609 +
610 + LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
611 + {
612 +        MOVLrm(s, 0, baser, index, factor);
613 + }
614 + LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
615 +
616 + LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
617 + {
618 +        MOVWrm(s, 0, baser, index, factor);
619 + }
620 + LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
621 +
622 + LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
623 + {
624 +        MOVBrm(s, 0, baser, index, factor);
625 + }
626 + LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
627 +
628 + LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
629 + {
630 +        MOVLrm(s, base, baser, index, factor);
631 + }
632 + LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
633 +
634 + LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
635 + {
636 +        MOVWrm(s, base, baser, index, factor);
637 + }
638 + LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
639 +
640 + LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
641 + {
642 +        MOVBrm(s, base, baser, index, factor);
643 + }
644 + LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
645 +
646 + LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
647 + {
648 +        MOVLmr(base, baser, index, factor, d);
649 + }
650 + LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
651 +
652 + LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
653 + {
654 +        MOVWmr(base, baser, index, factor, d);
655 + }
656 + LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
657 +
658 + LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
659 + {
660 +        MOVBmr(base, baser, index, factor, d);
661 + }
662 + LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
663 +
664 + LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
665 + {
666 +        MOVLmr(base, X86_NOREG, index, factor, d);
667 + }
668 + LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
669 +
670 + LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
671 + {
672 +        if (have_cmov)
673 +                CMOVLmr(cond, base, X86_NOREG, index, factor, d);
674 +        else { /* replacement using branch and mov */
675 + #if defined(__x86_64__)
676 +                write_log("x86-64 implementations are bound to have CMOV!\n");
677 +                abort();
678 + #endif
679 +                JCCSii(cond^1, 7);
680 +                MOVLmr(base, X86_NOREG, index, factor, d);
681 +        }
682 + }
683 + LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
684 +
685 + LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
686 + {
687 +        if (have_cmov)
688 +                CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
689 +        else { /* replacement using branch and mov */
690 + #if defined(__x86_64__)
691 +                write_log("x86-64 implementations are bound to have CMOV!\n");
692 +                abort();
693 + #endif
694 +                JCCSii(cond^1, 6);
695 +                MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
696 +        }
697 + }
698 + LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
699 +
700 + LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
701 + {
702 +        MOVLmr(offset, s, X86_NOREG, 1, d);
703 + }
704 + LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
705 +
706 + LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
707 + {
708 +        MOVWmr(offset, s, X86_NOREG, 1, d);
709 + }
710 + LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
711 +
712 + LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
713 + {
714 +        MOVBmr(offset, s, X86_NOREG, 1, d);
715 + }
716 + LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
717 +
718 + LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
719 + {
720 +        MOVLmr(offset, s, X86_NOREG, 1, d);
721 + }
722 + LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
723 +
724 + LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
725 + {
726 +        MOVWmr(offset, s, X86_NOREG, 1, d);
727 + }
728 + LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
729 +
730 + LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
731 + {
732 +        MOVBmr(offset, s, X86_NOREG, 1, d);
733 + }
734 + LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
735 +
736 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
737 + {
738 +        MOVLim(i, offset, d, X86_NOREG, 1);
739 + }
740 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
741 +
742 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
743 + {
744 +        MOVWim(i, offset, d, X86_NOREG, 1);
745 + }
746 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
747 +
748 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
749 + {
750 +        MOVBim(i, offset, d, X86_NOREG, 1);
751 + }
752 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
753 +
754 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
755 + {
756 +        MOVLrm(s, offset, d, X86_NOREG, 1);
757 + }
758 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
759 +
760 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
761 + {
762 +        MOVWrm(s, offset, d, X86_NOREG, 1);
763 + }
764 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
765 +
766 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
767 + {
768 +        MOVBrm(s, offset, d, X86_NOREG, 1);
769 + }
770 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
771 +
772 + LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
773 + {
774 +        LEALmr(offset, s, X86_NOREG, 1, d);
775 + }
776 + LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
777 +
778 + LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
779 + {
780 +        LEALmr(offset, s, index, factor, d);
781 + }
782 + LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
783 +
784 + LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
785 + {
786 +        LEALmr(0, s, index, factor, d);
787 + }
788 + LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
789 +
790 + LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
791 + {
792 +        MOVLrm(s, offset, d, X86_NOREG, 1);
793 + }
794 + LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
795 +
796 + LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
797 + {
798 +        MOVWrm(s, offset, d, X86_NOREG, 1);
799 + }
800 + LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
801 +
802 + LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
803 + {
804 +        MOVBrm(s, offset, d, X86_NOREG, 1);
805 + }
806 + LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
807 +
808 + LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
809 + {
810 +        BSWAPLr(r);
811 + }
812 + LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
813 +
814 + LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
815 + {
816 +        ROLWir(8, r);
817 + }
818 + LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
819 +
820 + LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
821 + {
822 +        MOVLrr(s, d);
823 + }
824 + LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
825 +
826 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
827 + {
828 +        MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
829 + }
830 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
831 +
832 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
833 + {
834 +        MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
835 + }
836 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
837 +
838 + LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
839 + {
840 +        MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
841 + }
842 + LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
843 +
844 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
845 + {
846 +        MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
847 + }
848 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
849 +
850 + LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
851 + {
852 +        MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
853 + }
854 + LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
855 +
856 + LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
857 + {
858 +        MOVLir(s, d);
859 + }
860 + LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
861 +
862 + LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
863 + {
864 +        MOVWir(s, d);
865 + }
866 + LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
867 +
868 + LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
869 + {
870 +        MOVBir(s, d);
871 + }
872 + LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
873 +
874 + LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
875 + {
876 +        ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
877 + }
878 + LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
879 +
880 + LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
881 + {
882 +        ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
883 + }
884 + LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
885 +
886 + LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
887 + {
888 +        ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
889 + }
890 + LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
891 +
892 + LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
893 + {
894 +        ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
895 + }
896 + LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
897 +
898 + LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
899 + {
900 +        TESTLir(i, d);
901 + }
902 + LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
903 +
904 + LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
905 + {
906 +        TESTLrr(s, d);
907 + }
908 + LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
909 +
910 + LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
911 + {
912 +        TESTWrr(s, d);
913 + }
914 + LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
915 +
916 + LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
917 + {
918 +        TESTBrr(s, d);
919 + }
920 + LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
921 +
922 + LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
923 + {
924 +        ANDLir(i, d);
925 + }
926 + LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
927 +
928 + LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
929 + {
930 +        ANDWir(i, d);
931 + }
932 + LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
933 +
934 + LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
935 + {
936 +        ANDLrr(s, d);
937 + }
938 + LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
939 +
940 + LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
941 + {
942 +        ANDWrr(s, d);
943 + }
944 + LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
945 +
946 + LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
947 + {
948 +        ANDBrr(s, d);
949 + }
950 + LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
951 +
952 + LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
953 + {
954 +        ORLir(i, d);
955 + }
956 + LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
957 +
958 + LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
959 + {
960 +        ORLrr(s, d);
961 + }
962 + LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
963 +
964 + LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
965 + {
966 +        ORWrr(s, d);
967 + }
968 + LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
969 +
970 + LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
971 + {
972 +        ORBrr(s, d);
973 + }
974 + LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
975 +
976 + LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
977 + {
978 +        ADCLrr(s, d);
979 + }
980 + LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
981 +
982 + LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
983 + {
984 +        ADCWrr(s, d);
985 + }
986 + LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
987 +
988 + LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
989 + {
990 +        ADCBrr(s, d);
991 + }
992 + LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
993 +
994 + LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
995 + {
996 +        ADDLrr(s, d);
997 + }
998 + LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
999 +
1000 + LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1001 + {
1002 +        ADDWrr(s, d);
1003 + }
1004 + LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1005 +
1006 + LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1007 + {
1008 +        ADDBrr(s, d);
1009 + }
1010 + LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1011 +
1012 + LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1013 + {
1014 +        SUBLir(i, d);
1015 + }
1016 + LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1017 +
1018 + LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1019 + {
1020 +        SUBBir(i, d);
1021 + }
1022 + LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1023 +
1024 + LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1025 + {
1026 +        ADDLir(i, d);
1027 + }
1028 + LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1029 +
1030 + LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1031 + {
1032 +        ADDWir(i, d);
1033 + }
1034 + LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1035 +
1036 + LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1037 + {
1038 +        ADDBir(i, d);
1039 + }
1040 + LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1041 +
1042 + LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1043 + {
1044 +        SBBLrr(s, d);
1045 + }
1046 + LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1047 +
1048 + LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1049 + {
1050 +        SBBWrr(s, d);
1051 + }
1052 + LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1053 +
1054 + LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1055 + {
1056 +        SBBBrr(s, d);
1057 + }
1058 + LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1059 +
1060 + LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1061 + {
1062 +        SUBLrr(s, d);
1063 + }
1064 + LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1065 +
1066 + LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1067 + {
1068 +        SUBWrr(s, d);
1069 + }
1070 + LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1071 +
1072 + LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1073 + {
1074 +        SUBBrr(s, d);
1075 + }
1076 + LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1077 +
1078 + LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1079 + {
1080 +        CMPLrr(s, d);
1081 + }
1082 + LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1083 +
1084 + LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1085 + {
1086 +        CMPLir(i, r);
1087 + }
1088 + LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1089 +
1090 + LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1091 + {
1092 +        CMPWrr(s, d);
1093 + }
1094 + LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1095 +
1096 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1097 + {
1098 +        CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1099 + }
1100 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1101 +
1102 + LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1103 + {
1104 +        CMPBir(i, d);
1105 + }
1106 + LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1107 +
1108 + LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1109 + {
1110 +        CMPBrr(s, d);
1111 + }
1112 + LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1113 +
1114 + LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1115 + {
1116 +        CMPLmr(offset, X86_NOREG, index, factor, d);
1117 + }
1118 + LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1119 +
1120 + LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1121 + {
1122 +        XORLrr(s, d);
1123 + }
1124 + LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1125 +
1126 + LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1127 + {
1128 +        XORWrr(s, d);
1129 + }
1130 + LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1131 +
1132 + LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1133 + {
1134 +        XORBrr(s, d);
1135 + }
1136 + LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1137 +
1138 + LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1139 + {
1140 +        SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1141 + }
1142 + LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1143 +
1144 + LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1145 + {
1146 +        CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1147 + }
1148 + LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1149 +
1150 + LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1151 + {
1152 +        XCHGLrr(r2, r1);
1153 + }
1154 + LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1155 +
1156 + LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1157 + {
1158 +        PUSHF();
1159 + }
1160 + LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1161 +
1162 + LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1163 + {
1164 +        POPF();
1165 + }
1166 + LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1167 +
1168 + #else
1169 +
1170   const bool optimize_accum               = true;
1171   const bool optimize_imm8                = true;
1172   const bool optimize_shift_once  = true;
# Line 1071 | Line 2116 | LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d
2116  
2117   LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2118   {
2119 +        Dif(!isbyte(offset)) abort();
2120      emit_byte(0x8b);
2121      emit_byte(0x40+8*d+s);
2122      emit_byte(offset);
# Line 1079 | Line 2125 | LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d,
2125  
2126   LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2127   {
2128 +        Dif(!isbyte(offset)) abort();
2129      emit_byte(0x66);
2130      emit_byte(0x8b);
2131      emit_byte(0x40+8*d+s);
# Line 1088 | Line 2135 | LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d,
2135  
2136   LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2137   {
2138 +        Dif(!isbyte(offset)) abort();
2139      emit_byte(0x8a);
2140      emit_byte(0x40+8*d+s);
2141      emit_byte(offset);
# Line 1121 | Line 2169 | LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d
2169  
2170   LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2171   {
2172 +        Dif(!isbyte(offset)) abort();
2173      emit_byte(0xc7);
2174      emit_byte(0x40+d);
2175      emit_byte(offset);
# Line 1130 | Line 2179 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d
2179  
2180   LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2181   {
2182 +        Dif(!isbyte(offset)) abort();
2183      emit_byte(0x66);
2184      emit_byte(0xc7);
2185      emit_byte(0x40+d);
# Line 1140 | Line 2190 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d
2190  
2191   LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2192   {
2193 +        Dif(!isbyte(offset)) abort();
2194      emit_byte(0xc6);
2195      emit_byte(0x40+d);
2196      emit_byte(offset);
# Line 1149 | Line 2200 | LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d
2200  
2201   LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2202   {
2203 +        Dif(!isbyte(offset)) abort();
2204      emit_byte(0x89);
2205      emit_byte(0x40+8*s+d);
2206      emit_byte(offset);
# Line 1157 | Line 2209 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d
2209  
2210   LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2211   {
2212 +        Dif(!isbyte(offset)) abort();
2213      emit_byte(0x66);
2214      emit_byte(0x89);
2215      emit_byte(0x40+8*s+d);
# Line 1166 | Line 2219 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d
2219  
2220   LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2221   {
2222 +        Dif(!isbyte(offset)) abort();
2223      emit_byte(0x88);
2224      emit_byte(0x40+8*s+d);
2225      emit_byte(offset);
# Line 1856 | Line 2910 | LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r
2910   LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2911  
2912   /*************************************************************************
1859 * FIXME: string-related instructions                                    *
1860 *************************************************************************/
1861
1862 LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1863 {
1864        emit_byte(0xfc);
1865 }
1866 LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1867
1868 LOWFUNC(WRITE,NONE,0,raw_std,(void))
1869 {
1870        emit_byte(0xfd);
1871 }
1872 LENDFUNC(WRITE,NONE,0,raw_std,(void))
1873
1874 LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1875 {
1876        emit_byte(0xa4);
1877 }
1878 LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1879
1880 LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1881 {
1882        emit_byte(0xa5);
1883 }
1884 LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1885
1886 LOWFUNC(NONE,RMW,0,raw_rep,(void))
1887 {
1888        emit_byte(0xf3);
1889 }
1890 LENDFUNC(NONE,RMW,0,raw_rep,(void))
1891
1892 LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1893 {
1894        raw_rep();
1895        raw_movs_b();
1896 }
1897 LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1898
1899 LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1900 {
1901        raw_rep();
1902        raw_movs_l();
1903 }
1904 LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1905
1906 /*************************************************************************
2913   * FIXME: mem access modes probably wrong                                *
2914   *************************************************************************/
2915  
# Line 1919 | Line 2925 | LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2925   }
2926   LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2927  
2928 + #endif
2929 +
2930   /*************************************************************************
2931   * Unoptimizable stuff --- jump                                          *
2932   *************************************************************************/
2933  
2934   static __inline__ void raw_call_r(R4 r)
2935   {
2936 + #if USE_NEW_RTASM
2937 +    CALLsr(r);
2938 + #else
2939      emit_byte(0xff);
2940      emit_byte(0xd0+r);
2941 + #endif
2942   }
2943  
2944   static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2945   {
2946 + #if USE_NEW_RTASM
2947 +    CALLsm(base, X86_NOREG, r, m);
2948 + #else
2949      int mu;
2950      switch(m) {
2951       case 1: mu=0; break;
# Line 1943 | Line 2958 | static __inline__ void raw_call_m_indexe
2958      emit_byte(0x14);
2959      emit_byte(0x05+8*r+0x40*mu);
2960      emit_long(base);
2961 + #endif
2962   }
2963  
2964   static __inline__ void raw_jmp_r(R4 r)
2965   {
2966 + #if USE_NEW_RTASM
2967 +    JMPsr(r);
2968 + #else
2969      emit_byte(0xff);
2970      emit_byte(0xe0+r);
2971 + #endif
2972   }
2973  
2974   static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2975   {
2976 + #if USE_NEW_RTASM
2977 +    JMPsm(base, X86_NOREG, r, m);
2978 + #else
2979      int mu;
2980      switch(m) {
2981       case 1: mu=0; break;
# Line 1965 | Line 2988 | static __inline__ void raw_jmp_m_indexed
2988      emit_byte(0x24);
2989      emit_byte(0x05+8*r+0x40*mu);
2990      emit_long(base);
2991 + #endif
2992   }
2993  
2994   static __inline__ void raw_jmp_m(uae_u32 base)
# Line 1977 | Line 3001 | static __inline__ void raw_jmp_m(uae_u32
3001  
3002   static __inline__ void raw_call(uae_u32 t)
3003   {
3004 + #if USE_NEW_RTASM
3005 +    CALLm(t);
3006 + #else
3007      emit_byte(0xe8);
3008      emit_long(t-(uae_u32)target-4);
3009 + #endif
3010   }
3011  
3012   static __inline__ void raw_jmp(uae_u32 t)
3013   {
3014 + #if USE_NEW_RTASM
3015 +    JMPm(t);
3016 + #else
3017      emit_byte(0xe9);
3018      emit_long(t-(uae_u32)target-4);
3019 + #endif
3020   }
3021  
3022   static __inline__ void raw_jl(uae_u32 t)
3023   {
3024      emit_byte(0x0f);
3025      emit_byte(0x8c);
3026 <    emit_long(t-(uae_u32)target-4);
3026 >    emit_long(t-(uintptr)target-4);
3027   }
3028  
3029   static __inline__ void raw_jz(uae_u32 t)
3030   {
3031      emit_byte(0x0f);
3032      emit_byte(0x84);
3033 <    emit_long(t-(uae_u32)target-4);
3033 >    emit_long(t-(uintptr)target-4);
3034   }
3035  
3036   static __inline__ void raw_jnz(uae_u32 t)
3037   {
3038      emit_byte(0x0f);
3039      emit_byte(0x85);
3040 <    emit_long(t-(uae_u32)target-4);
3040 >    emit_long(t-(uintptr)target-4);
3041   }
3042  
3043   static __inline__ void raw_jnz_l_oponly(void)
# Line 2055 | Line 3087 | static __inline__ void raw_nop(void)
3087      emit_byte(0x90);
3088   }
3089  
3090 + static __inline__ void raw_emit_nop_filler(int nbytes)
3091 + {
3092 +  /* Source: GNU Binutils 2.12.90.0.15 */
3093 +  /* Various efficient no-op patterns for aligning code labels.
3094 +     Note: Don't try to assemble the instructions in the comments.
3095 +     0L and 0w are not legal.  */
3096 +  static const uae_u8 f32_1[] =
3097 +    {0x90};                                                                     /* nop                                  */
3098 +  static const uae_u8 f32_2[] =
3099 +    {0x89,0xf6};                                                        /* movl %esi,%esi               */
3100 +  static const uae_u8 f32_3[] =
3101 +    {0x8d,0x76,0x00};                                           /* leal 0(%esi),%esi    */
3102 +  static const uae_u8 f32_4[] =
3103 +    {0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3104 +  static const uae_u8 f32_5[] =
3105 +    {0x90,                                                                      /* nop                                  */
3106 +     0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3107 +  static const uae_u8 f32_6[] =
3108 +    {0x8d,0xb6,0x00,0x00,0x00,0x00};            /* leal 0L(%esi),%esi   */
3109 +  static const uae_u8 f32_7[] =
3110 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3111 +  static const uae_u8 f32_8[] =
3112 +    {0x90,                                                                      /* nop                                  */
3113 +     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3114 +  static const uae_u8 f32_9[] =
3115 +    {0x89,0xf6,                                                         /* movl %esi,%esi               */
3116 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3117 +  static const uae_u8 f32_10[] =
3118 +    {0x8d,0x76,0x00,                                            /* leal 0(%esi),%esi    */
3119 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3120 +  static const uae_u8 f32_11[] =
3121 +    {0x8d,0x74,0x26,0x00,                                       /* leal 0(%esi,1),%esi  */
3122 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3123 +  static const uae_u8 f32_12[] =
3124 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3125 +     0x8d,0xbf,0x00,0x00,0x00,0x00};            /* leal 0L(%edi),%edi   */
3126 +  static const uae_u8 f32_13[] =
3127 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3128 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3129 +  static const uae_u8 f32_14[] =
3130 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,        /* leal 0L(%esi,1),%esi */
3131 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3132 +  static const uae_u8 f32_15[] =
3133 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3134 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3135 +  static const uae_u8 f32_16[] =
3136 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3137 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3138 +  static const uae_u8 *const f32_patt[] = {
3139 +    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3140 +    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3141 +  };
3142 +
3143 +  int nloops = nbytes / 16;
3144 +  while (nloops-- > 0)
3145 +        emit_block(f32_16, sizeof(f32_16));
3146 +
3147 +  nbytes %= 16;
3148 +  if (nbytes)
3149 +        emit_block(f32_patt[nbytes - 1], nbytes);
3150 + }
3151 +
3152  
3153   /*************************************************************************
3154   * Flag handling, to and fro UAE flag register                           *
# Line 2068 | Line 3162 | static __inline__ void raw_flags_to_reg(
3162   {
3163    raw_lahf(0);  /* Most flags in AH */
3164    //raw_setcc(r,0); /* V flag in AL */
3165 <  raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
3165 >  raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3166    
3167   #if 1   /* Let's avoid those nasty partial register stalls */
3168 <  //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
3169 <  raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3168 >  //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3169 >  raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3170    //live.state[FLAGTMP].status=CLEAN;
3171    live.state[FLAGTMP].status=INMEM;
3172    live.state[FLAGTMP].realreg=-1;
# Line 2099 | Line 3193 | static __inline__ void raw_flags_to_reg(
3193   {
3194          raw_pushfl();
3195          raw_pop_l_r(r);
3196 <        raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3196 >        raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3197   //      live.state[FLAGTMP].status=CLEAN;
3198          live.state[FLAGTMP].status=INMEM;
3199          live.state[FLAGTMP].realreg=-1;
# Line 2125 | Line 3219 | static __inline__ void raw_reg_to_flags(
3219   static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3220   {
3221   #if 1
3222 <    raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3222 >    raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3223   #else
3224 <    raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3225 <    raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3224 >    raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3225 >    raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3226   #endif
3227   }
3228  
# Line 2136 | Line 3230 | static __inline__ void raw_load_flagreg(
3230   static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3231   {
3232      if (live.nat[target].canbyte)
3233 <        raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3233 >        raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3234      else if (live.nat[target].canword)
3235 <        raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3235 >        raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3236      else
3237 <        raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3237 >        raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3238   }
3239  
3240 + #define NATIVE_FLAG_Z 0x40
3241 + static __inline__ void raw_flags_set_zero(int f, int r, int t)
3242 + {
3243 +        // FIXME: this is really suboptimal
3244 +        raw_pushfl();
3245 +        raw_pop_l_r(f);
3246 +        raw_and_l_ri(f,~NATIVE_FLAG_Z);
3247 +        raw_test_l_rr(r,r);
3248 +        raw_mov_l_ri(r,0);
3249 +        raw_mov_l_ri(t,NATIVE_FLAG_Z);
3250 +        raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3251 +        raw_or_l(f,r);
3252 +        raw_push_l_r(f);
3253 +        raw_popfl();
3254 + }
3255  
3256   static __inline__ void raw_inc_sp(int off)
3257   {
# Line 2305 | Line 3414 | static void vec(int x, struct sigcontext
3414                  for (i=0;i<5;i++)
3415                      vecbuf[i]=target[i];
3416                  emit_byte(0xe9);
3417 <                emit_long((uae_u32)veccode-(uae_u32)target-4);
3417 >                emit_long((uintptr)veccode-(uintptr)target-4);
3418                  write_log("Create jump to %p\n",veccode);
3419              
3420                  write_log("Handled one access!\n");
# Line 2332 | Line 3441 | static void vec(int x, struct sigcontext
3441                  }
3442                  for (i=0;i<5;i++)
3443                      raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3444 <                raw_mov_l_mi((uae_u32)&in_handler,0);
3444 >                raw_mov_l_mi((uintptr)&in_handler,0);
3445                  emit_byte(0xe9);
3446 <                emit_long(sc.eip+len-(uae_u32)target-4);
3446 >                emit_long(sc.eip+len-(uintptr)target-4);
3447                  in_handler=1;
3448                  target=tmp;
3449              }
# Line 2429 | Line 3538 | enum {
3538    X86_PROCESSOR_K6,
3539    X86_PROCESSOR_ATHLON,
3540    X86_PROCESSOR_PENTIUM4,
3541 +  X86_PROCESSOR_K8,
3542    X86_PROCESSOR_max
3543   };
3544  
# Line 2439 | Line 3549 | static const char * x86_processor_string
3549    "PentiumPro",
3550    "K6",
3551    "Athlon",
3552 <  "Pentium4"
3552 >  "Pentium4",
3553 >  "K8"
3554   };
3555  
3556   static struct ptt {
# Line 2456 | Line 3567 | x86_alignments[X86_PROCESSOR_max] = {
3567    { 16, 15, 16,  7, 16 },
3568    { 32,  7, 32,  7, 32 },
3569    { 16,  7, 16,  7, 16 },
3570 <  {  0,  0,  0,  0,  0 }
3570 >  {  0,  0,  0,  0,  0 },
3571 >  { 16,  7, 16,  7, 16 }
3572   };
3573  
3574   static void
# Line 2491 | Line 3603 | static void
3603   cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3604   {
3605    static uae_u8 cpuid_space[256];  
3606 +  static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3607    uae_u8* tmp=get_target();
3608  
3609 +  s_op = op;
3610    set_target(cpuid_space);
3611    raw_push_l_r(0); /* eax */
3612    raw_push_l_r(1); /* ecx */
3613    raw_push_l_r(2); /* edx */
3614    raw_push_l_r(3); /* ebx */
3615 <  raw_mov_l_rm(0,(uae_u32)&op);
3615 >  raw_mov_l_rm(0,(uintptr)&s_op);
3616    raw_cpuid(0);
3617 <  if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3618 <  if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3619 <  if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3620 <  if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3617 >  raw_mov_l_mr((uintptr)&s_eax,0);
3618 >  raw_mov_l_mr((uintptr)&s_ebx,3);
3619 >  raw_mov_l_mr((uintptr)&s_ecx,1);
3620 >  raw_mov_l_mr((uintptr)&s_edx,2);
3621    raw_pop_l_r(3);
3622    raw_pop_l_r(2);
3623    raw_pop_l_r(1);
# Line 2512 | Line 3626 | cpuid(uae_u32 op, uae_u32 *eax, uae_u32
3626    set_target(tmp);
3627  
3628    ((cpuop_func*)cpuid_space)(0);
3629 +  if (eax != NULL) *eax = s_eax;
3630 +  if (ebx != NULL) *ebx = s_ebx;
3631 +  if (ecx != NULL) *ecx = s_ecx;
3632 +  if (edx != NULL) *edx = s_edx;
3633   }
3634  
3635   static void
# Line 2520 | Line 3638 | raw_init_cpu(void)
3638    struct cpuinfo_x86 *c = &cpuinfo;
3639  
3640    /* Defaults */
3641 +  c->x86_processor = X86_PROCESSOR_max;
3642    c->x86_vendor = X86_VENDOR_UNKNOWN;
3643    c->cpuid_level = -1;                          /* CPUID not detected */
3644    c->x86_model = c->x86_mask = 0;       /* So far unknown... */
# Line 2555 | Line 3674 | raw_init_cpu(void)
3674          c->x86 = 4;
3675    }
3676  
3677 +  /* AMD-defined flags: level 0x80000001 */
3678 +  uae_u32 xlvl;
3679 +  cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3680 +  if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3681 +        if ( xlvl >= 0x80000001 ) {
3682 +          uae_u32 features;
3683 +          cpuid(0x80000001, NULL, NULL, NULL, &features);
3684 +          if (features & (1 << 29)) {
3685 +                /* Assume x86-64 if long mode is supported */
3686 +                c->x86_processor = X86_PROCESSOR_K8;
3687 +          }
3688 +        }
3689 +  }
3690 +          
3691    /* Canonicalize processor ID */
2559  c->x86_processor = X86_PROCESSOR_max;
3692    switch (c->x86) {
3693    case 3:
3694          c->x86_processor = X86_PROCESSOR_I386;
# Line 2578 | Line 3710 | raw_init_cpu(void)
3710          break;
3711    case 15:
3712          if (c->x86_vendor == X86_VENDOR_INTEL) {
3713 <          /*  Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
3713 >          /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3714            if (c->x86_brand_id >= 8)
3715                  c->x86_processor = X86_PROCESSOR_PENTIUM4;
3716          }
3717 +        if (c->x86_vendor == X86_VENDOR_AMD) {
3718 +          /* Assume an Athlon processor if family == 15 and it was not
3719 +             detected as an x86-64 so far */
3720 +          if (c->x86_processor == X86_PROCESSOR_max)
3721 +                c->x86_processor = X86_PROCESSOR_ATHLON;
3722 +        }
3723          break;
3724    }
3725    if (c->x86_processor == X86_PROCESSOR_max) {
# Line 2589 | Line 3727 | raw_init_cpu(void)
3727          fprintf(stderr, "  Family  : %d\n", c->x86);
3728          fprintf(stderr, "  Model   : %d\n", c->x86_model);
3729          fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
3730 +        fprintf(stderr, "  Vendor  : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3731          if (c->x86_brand_id)
3732            fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
3733          abort();
3734    }
3735  
3736    /* Have CMOV support? */
3737 <  have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3737 >  have_cmov = c->x86_hwcap & (1 << 15);
3738  
3739    /* Can the host CPU suffer from partial register stalls? */
3740    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
# Line 2618 | Line 3757 | raw_init_cpu(void)
3757                          x86_processor_string_table[c->x86_processor]);
3758   }
3759  
3760 + static bool target_check_bsf(void)
3761 + {
3762 +        bool mismatch = false;
3763 +        for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3764 +        for (int g_CF = 0; g_CF <= 1; g_CF++) {
3765 +        for (int g_OF = 0; g_OF <= 1; g_OF++) {
3766 +        for (int g_SF = 0; g_SF <= 1; g_SF++) {
3767 +                for (int value = -1; value <= 1; value++) {
3768 +                        int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3769 +                        int tmp = value;
3770 +                        __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3771 +                                                                  : "+r" (flags), "+r" (tmp) : : "cc");
3772 +                        int OF = (flags >> 11) & 1;
3773 +                        int SF = (flags >>  7) & 1;
3774 +                        int ZF = (flags >>  6) & 1;
3775 +                        int CF = flags & 1;
3776 +                        tmp = (value == 0);
3777 +                        if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3778 +                                mismatch = true;
3779 +                }
3780 +        }}}}
3781 +        if (mismatch)
3782 +                write_log("Target CPU defines all flags on BSF instruction\n");
3783 +        return !mismatch;
3784 + }
3785 +
3786  
3787   /*************************************************************************
3788   * FPU stuff                                                             *
# Line 3052 | Line 4217 | LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d
4217      emit_byte(0xf0);  /* f2xm1 */
4218      emit_byte(0xdc);
4219      emit_byte(0x05);
4220 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4220 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4221      emit_byte(0xd9);
4222      emit_byte(0xfd);  /* and scale it */
4223      emit_byte(0xdd);
# Line 3086 | Line 4251 | LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d,
4251      emit_byte(0xf0);  /* f2xm1 */
4252      emit_byte(0xdc);
4253      emit_byte(0x05);
4254 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4254 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4255      emit_byte(0xd9);
4256      emit_byte(0xfd);  /* and scale it */
4257      emit_byte(0xdd);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines