root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp

Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z vs.
Revision 1.22 by gbeauche, 2004-11-01T18:40:30Z

# Line 3 | Line 3
3   *
4   *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5   *
6 < *  Adaptation for Basilisk II and improvements, copyright 2000-2002
6 > *  Adaptation for Basilisk II and improvements, copyright 2000-2004
7   *    Gwenole Beauchesne
8   *
9 < *  Basilisk II (C) 1997-2002 Christian Bauer
9 > *  Basilisk II (C) 1997-2004 Christian Bauer
10 > *
11 > *  Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12   *  
13   *  This program is free software; you can redistribute it and/or modify
14   *  it under the terms of the GNU General Public License as published by
# Line 40 | Line 42
42   #define EBP_INDEX 5
43   #define ESI_INDEX 6
44   #define EDI_INDEX 7
45 + #if defined(__x86_64__)
46 + #define R8_INDEX  8
47 + #define R9_INDEX  9
48 + #define R10_INDEX 10
49 + #define R11_INDEX 11
50 + #define R12_INDEX 12
51 + #define R13_INDEX 13
52 + #define R14_INDEX 14
53 + #define R15_INDEX 15
54 + #endif
55  
56   /* The register in which subroutines return an integer return value */
57 < #define REG_RESULT 0
57 > #define REG_RESULT EAX_INDEX
58  
59   /* The registers subroutines take their first and second argument in */
60   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61   /* Handle the _fastcall parameters of ECX and EDX */
62 < #define REG_PAR1 1
63 < #define REG_PAR2 2
62 > #define REG_PAR1 ECX_INDEX
63 > #define REG_PAR2 EDX_INDEX
64 > #elif defined(__x86_64__)
65 > #define REG_PAR1 EDI_INDEX
66 > #define REG_PAR2 ESI_INDEX
67   #else
68 < #define REG_PAR1 0
69 < #define REG_PAR2 2
68 > #define REG_PAR1 EAX_INDEX
69 > #define REG_PAR2 EDX_INDEX
70   #endif
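A note on the argument registers above: MSVC's _fastcall passes the first two integer arguments in ECX/EDX, the SysV x86-64 ABI uses RDI/RSI, and the default 32-bit path (mutated by cpuopti) expects EAX/EDX. A minimal caller-side sketch of how these macros would be used, assuming the raw_mov_l_ri and raw_call emitters defined further down; the helper name is illustrative only:

    static void emit_call_with_two_args(uae_u32 fn, uae_u32 arg1, uae_u32 arg2)
    {
        raw_mov_l_ri(REG_PAR1, arg1);   /* first argument register  */
        raw_mov_l_ri(REG_PAR2, arg2);   /* second argument register */
        raw_call(fn);                   /* result comes back in REG_RESULT */
    }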
71  
72 < /* Three registers that are not used for any of the above */
58 < #define REG_NOPAR1 6
59 < #define REG_NOPAR2 5
60 < #define REG_NOPAR3 3
61 <
62 < #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
72 > #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 < #define REG_PC_TMP 0
74 > #define REG_PC_TMP EAX_INDEX
75   #else
76 < #define REG_PC_TMP 1 /* Another register that is not the above */
76 > #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77   #endif
78  
79 < #define SHIFTCOUNT_NREG 1  /* Register that can be used for shiftcount.
79 > #define SHIFTCOUNT_NREG ECX_INDEX  /* Register that can be used for shiftcount.
80                                -1 if any reg will do */
81 < #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
82 < #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
81 > #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82 > #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83  
84   uae_s8 always_used[]={4,-1};
85 + #if defined(__x86_64__)
86 + uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87 + uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88 + #else
89   uae_s8 can_byte[]={0,1,2,3,-1};
90   uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 + #endif
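The split above follows byte addressability: plain IA-32 only has AL, CL, DL and BL, so byte operations are limited to register indexes 0-3, while a REX prefix on x86-64 makes the low byte of every register reachable (SPL, BPL, SIL, DIL, R8B-R15B). Index 4 (ESP/RSP) stays out of both tables because it is the stack pointer, listed in always_used. An illustrative predicate expressing the same rule, assuming the ESP_INDEX/EBX_INDEX macros defined alongside the register indexes above:

    /* Mirrors can_byte[]: may this register index be an 8-bit operand? */
    static inline int reg_can_byte(int r)
    {
    #if defined(__x86_64__)
        return r != ESP_INDEX;   /* REX reaches every low byte; %rsp stays off-limits */
    #else
        return r <= EBX_INDEX;   /* only AL, CL, DL, BL exist without REX */
    #endif
    }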
92  
93 + #if USE_OPTIMIZED_CALLS
94 + /* Make sure interpretive core does not use cpuopti */
95 + uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 + #error FIXME: code not ready
97 + #else
98   /* cpuopti mutates instruction handlers to assume registers are saved
99     by the caller */
100 < uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
100 > uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 > #endif
102  
103   /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
# Line 86 | Line 107 | uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
107     - Special registers (such like the stack pointer) should not be "preserved"
108       by pushing, even though they are "saved" across function calls
109   */
110 < uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
110 > #if defined(__x86_64__)
111 > /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 > /* preserve r11 because it's generally used to hold pointers to functions */
113 > static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
114 > #else
115 > static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
116 > #endif
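need_to_preserve drives the prologue and epilogue of generated code: every register flagged 1 is pushed on entry and popped in reverse order on exit. A sketch of that pattern, using the raw_push_l_r/raw_pop_l_r emitters defined below; N_REGS stands for the number of table entries and is assumed here:

    static void emit_save_regs(void)
    {
        for (int i = 0; i < N_REGS; i++)
            if (need_to_preserve[i])
                raw_push_l_r(i);
    }

    static void emit_restore_regs(void)
    {
        for (int i = N_REGS - 1; i >= 0; i--)   /* reverse order */
            if (need_to_preserve[i])
                raw_pop_l_r(i);
    }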
117  
118   /* Whether classes of instructions do or don't clobber the native flags */
119   #define CLOBBER_MOV
# Line 111 | Line 138 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
138   #define CLOBBER_TEST clobber_flags()
139   #define CLOBBER_CL16
140   #define CLOBBER_CL8  
141 + #define CLOBBER_SE32
142   #define CLOBBER_SE16
143   #define CLOBBER_SE8
144 + #define CLOBBER_ZE32
145   #define CLOBBER_ZE16
146   #define CLOBBER_ZE8
147   #define CLOBBER_SW16 clobber_flags()
# Line 122 | Line 151 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
151   #define CLOBBER_BT   clobber_flags()
152   #define CLOBBER_BSF  clobber_flags()
153  
154 + /* FIXME: the new RTASM is only enabled on x86-64 until it has been proofread.  */
155 + #if defined(__x86_64__)
156 + #define USE_NEW_RTASM 1
157 + #endif
158 +
159 + #if USE_NEW_RTASM
160 +
161 + #if defined(__x86_64__)
162 + #define X86_TARGET_64BIT                1
163 + #endif
164 + #define X86_FLAT_REGISTERS              0
165 + #define X86_OPTIMIZE_ALU                1
166 + #define X86_OPTIMIZE_ROTSHI             1
167 + #include "codegen_x86.h"
168 +
169 + #define x86_emit_byte(B)                emit_byte(B)
170 + #define x86_emit_word(W)                emit_word(W)
171 + #define x86_emit_long(L)                emit_long(L)
172 + #define x86_emit_quad(Q)                emit_quad(Q)
173 + #define x86_get_target()                get_target()
174 + #define x86_emit_failure(MSG)   jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
175 +
176 + static void jit_fail(const char *msg, const char *file, int line, const char *function)
177 + {
178 +        fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
179 +                        function, file, line, msg);
180 +        abort();
181 + }
182 +
183 + LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
184 + {
185 + #if defined(__x86_64__)
186 +        PUSHQr(r);
187 + #else
188 +        PUSHLr(r);
189 + #endif
190 + }
191 + LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
192 +
193 + LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
194 + {
195 + #if defined(__x86_64__)
196 +        POPQr(r);
197 + #else
198 +        POPLr(r);
199 + #endif
200 + }
201 + LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
202 +
203 + LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
204 + {
205 +        BTLir(i, r);
206 + }
207 + LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
208 +
209 + LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
210 + {
211 +        BTLrr(b, r);
212 + }
213 + LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
214 +
215 + LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
216 + {
217 +        BTCLir(i, r);
218 + }
219 + LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
220 +
221 + LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
222 + {
223 +        BTCLrr(b, r);
224 + }
225 + LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
226 +
227 + LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
228 + {
229 +        BTRLir(i, r);
230 + }
231 + LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
232 +
233 + LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
234 + {
235 +        BTRLrr(b, r);
236 + }
237 + LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
238 +
239 + LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
240 + {
241 +        BTSLir(i, r);
242 + }
243 + LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
244 +
245 + LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
246 + {
247 +        BTSLrr(b, r);
248 + }
249 + LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
250 +
251 + LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
252 + {
253 +        SUBWir(i, d);
254 + }
255 + LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
256 +
257 + LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
258 + {
259 +        MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
260 + }
261 + LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
262 +
263 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
264 + {
265 +        MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
266 + }
267 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
268 +
269 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
270 + {
271 +        MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
272 + }
273 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
274 +
275 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
276 + {
277 +        MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
278 + }
279 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
280 +
281 + LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
282 + {
283 +        ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
284 + }
285 + LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
286 +
287 + LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
288 + {
289 +        ROLBir(i, r);
290 + }
291 + LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
292 +
293 + LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
294 + {
295 +        ROLWir(i, r);
296 + }
297 + LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
298 +
299 + LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
300 + {
301 +        ROLLir(i, r);
302 + }
303 + LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
304 +
305 + LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
306 + {
307 +        ROLLrr(r, d);
308 + }
309 + LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
310 +
311 + LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
312 + {
313 +        ROLWrr(r, d);
314 + }
315 + LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
316 +
317 + LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
318 + {
319 +        ROLBrr(r, d);
320 + }
321 + LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
322 +
323 + LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
324 + {
325 +        SHLLrr(r, d);
326 + }
327 + LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
328 +
329 + LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
330 + {
331 +        SHLWrr(r, d);
332 + }
333 + LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
334 +
335 + LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
336 + {
337 +        SHLBrr(r, d);
338 + }
339 + LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
340 +
341 + LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
342 + {
343 +        RORBir(i, r);
344 + }
345 + LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
346 +
347 + LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
348 + {
349 +        RORWir(i, r);
350 + }
351 + LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
352 +
353 + LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
354 + {
355 +        ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
356 + }
357 + LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
358 +
359 + LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
360 + {
361 +        RORLir(i, r);
362 + }
363 + LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
364 +
365 + LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
366 + {
367 +        RORLrr(r, d);
368 + }
369 + LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
370 +
371 + LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
372 + {
373 +        RORWrr(r, d);
374 + }
375 + LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
376 +
377 + LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
378 + {
379 +        RORBrr(r, d);
380 + }
381 + LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
382 +
383 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
384 + {
385 +        SHRLrr(r, d);
386 + }
387 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
388 +
389 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
390 + {
391 +        SHRWrr(r, d);
392 + }
393 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
394 +
395 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
396 + {
397 +        SHRBrr(r, d);
398 + }
399 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
400 +
401 + LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
402 + {
403 +        SARLrr(r, d);
404 + }
405 + LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
406 +
407 + LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
408 + {
409 +        SARWrr(r, d);
410 + }
411 + LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
412 +
413 + LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
414 + {
415 +        SARBrr(r, d);
416 + }
417 + LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
418 +
419 + LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
420 + {
421 +        SHLLir(i, r);
422 + }
423 + LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
424 +
425 + LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
426 + {
427 +        SHLWir(i, r);
428 + }
429 + LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
430 +
431 + LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
432 + {
433 +        SHLBir(i, r);
434 + }
435 + LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
436 +
437 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
438 + {
439 +        SHRLir(i, r);
440 + }
441 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
442 +
443 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
444 + {
445 +        SHRWir(i, r);
446 + }
447 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
448 +
449 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
450 + {
451 +        SHRBir(i, r);
452 + }
453 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
454 +
455 + LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
456 + {
457 +        SARLir(i, r);
458 + }
459 + LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
460 +
461 + LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
462 + {
463 +        SARWir(i, r);
464 + }
465 + LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
466 +
467 + LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
468 + {
469 +        SARBir(i, r);
470 + }
471 + LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
472 +
473 + LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
474 + {
475 +        SAHF();
476 + }
477 + LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
478 +
479 + LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
480 + {
481 +        CPUID();
482 + }
483 + LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
484 +
485 + LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
486 + {
487 +        LAHF();
488 + }
489 + LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
490 +
491 + LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
492 + {
493 +        SETCCir(cc, d);
494 + }
495 + LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
496 +
497 + LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
498 + {
499 +        SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
500 + }
501 + LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
502 +
503 + LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
504 + {
505 +        if (have_cmov)
506 +                CMOVLrr(cc, s, d);
507 +        else { /* replacement using branch and mov */
508 + #if defined(__x86_64__)
509 +                write_log("x86-64 implementations are bound to have CMOV!\n");
510 +                abort();
511 + #endif
512 +                JCCSii(cc^1, 2);
513 +                MOVLrr(s, d);
514 +        }
515 + }
516 + LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
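In the fallback, JCCSii(cc^1, 2) emits the inverse condition as a short jump whose 8-bit displacement (2) is exactly the length of the 2-byte register-to-register MOVLrr that follows, so the move is skipped whenever the original condition is false. The net effect, with or without host CMOV, as a C model:

    /* What raw_cmov_l_rr computes: d unchanged unless the condition holds. */
    static inline uae_u32 cmov_model(int cond_holds, uae_u32 d, uae_u32 s)
    {
        return cond_holds ? s : d;
    }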
517 +
518 + LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
519 + {
520 +        BSFLrr(s, d);
521 + }
522 + LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
523 +
524 + LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
525 + {
526 +        MOVSLQrr(s, d);
527 + }
528 + LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
529 +
530 + LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
531 + {
532 +        MOVSWLrr(s, d);
533 + }
534 + LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
535 +
536 + LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
537 + {
538 +        MOVSBLrr(s, d);
539 + }
540 + LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
541 +
542 + LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
543 + {
544 +        MOVZWLrr(s, d);
545 + }
546 + LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
547 +
548 + LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
549 + {
550 +        MOVZBLrr(s, d);
551 + }
552 + LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
553 +
554 + LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
555 + {
556 +        IMULLrr(s, d);
557 + }
558 + LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
559 +
560 + LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
561 + {
562 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
563 +                write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
564 +                abort();
565 +        }
566 +        IMULLr(s);
567 + }
568 + LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
569 +
570 + LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
571 + {
572 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
573 +                write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
574 +                abort();
575 +        }
576 +        MULLr(s);
577 + }
578 + LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
579 +
580 + LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
581 + {
582 +        abort(); /* %^$&%^$%#^ x86! */
583 + }
584 + LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
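raw_mul_32_32 can afford to abort: x86 has no two-operand unsigned multiply, but the low 32 bits of a product are independent of signedness, so raw_imul_32_32 covers the unsigned case as well. A self-contained check of that identity:

    #include <assert.h>
    #include <stdint.h>
    int main(void)
    {
        uint32_t a = 0xdeadbeefu, b = 0x12345678u;
        uint32_t u = a * b;                                         /* unsigned product */
        uint32_t s = (uint32_t)((int64_t)(int32_t)a * (int32_t)b);  /* signed product  */
        assert(u == s);   /* the low 32 bits agree */
        return 0;
    }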
585 +
586 + LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
587 + {
588 +        MOVBrr(s, d);
589 + }
590 + LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
591 +
592 + LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
593 + {
594 +        MOVWrr(s, d);
595 + }
596 + LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
597 +
598 + LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
599 + {
600 +        MOVLmr(0, baser, index, factor, d);
601 + }
602 + LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
603 +
604 + LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
605 + {
606 +        MOVWmr(0, baser, index, factor, d);
607 + }
608 + LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
609 +
610 + LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
611 + {
612 +        MOVBmr(0, baser, index, factor, d);
613 + }
614 + LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
615 +
616 + LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
617 + {
618 +        MOVLrm(s, 0, baser, index, factor);
619 + }
620 + LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
621 +
622 + LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
623 + {
624 +        MOVWrm(s, 0, baser, index, factor);
625 + }
626 + LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
627 +
628 + LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
629 + {
630 +        MOVBrm(s, 0, baser, index, factor);
631 + }
632 + LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
633 +
634 + LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
635 + {
636 +        MOVLrm(s, base, baser, index, factor);
637 + }
638 + LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
639 +
640 + LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
641 + {
642 +        MOVWrm(s, base, baser, index, factor);
643 + }
644 + LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
645 +
646 + LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
647 + {
648 +        MOVBrm(s, base, baser, index, factor);
649 + }
650 + LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
651 +
652 + LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
653 + {
654 +        MOVLmr(base, baser, index, factor, d);
655 + }
656 + LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
657 +
658 + LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
659 + {
660 +        MOVWmr(base, baser, index, factor, d);
661 + }
662 + LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
663 +
664 + LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
665 + {
666 +        MOVBmr(base, baser, index, factor, d);
667 + }
668 + LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
669 +
670 + LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
671 + {
672 +        MOVLmr(base, X86_NOREG, index, factor, d);
673 + }
674 + LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
675 +
676 + LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
677 + {
678 +        if (have_cmov)
679 +                CMOVLmr(cond, base, X86_NOREG, index, factor, d);
680 +        else { /* replacement using branch and mov */
681 + #if defined(__x86_64__)
682 +                write_log("x86-64 implementations are bound to have CMOV!\n");
683 +                abort();
684 + #endif
685 +                JCCSii(cond^1, 7);
686 +                MOVLmr(base, X86_NOREG, index, factor, d);
687 +        }
688 + }
689 + LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
690 +
691 + LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
692 + {
693 +        if (have_cmov)
694 +                CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
695 +        else { /* replacement using branch and mov */
696 + #if defined(__x86_64__)
697 +                write_log("x86-64 implementations are bound to have CMOV!\n");
698 +                abort();
699 + #endif
700 +                JCCSii(cond^1, 6);
701 +                MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
702 +        }
703 + }
704 + LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
705 +
706 + LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
707 + {
708 +        MOVLmr(offset, s, X86_NOREG, 1, d);
709 + }
710 + LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
711 +
712 + LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
713 + {
714 +        MOVWmr(offset, s, X86_NOREG, 1, d);
715 + }
716 + LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
717 +
718 + LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
719 + {
720 +        MOVBmr(offset, s, X86_NOREG, 1, d);
721 + }
722 + LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
723 +
724 + LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
725 + {
726 +        MOVLmr(offset, s, X86_NOREG, 1, d);
727 + }
728 + LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
729 +
730 + LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
731 + {
732 +        MOVWmr(offset, s, X86_NOREG, 1, d);
733 + }
734 + LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
735 +
736 + LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
737 + {
738 +        MOVBmr(offset, s, X86_NOREG, 1, d);
739 + }
740 + LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
741 +
742 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
743 + {
744 +        MOVLim(i, offset, d, X86_NOREG, 1);
745 + }
746 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
747 +
748 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
749 + {
750 +        MOVWim(i, offset, d, X86_NOREG, 1);
751 + }
752 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
753 +
754 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
755 + {
756 +        MOVBim(i, offset, d, X86_NOREG, 1);
757 + }
758 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
759 +
760 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
761 + {
762 +        MOVLrm(s, offset, d, X86_NOREG, 1);
763 + }
764 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
765 +
766 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
767 + {
768 +        MOVWrm(s, offset, d, X86_NOREG, 1);
769 + }
770 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
771 +
772 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
773 + {
774 +        MOVBrm(s, offset, d, X86_NOREG, 1);
775 + }
776 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
777 +
778 + LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
779 + {
780 +        LEALmr(offset, s, X86_NOREG, 1, d);
781 + }
782 + LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
783 +
784 + LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
785 + {
786 +        LEALmr(offset, s, index, factor, d);
787 + }
788 + LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
789 +
790 + LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
791 + {
792 +        LEALmr(0, s, index, factor, d);
793 + }
794 + LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
795 +
796 + LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
797 + {
798 +        MOVLrm(s, offset, d, X86_NOREG, 1);
799 + }
800 + LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
801 +
802 + LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
803 + {
804 +        MOVWrm(s, offset, d, X86_NOREG, 1);
805 + }
806 + LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
807 +
808 + LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
809 + {
810 +        MOVBrm(s, offset, d, X86_NOREG, 1);
811 + }
812 + LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
813 +
814 + LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
815 + {
816 +        BSWAPLr(r);
817 + }
818 + LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
819 +
820 + LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
821 + {
822 +        ROLWir(8, r);
823 + }
824 + LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
825 +
826 + LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
827 + {
828 +        MOVLrr(s, d);
829 + }
830 + LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
831 +
832 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
833 + {
834 +        MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
835 + }
836 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
837 +
838 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
839 + {
840 +        MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
841 + }
842 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
843 +
844 + LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
845 + {
846 +        MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
847 + }
848 + LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
849 +
850 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
851 + {
852 +        MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
853 + }
854 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
855 +
856 + LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
857 + {
858 +        MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
859 + }
860 + LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
861 +
862 + LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
863 + {
864 +        MOVLir(s, d);
865 + }
866 + LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
867 +
868 + LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
869 + {
870 +        MOVWir(s, d);
871 + }
872 + LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
873 +
874 + LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
875 + {
876 +        MOVBir(s, d);
877 + }
878 + LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
879 +
880 + LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
881 + {
882 +        ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
883 + }
884 + LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
885 +
886 + LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
887 + {
888 +        ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
889 + }
890 + LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
891 +
892 + LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
893 + {
894 +        ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
895 + }
896 + LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
897 +
898 + LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
899 + {
900 +        ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
901 + }
902 + LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
903 +
904 + LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
905 + {
906 +        TESTLir(i, d);
907 + }
908 + LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
909 +
910 + LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
911 + {
912 +        TESTLrr(s, d);
913 + }
914 + LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
915 +
916 + LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
917 + {
918 +        TESTWrr(s, d);
919 + }
920 + LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
921 +
922 + LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
923 + {
924 +        TESTBrr(s, d);
925 + }
926 + LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
927 +
928 + LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
929 + {
930 +        ANDLir(i, d);
931 + }
932 + LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
933 +
934 + LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
935 + {
936 +        ANDWir(i, d);
937 + }
938 + LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
939 +
940 + LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
941 + {
942 +        ANDLrr(s, d);
943 + }
944 + LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
945 +
946 + LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
947 + {
948 +        ANDWrr(s, d);
949 + }
950 + LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
951 +
952 + LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
953 + {
954 +        ANDBrr(s, d);
955 + }
956 + LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
957 +
958 + LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
959 + {
960 +        ORLir(i, d);
961 + }
962 + LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
963 +
964 + LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
965 + {
966 +        ORLrr(s, d);
967 + }
968 + LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
969 +
970 + LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
971 + {
972 +        ORWrr(s, d);
973 + }
974 + LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
975 +
976 + LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
977 + {
978 +        ORBrr(s, d);
979 + }
980 + LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
981 +
982 + LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
983 + {
984 +        ADCLrr(s, d);
985 + }
986 + LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
987 +
988 + LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
989 + {
990 +        ADCWrr(s, d);
991 + }
992 + LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
993 +
994 + LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
995 + {
996 +        ADCBrr(s, d);
997 + }
998 + LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
999 +
1000 + LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1001 + {
1002 +        ADDLrr(s, d);
1003 + }
1004 + LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1005 +
1006 + LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1007 + {
1008 +        ADDWrr(s, d);
1009 + }
1010 + LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1011 +
1012 + LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1013 + {
1014 +        ADDBrr(s, d);
1015 + }
1016 + LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1017 +
1018 + LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1019 + {
1020 +        SUBLir(i, d);
1021 + }
1022 + LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1023 +
1024 + LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1025 + {
1026 +        SUBBir(i, d);
1027 + }
1028 + LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1029 +
1030 + LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1031 + {
1032 +        ADDLir(i, d);
1033 + }
1034 + LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1035 +
1036 + LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1037 + {
1038 +        ADDWir(i, d);
1039 + }
1040 + LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1041 +
1042 + LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1043 + {
1044 +        ADDBir(i, d);
1045 + }
1046 + LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1047 +
1048 + LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1049 + {
1050 +        SBBLrr(s, d);
1051 + }
1052 + LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1053 +
1054 + LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1055 + {
1056 +        SBBWrr(s, d);
1057 + }
1058 + LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1059 +
1060 + LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1061 + {
1062 +        SBBBrr(s, d);
1063 + }
1064 + LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1065 +
1066 + LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1067 + {
1068 +        SUBLrr(s, d);
1069 + }
1070 + LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1071 +
1072 + LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1073 + {
1074 +        SUBWrr(s, d);
1075 + }
1076 + LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1077 +
1078 + LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1079 + {
1080 +        SUBBrr(s, d);
1081 + }
1082 + LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1083 +
1084 + LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1085 + {
1086 +        CMPLrr(s, d);
1087 + }
1088 + LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1089 +
1090 + LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1091 + {
1092 +        CMPLir(i, r);
1093 + }
1094 + LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1095 +
1096 + LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1097 + {
1098 +        CMPWrr(s, d);
1099 + }
1100 + LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1101 +
1102 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1103 + {
1104 +        CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1105 + }
1106 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1107 +
1108 + LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1109 + {
1110 +        CMPBir(i, d);
1111 + }
1112 + LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1113 +
1114 + LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1115 + {
1116 +        CMPBrr(s, d);
1117 + }
1118 + LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1119 +
1120 + LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1121 + {
1122 +        CMPLmr(offset, X86_NOREG, index, factor, d);
1123 + }
1124 + LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1125 +
1126 + LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1127 + {
1128 +        XORLrr(s, d);
1129 + }
1130 + LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1131 +
1132 + LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1133 + {
1134 +        XORWrr(s, d);
1135 + }
1136 + LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1137 +
1138 + LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1139 + {
1140 +        XORBrr(s, d);
1141 + }
1142 + LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1143 +
1144 + LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1145 + {
1146 +        SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1147 + }
1148 + LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1149 +
1150 + LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1151 + {
1152 +        CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1153 + }
1154 + LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1155 +
1156 + LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1157 + {
1158 +        XCHGLrr(r2, r1);
1159 + }
1160 + LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1161 +
1162 + LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1163 + {
1164 +        PUSHF();
1165 + }
1166 + LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1167 +
1168 + LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1169 + {
1170 +        POPF();
1171 + }
1172 + LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1173 +
1174 + #else
1175 +
1176   const bool optimize_accum               = true;
1177   const bool optimize_imm8                = true;
1178   const bool optimize_shift_once  = true;
# Line 1071 | Line 2122 | LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d
2122  
2123   LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2124   {
2125 +        Dif(!isbyte(offset)) abort();
2126      emit_byte(0x8b);
2127      emit_byte(0x40+8*d+s);
2128      emit_byte(offset);
# Line 1079 | Line 2131 | LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d,
2131  
2132   LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2133   {
2134 +        Dif(!isbyte(offset)) abort();
2135      emit_byte(0x66);
2136      emit_byte(0x8b);
2137      emit_byte(0x40+8*d+s);
# Line 1088 | Line 2141 | LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d,
2141  
2142   LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2143   {
2144 +        Dif(!isbyte(offset)) abort();
2145      emit_byte(0x8a);
2146      emit_byte(0x40+8*d+s);
2147      emit_byte(offset);
# Line 1121 | Line 2175 | LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d
2175  
2176   LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2177   {
2178 +        Dif(!isbyte(offset)) abort();
2179      emit_byte(0xc7);
2180      emit_byte(0x40+d);
2181      emit_byte(offset);
# Line 1130 | Line 2185 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d
2185  
2186   LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2187   {
2188 +        Dif(!isbyte(offset)) abort();
2189      emit_byte(0x66);
2190      emit_byte(0xc7);
2191      emit_byte(0x40+d);
# Line 1140 | Line 2196 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d
2196  
2197   LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2198   {
2199 +        Dif(!isbyte(offset)) abort();
2200      emit_byte(0xc6);
2201      emit_byte(0x40+d);
2202      emit_byte(offset);
# Line 1149 | Line 2206 | LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d
2206  
2207   LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2208   {
2209 +        Dif(!isbyte(offset)) abort();
2210      emit_byte(0x89);
2211      emit_byte(0x40+8*s+d);
2212      emit_byte(offset);
# Line 1157 | Line 2215 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d
2215  
2216   LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2217   {
2218 +        Dif(!isbyte(offset)) abort();
2219      emit_byte(0x66);
2220      emit_byte(0x89);
2221      emit_byte(0x40+8*s+d);
# Line 1166 | Line 2225 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d
2225  
2226   LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2227   {
2228 +        Dif(!isbyte(offset)) abort();
2229      emit_byte(0x88);
2230      emit_byte(0x40+8*s+d);
2231      emit_byte(offset);
# Line 1856 | Line 2916 | LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r
2916   LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2917  
2918   /*************************************************************************
1859 * FIXME: string-related instructions                                    *
1860 *************************************************************************/
1861
1862 LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1863 {
1864        emit_byte(0xfc);
1865 }
1866 LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1867
1868 LOWFUNC(WRITE,NONE,0,raw_std,(void))
1869 {
1870        emit_byte(0xfd);
1871 }
1872 LENDFUNC(WRITE,NONE,0,raw_std,(void))
1873
1874 LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1875 {
1876        emit_byte(0xa4);
1877 }
1878 LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1879
1880 LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1881 {
1882        emit_byte(0xa5);
1883 }
1884 LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1885
1886 LOWFUNC(NONE,RMW,0,raw_rep,(void))
1887 {
1888        emit_byte(0xf3);
1889 }
1890 LENDFUNC(NONE,RMW,0,raw_rep,(void))
1891
1892 LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1893 {
1894        raw_rep();
1895        raw_movs_b();
1896 }
1897 LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1898
1899 LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1900 {
1901        raw_rep();
1902        raw_movs_l();
1903 }
1904 LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1905
1906 /*************************************************************************
2919   * FIXME: mem access modes probably wrong                                *
2920   *************************************************************************/
2921  
# Line 1919 | Line 2931 | LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2931   }
2932   LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2933  
2934 + #endif
2935 +
2936   /*************************************************************************
2937   * Unoptimizable stuff --- jump                                          *
2938   *************************************************************************/
2939  
2940   static __inline__ void raw_call_r(R4 r)
2941   {
2942 + #if USE_NEW_RTASM
2943 +    CALLsr(r);
2944 + #else
2945      emit_byte(0xff);
2946      emit_byte(0xd0+r);
2947 + #endif
2948   }
2949  
2950   static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2951   {
2952 + #if USE_NEW_RTASM
2953 +    CALLsm(base, X86_NOREG, r, m);
2954 + #else
2955      int mu;
2956      switch(m) {
2957       case 1: mu=0; break;
# Line 1943 | Line 2964 | static __inline__ void raw_call_m_indexe
2964      emit_byte(0x14);
2965      emit_byte(0x05+8*r+0x40*mu);
2966      emit_long(base);
2967 + #endif
2968   }
2969  
2970   static __inline__ void raw_jmp_r(R4 r)
2971   {
2972 + #if USE_NEW_RTASM
2973 +    JMPsr(r);
2974 + #else
2975      emit_byte(0xff);
2976      emit_byte(0xe0+r);
2977 + #endif
2978   }
2979  
2980   static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2981   {
2982 + #if USE_NEW_RTASM
2983 +    JMPsm(base, X86_NOREG, r, m);
2984 + #else
2985      int mu;
2986      switch(m) {
2987       case 1: mu=0; break;
# Line 1965 | Line 2994 | static __inline__ void raw_jmp_m_indexed
2994      emit_byte(0x24);
2995      emit_byte(0x05+8*r+0x40*mu);
2996      emit_long(base);
2997 + #endif
2998   }
2999  
3000   static __inline__ void raw_jmp_m(uae_u32 base)
# Line 1977 | Line 3007 | static __inline__ void raw_jmp_m(uae_u32
3007  
3008   static __inline__ void raw_call(uae_u32 t)
3009   {
3010 + #if USE_NEW_RTASM
3011 +    CALLm(t);
3012 + #else
3013      emit_byte(0xe8);
3014      emit_long(t-(uae_u32)target-4);
3015 + #endif
3016   }
3017  
3018   static __inline__ void raw_jmp(uae_u32 t)
3019   {
3020 + #if USE_NEW_RTASM
3021 +    JMPm(t);
3022 + #else
3023      emit_byte(0xe9);
3024      emit_long(t-(uae_u32)target-4);
3025 + #endif
3026   }
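Both legacy encodings use a rel32 displacement counted from the end of the instruction, which is why the operand is t - target - 4: after emitting the opcode byte, target points at the displacement field itself, and the extra 4 accounts for the field's own size. The uintptr casts introduced elsewhere in this revision serve the same arithmetic on 64-bit hosts, where truncating a pointer to uae_u32 would corrupt the result. A model of the computation:

    /* rel32 for E8/E9: displacement measured from the next instruction. */
    static inline uae_u32 rel32(uintptr dest, uintptr disp_field)
    {
        return (uae_u32)(dest - disp_field - 4);
    }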
3027  
3028   static __inline__ void raw_jl(uae_u32 t)
3029   {
3030      emit_byte(0x0f);
3031      emit_byte(0x8c);
3032 <    emit_long(t-(uae_u32)target-4);
3032 >    emit_long(t-(uintptr)target-4);
3033   }
3034  
3035   static __inline__ void raw_jz(uae_u32 t)
3036   {
3037      emit_byte(0x0f);
3038      emit_byte(0x84);
3039 <    emit_long(t-(uae_u32)target-4);
3039 >    emit_long(t-(uintptr)target-4);
3040   }
3041  
3042   static __inline__ void raw_jnz(uae_u32 t)
3043   {
3044      emit_byte(0x0f);
3045      emit_byte(0x85);
3046 <    emit_long(t-(uae_u32)target-4);
3046 >    emit_long(t-(uintptr)target-4);
3047   }
3048  
3049   static __inline__ void raw_jnz_l_oponly(void)
# Line 2055 | Line 3093 | static __inline__ void raw_nop(void)
3093      emit_byte(0x90);
3094   }
3095  
3096 + static __inline__ void raw_emit_nop_filler(int nbytes)
3097 + {
3098 +  /* Source: GNU Binutils 2.12.90.0.15 */
3099 +  /* Various efficient no-op patterns for aligning code labels.
3100 +     Note: Don't try to assemble the instructions in the comments.
3101 +     0L and 0w are not legal.  */
3102 +  static const uae_u8 f32_1[] =
3103 +    {0x90};                                                                     /* nop                                  */
3104 +  static const uae_u8 f32_2[] =
3105 +    {0x89,0xf6};                                                        /* movl %esi,%esi               */
3106 +  static const uae_u8 f32_3[] =
3107 +    {0x8d,0x76,0x00};                                           /* leal 0(%esi),%esi    */
3108 +  static const uae_u8 f32_4[] =
3109 +    {0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3110 +  static const uae_u8 f32_5[] =
3111 +    {0x90,                                                                      /* nop                                  */
3112 +     0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3113 +  static const uae_u8 f32_6[] =
3114 +    {0x8d,0xb6,0x00,0x00,0x00,0x00};            /* leal 0L(%esi),%esi   */
3115 +  static const uae_u8 f32_7[] =
3116 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3117 +  static const uae_u8 f32_8[] =
3118 +    {0x90,                                                                      /* nop                                  */
3119 +     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3120 +  static const uae_u8 f32_9[] =
3121 +    {0x89,0xf6,                                                         /* movl %esi,%esi               */
3122 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3123 +  static const uae_u8 f32_10[] =
3124 +    {0x8d,0x76,0x00,                                            /* leal 0(%esi),%esi    */
3125 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3126 +  static const uae_u8 f32_11[] =
3127 +    {0x8d,0x74,0x26,0x00,                                       /* leal 0(%esi,1),%esi  */
3128 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3129 +  static const uae_u8 f32_12[] =
3130 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3131 +     0x8d,0xbf,0x00,0x00,0x00,0x00};            /* leal 0L(%edi),%edi   */
3132 +  static const uae_u8 f32_13[] =
3133 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3134 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3135 +  static const uae_u8 f32_14[] =
3136 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,        /* leal 0L(%esi,1),%esi */
3137 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3138 +  static const uae_u8 f32_15[] =
3139 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3140 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3141 +  static const uae_u8 f32_16[] =
3142 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3143 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3144 +  static const uae_u8 *const f32_patt[] = {
3145 +    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3146 +    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3147 +  };
3148 +  static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3149 +
3150 + #if defined(__x86_64__)
3151 +  /* The recommended way to pad 64bit code is to use NOPs preceded by
3152 +     at most four 0x66 prefixes.  Balance the sizes of the nops.  */
3153 +  if (nbytes == 0)
3154 +          return;
3155 +
3156 +  int i;
3157 +  int nnops = (nbytes + 3) / 4;
3158 +  int len = nbytes / nnops;
3159 +  int remains = nbytes - nnops * len;
3160 +
3161 +  for (i = 0; i < remains; i++) {
3162 +          emit_block(prefixes, len);
3163 +          raw_nop();
3164 +  }
3165 +  for (; i < nnops; i++) {
3166 +          emit_block(prefixes, len - 1);
3167 +          raw_nop();
3168 +  }
3169 + #else
3170 +  int nloops = nbytes / 16;
3171 +  while (nloops-- > 0)
3172 +        emit_block(f32_16, sizeof(f32_16));
3173 +
3174 +  nbytes %= 16;
3175 +  if (nbytes)
3176 +        emit_block(f32_patt[nbytes - 1], nbytes);
3177 + #endif
3178 + }
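A worked example of the x86-64 branch: for nbytes = 7, nnops = (7 + 3) / 4 = 2, len = 7 / 2 = 3 and remains = 1, so the loops emit one 4-byte nop (66 66 66 90) followed by one 3-byte nop (66 66 90). The arithmetic always totals nbytes, as this model shows:

    /* remains*(len+1) + (nnops-remains)*len == nnops*len + remains == nbytes */
    static int nop_filler_total(int nbytes)
    {
        if (nbytes == 0)
            return 0;
        int nnops = (nbytes + 3) / 4;
        int len = nbytes / nnops;
        int remains = nbytes - nnops * len;
        return remains * (len + 1) + (nnops - remains) * len;
    }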
3179 +
3180  
3181   /*************************************************************************
3182   * Flag handling, to and fro UAE flag register                           *
# Line 2068 | Line 3190 | static __inline__ void raw_flags_to_reg(
3190   {
3191    raw_lahf(0);  /* Most flags in AH */
3192    //raw_setcc(r,0); /* V flag in AL */
3193 <  raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
3193 >  raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3194    
3195   #if 1   /* Let's avoid those nasty partial register stalls */
3196 <  //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
3197 <  raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3196 >  //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3197 >  raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3198    //live.state[FLAGTMP].status=CLEAN;
3199    live.state[FLAGTMP].status=INMEM;
3200    live.state[FLAGTMP].realreg=-1;
# Line 2099 | Line 3221 | static __inline__ void raw_flags_to_reg(
3221   {
3222          raw_pushfl();
3223          raw_pop_l_r(r);
3224 <        raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3224 >        raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3225   //      live.state[FLAGTMP].status=CLEAN;
3226          live.state[FLAGTMP].status=INMEM;
3227          live.state[FLAGTMP].realreg=-1;
# Line 2125 | Line 3247 | static __inline__ void raw_reg_to_flags(
3247   static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3248   {
3249   #if 1
3250 <    raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3250 >    raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3251   #else
3252 <    raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3253 <    raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3252 >    raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3253 >    raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3254   #endif
3255   }
3256  
# Line 2136 | Line 3258 | static __inline__ void raw_load_flagreg(
3258   static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3259   {
3260      if (live.nat[target].canbyte)
3261 <        raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3261 >        raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3262      else if (live.nat[target].canword)
3263 <        raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3263 >        raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3264      else
3265 <        raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3265 >        raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3266   }
3267  
3268 + #define NATIVE_FLAG_Z 0x40
3269 + static __inline__ void raw_flags_set_zero(int f, int r, int t)
3270 + {
3271 +        // FIXME: this is really suboptimal
3272 +        raw_pushfl();
3273 +        raw_pop_l_r(f);
3274 +        raw_and_l_ri(f,~NATIVE_FLAG_Z);
3275 +        raw_test_l_rr(r,r);
3276 +        raw_mov_l_ri(r,0);
3277 +        raw_mov_l_ri(t,NATIVE_FLAG_Z);
3278 +        raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3279 +        raw_or_l(f,r);
3280 +        raw_push_l_r(f);
3281 +        raw_popfl();
3282 + }
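The sequence above recomputes only the Z bit of the saved flags: clear it, let TEST set ZF from r, build either 0 or NATIVE_FLAG_Z via CMOV, OR that into the saved image and reload EFLAGS. Registers r and t are clobbered along the way. Its net effect on the flags image, as a C model:

    static inline uae_u32 flags_set_zero_model(uae_u32 flags, uae_u32 r)
    {
        flags &= ~NATIVE_FLAG_Z;      /* drop the stale Z bit */
        if (r == 0)
            flags |= NATIVE_FLAG_Z;   /* Z now reflects r == 0 */
        return flags;
    }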
3283  
3284   static __inline__ void raw_inc_sp(int off)
3285   {
# Line 2305 | Line 3442 | static void vec(int x, struct sigcontext
3442                  for (i=0;i<5;i++)
3443                      vecbuf[i]=target[i];
3444                  emit_byte(0xe9);
3445 <                emit_long((uae_u32)veccode-(uae_u32)target-4);
3445 >                emit_long((uintptr)veccode-(uintptr)target-4);
3446                  write_log("Create jump to %p\n",veccode);
3447              
3448                  write_log("Handled one access!\n");
# Line 2332 | Line 3469 | static void vec(int x, struct sigcontext
3469                  }
3470                  for (i=0;i<5;i++)
3471                      raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3472 <                raw_mov_l_mi((uae_u32)&in_handler,0);
3472 >                raw_mov_l_mi((uintptr)&in_handler,0);
3473                  emit_byte(0xe9);
3474 <                emit_long(sc.eip+len-(uae_u32)target-4);
3474 >                emit_long(sc.eip+len-(uintptr)target-4);
3475                  in_handler=1;
3476                  target=tmp;
3477              }
# Line 2429 | Line 3566 | enum {
3566    X86_PROCESSOR_K6,
3567    X86_PROCESSOR_ATHLON,
3568    X86_PROCESSOR_PENTIUM4,
3569 +  X86_PROCESSOR_K8,
3570    X86_PROCESSOR_max
3571   };
3572  
# Line 2439 | Line 3577 | static const char * x86_processor_string
3577    "PentiumPro",
3578    "K6",
3579    "Athlon",
3580 <  "Pentium4"
3580 >  "Pentium4",
3581 >  "K8"
3582   };
3583  
3584   static struct ptt {
# Line 2456 | Line 3595 | x86_alignments[X86_PROCESSOR_max] = {
3595    { 16, 15, 16,  7, 16 },
3596    { 32,  7, 32,  7, 32 },
3597    { 16,  7, 16,  7, 16 },
3598 <  {  0,  0,  0,  0,  0 }
3598 >  {  0,  0,  0,  0,  0 },
3599 >  { 16,  7, 16,  7, 16 }
3600   };
3601  
3602   static void
# Line 2491 | Line 3631 | static void
3631   cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3632   {
3633    static uae_u8 cpuid_space[256];  
3634 +  static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3635    uae_u8* tmp=get_target();
3636  
3637 +  s_op = op;
3638    set_target(cpuid_space);
3639    raw_push_l_r(0); /* eax */
3640    raw_push_l_r(1); /* ecx */
3641    raw_push_l_r(2); /* edx */
3642    raw_push_l_r(3); /* ebx */
3643 <  raw_mov_l_rm(0,(uae_u32)&op);
3643 >  raw_mov_l_rm(0,(uintptr)&s_op);
3644    raw_cpuid(0);
3645 <  if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3646 <  if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3647 <  if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3648 <  if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3645 >  raw_mov_l_mr((uintptr)&s_eax,0);
3646 >  raw_mov_l_mr((uintptr)&s_ebx,3);
3647 >  raw_mov_l_mr((uintptr)&s_ecx,1);
3648 >  raw_mov_l_mr((uintptr)&s_edx,2);
3649    raw_pop_l_r(3);
3650    raw_pop_l_r(2);
3651    raw_pop_l_r(1);
# Line 2512 | Line 3654 | cpuid(uae_u32 op, uae_u32 *eax, uae_u32
3654    set_target(tmp);
3655  
3656    ((cpuop_func*)cpuid_space)(0);
3657 +  if (eax != NULL) *eax = s_eax;
3658 +  if (ebx != NULL) *ebx = s_ebx;
3659 +  if (ecx != NULL) *ecx = s_ecx;
3660 +  if (edx != NULL) *edx = s_edx;
3661   }
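The rewrite routes the leaf number and all four outputs through static variables because the generated stub addresses memory with absolute operands (raw_mov_l_rm/raw_mov_l_mr); the caller's stack slots have no stable absolute address the stub could bake in. Typical use, following the EBX/EDX/ECX layout the vendor string comes in (a sketch; string.h is assumed for memcpy):

    /* Example: read the 12-byte vendor id via leaf 0. */
    char vendor[13] = { 0 };
    uae_u32 vb, vc, vd;
    cpuid(0x00000000, NULL, &vb, &vc, &vd);
    memcpy(vendor + 0, &vb, 4);   /* e.g. "Genu" */
    memcpy(vendor + 4, &vd, 4);   /*      "ineI" */
    memcpy(vendor + 8, &vc, 4);   /*      "ntel" */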
3662  
3663   static void
# Line 2520 | Line 3666 | raw_init_cpu(void)
3666    struct cpuinfo_x86 *c = &cpuinfo;
3667  
3668    /* Defaults */
3669 +  c->x86_processor = X86_PROCESSOR_max;
3670    c->x86_vendor = X86_VENDOR_UNKNOWN;
3671    c->cpuid_level = -1;                          /* CPUID not detected */
3672    c->x86_model = c->x86_mask = 0;       /* So far unknown... */
# Line 2555 | Line 3702 | raw_init_cpu(void)
3702          c->x86 = 4;
3703    }
3704  
3705 +  /* AMD-defined flags: level 0x80000001 */
3706 +  uae_u32 xlvl;
3707 +  cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3708 +  if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3709 +        if ( xlvl >= 0x80000001 ) {
3710 +          uae_u32 features;
3711 +          cpuid(0x80000001, NULL, NULL, NULL, &features);
3712 +          if (features & (1 << 29)) {
3713 +                /* Assume x86-64 if long mode is supported */
3714 +                c->x86_processor = X86_PROCESSOR_K8;
3715 +          }
3716 +        }
3717 +  }
3718 +          
3719    /* Canonicalize processor ID */
2559  c->x86_processor = X86_PROCESSOR_max;
3720    switch (c->x86) {
3721    case 3:
3722          c->x86_processor = X86_PROCESSOR_I386;
# Line 2578 | Line 3738 | raw_init_cpu(void)
3738          break;
3739    case 15:
3740          if (c->x86_vendor == X86_VENDOR_INTEL) {
3741 <          /*  Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
3741 >          /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3742            if (c->x86_brand_id >= 8)
3743                  c->x86_processor = X86_PROCESSOR_PENTIUM4;
3744          }
3745 +        if (c->x86_vendor == X86_VENDOR_AMD) {
3746 +          /* Assume an Athlon processor if family == 15 and it was not
3747 +             detected as an x86-64 so far */
3748 +          if (c->x86_processor == X86_PROCESSOR_max)
3749 +                c->x86_processor = X86_PROCESSOR_ATHLON;
3750 +        }
3751          break;
3752    }
3753    if (c->x86_processor == X86_PROCESSOR_max) {
# Line 2589 | Line 3755 | raw_init_cpu(void)
3755          fprintf(stderr, "  Family  : %d\n", c->x86);
3756          fprintf(stderr, "  Model   : %d\n", c->x86_model);
3757          fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
3758 +        fprintf(stderr, "  Vendor  : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3759          if (c->x86_brand_id)
3760            fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
3761          abort();
3762    }
3763  
3764    /* Have CMOV support? */
3765 <  have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3765 >  have_cmov = c->x86_hwcap & (1 << 15);
3766  
3767    /* Can the host CPU suffer from partial register stalls? */
3768    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
# Line 2618 | Line 3785 | raw_init_cpu(void)
3785                          x86_processor_string_table[c->x86_processor]);
3786   }
3787  
3788 + static bool target_check_bsf(void)
3789 + {
3790 +        bool mismatch = false;
3791 +        for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3792 +        for (int g_CF = 0; g_CF <= 1; g_CF++) {
3793 +        for (int g_OF = 0; g_OF <= 1; g_OF++) {
3794 +        for (int g_SF = 0; g_SF <= 1; g_SF++) {
3795 +                for (int value = -1; value <= 1; value++) {
3796 +                        int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3797 +                        int tmp = value;
3798 +                        __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3799 +                                                                  : "+r" (flags), "+r" (tmp) : : "cc");
3800 +                        int OF = (flags >> 11) & 1;
3801 +                        int SF = (flags >>  7) & 1;
3802 +                        int ZF = (flags >>  6) & 1;
3803 +                        int CF = flags & 1;
3804 +                        tmp = (value == 0);
3805 +                        if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3806 +                                mismatch = true;
3807 +                }
3808 +        }}}}
3809 +        if (mismatch)
3810 +                write_log("Target CPU defines all flags on BSF instruction\n");
3811 +        return !mismatch;
3812 + }
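Background for the probe: Intel documents BSF as setting ZF when the source is zero and leaving CF, OF and SF undefined, while some implementations define all of them from the result. The loop drives BSF under every initial flag combination and checks that ZF tracks "source == 0" while the other flags pass through unchanged; a true return means generated code may rely on that behavior. A hypothetical start-up hook caching the verdict (names are illustrative, not from this file):

    static bool host_bsf_flags_ok;

    static void check_target_features(void)
    {
        host_bsf_flags_ok = target_check_bsf();
    }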
3813 +
3814  
3815   /*************************************************************************
3816   * FPU stuff                                                             *
# Line 3052 | Line 4245 | LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d
4245      emit_byte(0xf0);  /* f2xm1 */
4246      emit_byte(0xdc);
4247      emit_byte(0x05);
4248 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4248 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4249      emit_byte(0xd9);
4250      emit_byte(0xfd);  /* and scale it */
4251      emit_byte(0xdd);
# Line 3086 | Line 4279 | LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d,
4279      emit_byte(0xf0);  /* f2xm1 */
4280      emit_byte(0xdc);
4281      emit_byte(0x05);
4282 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4282 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4283      emit_byte(0xd9);
4284      emit_byte(0xfd);  /* and scale it */
4285      emit_byte(0xdd);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines