
Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z vs.
Revision 1.21 by gbeauche, 2004-11-01T17:12:55Z

# Line 3 | Line 3
3   *
4   *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5   *
6 < *  Adaptation for Basilisk II and improvements, copyright 2000-2002
6 > *  Adaptation for Basilisk II and improvements, copyright 2000-2004
7   *    Gwenole Beauchesne
8   *
9 < *  Basilisk II (C) 1997-2002 Christian Bauer
9 > *  Basilisk II (C) 1997-2004 Christian Bauer
10 > *
11 > *  Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12   *  
13   *  This program is free software; you can redistribute it and/or modify
14   *  it under the terms of the GNU General Public License as published by
# Line 40 | Line 42
42   #define EBP_INDEX 5
43   #define ESI_INDEX 6
44   #define EDI_INDEX 7
45 + #if defined(__x86_64__)
46 + #define R8_INDEX  8
47 + #define R9_INDEX  9
48 + #define R10_INDEX 10
49 + #define R11_INDEX 11
50 + #define R12_INDEX 12
51 + #define R13_INDEX 13
52 + #define R14_INDEX 14
53 + #define R15_INDEX 15
54 + #endif
55  
56   /* The register in which subroutines return an integer return value */
57 < #define REG_RESULT 0
57 > #define REG_RESULT EAX_INDEX
58  
59   /* The registers subroutines take their first and second argument in */
60   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61   /* Handle the _fastcall parameters of ECX and EDX */
62 < #define REG_PAR1 1
63 < #define REG_PAR2 2
62 > #define REG_PAR1 ECX_INDEX
63 > #define REG_PAR2 EDX_INDEX
64 > #elif defined(__x86_64__)
65 > #define REG_PAR1 EDI_INDEX
66 > #define REG_PAR2 ESI_INDEX
67   #else
68 < #define REG_PAR1 0
69 < #define REG_PAR2 2
68 > #define REG_PAR1 EAX_INDEX
69 > #define REG_PAR2 EDX_INDEX
70   #endif
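
The REG_PAR1/REG_PAR2 choices mirror the host calling convention: MSVC _fastcall passes the first two integer arguments in ECX/EDX, the x86-64 SysV ABI passes them in RDI/RSI, and the default IA-32 build matches GCC's regparm convention, which passes the first three integer arguments in EAX, EDX and ECX. A minimal sketch of that last mapping (illustrative function only, GCC-specific attribute):

    /* Sketch: why REG_PAR1/REG_PAR2 are EAX/EDX on the default IA-32 build.
       With regparm(2), 'a' arrives in %eax and 'b' in %edx. */
    #ifdef __i386__
    __attribute__((regparm(2)))
    static int add_two(int a, int b)
    {
        return a + b;
    }
    #endif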
71  
72 < /* Three registers that are not used for any of the above */
58 < #define REG_NOPAR1 6
59 < #define REG_NOPAR2 5
60 < #define REG_NOPAR3 3
61 <
62 < #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
72 > #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 < #define REG_PC_TMP 0
74 > #define REG_PC_TMP EAX_INDEX
75   #else
76 < #define REG_PC_TMP 1 /* Another register that is not the above */
76 > #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77   #endif
78  
79 < #define SHIFTCOUNT_NREG 1  /* Register that can be used for shiftcount.
79 > #define SHIFTCOUNT_NREG ECX_INDEX  /* Register that can be used for shiftcount.
80                                -1 if any reg will do */
81 < #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
82 < #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
81 > #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82 > #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83  
84   uae_s8 always_used[]={4,-1};
85 + #if defined(__x86_64__)
86 + uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87 + uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88 + #else
89   uae_s8 can_byte[]={0,1,2,3,-1};
90   uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 + #endif
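
Only EAX..EBX expose byte subregisters (AL, CL, DL, BL) in 32-bit mode, so the IA-32 can_byte table stops at index 3; with a REX prefix, x86-64 can address the low byte of every general register (SPL, BPL, SIL, DIL, R8B-R15B), which is why the 64-bit tables admit every register except index 4, the stack pointer already listed in always_used. The tables are -1 terminated, so an allocator can scan them directly; a sketch (helper name invented for illustration):

    /* Sketch: can native register r hold a byte-sized operand? */
    static int reg_can_hold_byte(int r)
    {
        for (int i = 0; can_byte[i] >= 0; i++)
            if (can_byte[i] == r)
                return 1;
        return 0;
    }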
92  
93 + #if USE_OPTIMIZED_CALLS
94 + /* Make sure interpretive core does not use cpuopti */
95 + uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 + #error FIXME: code not ready
97 + #else
98   /* cpuopti mutates instruction handlers to assume registers are saved
99     by the caller */
100 < uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
100 > uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 > #endif
102  
103   /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
# Line 86 | Line 107 | uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
107     - Special registers (such as the stack pointer) should not be "preserved"
108       by pushing, even though they are "saved" across function calls
109   */
110 < uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
110 > #if defined(__x86_64__)
111 > /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 > static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1};
113 > #else
114 > static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
115 > #endif
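
need_to_preserve drives the push/pop sequence emitted around generated code: every register flagged 1 is saved on entry and restored in reverse order on exit, while the stack pointer (index 4) stays 0 because it is preserved by construction rather than by pushing. A minimal sketch of that traversal, assuming the raw_push_l_r/raw_pop_l_r emitters defined below:

    /* Sketch: save/restore the callee-saved set around a generated block. */
    #define N_PRESERVE ((int)(sizeof(need_to_preserve)/sizeof(need_to_preserve[0])))

    static void emit_save_regs(void)
    {
        for (int i = 0; i < N_PRESERVE; i++)
            if (need_to_preserve[i])
                raw_push_l_r(i);
    }

    static void emit_restore_regs(void)
    {
        for (int i = N_PRESERVE - 1; i >= 0; i--)
            if (need_to_preserve[i])
                raw_pop_l_r(i);
    }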
116  
117   /* Whether classes of instructions do or don't clobber the native flags */
118   #define CLOBBER_MOV
# Line 111 | Line 137 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
137   #define CLOBBER_TEST clobber_flags()
138   #define CLOBBER_CL16
139   #define CLOBBER_CL8  
140 + #define CLOBBER_SE32
141   #define CLOBBER_SE16
142   #define CLOBBER_SE8
143 + #define CLOBBER_ZE32
144   #define CLOBBER_ZE16
145   #define CLOBBER_ZE8
146   #define CLOBBER_SW16 clobber_flags()
# Line 122 | Line 150 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
150   #define CLOBBER_BT   clobber_flags()
151   #define CLOBBER_BSF  clobber_flags()
152  
153 + /* FIXME: disabled until that's proofread.  */
154 + #if defined(__x86_64__)
155 + #define USE_NEW_RTASM 1
156 + #endif
157 +
158 + #if USE_NEW_RTASM
159 +
160 + #if defined(__x86_64__)
161 + #define X86_TARGET_64BIT                1
162 + #endif
163 + #define X86_FLAT_REGISTERS              0
164 + #define X86_OPTIMIZE_ALU                1
165 + #define X86_OPTIMIZE_ROTSHI             1
166 + #include "codegen_x86.h"
167 +
168 + #define x86_emit_byte(B)                emit_byte(B)
169 + #define x86_emit_word(W)                emit_word(W)
170 + #define x86_emit_long(L)                emit_long(L)
171 + #define x86_emit_quad(Q)                emit_quad(Q)
172 + #define x86_get_target()                get_target()
173 + #define x86_emit_failure(MSG)   jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
174 +
175 + static void jit_fail(const char *msg, const char *file, int line, const char *function)
176 + {
177 +        fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
178 +                        function, file, line, msg);
179 +        abort();
180 + }
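
The x86_emit_* macros bind the generic runtime assembler from codegen_x86.h to this file's output stream: every instruction is serialized through emit_byte/emit_word/emit_long (plus emit_quad for 64-bit immediates) at the current output position, and any operand combination the assembler cannot encode is routed to jit_fail, which reports the failing site and aborts rather than emitting a broken instruction. A sketch of what the underlying emitters amount to, assuming the UAE-style global target output pointer:

    /* Sketch: byte-stream emitters appending at the JIT output pointer. */
    static uae_u8 *target;                  /* next free byte of code buffer */
    static inline void emit_byte(uae_u8 x)  { *target++ = x; }
    static inline void emit_word(uae_u16 x) { *(uae_u16 *)target = x; target += 2; }
    static inline void emit_long(uae_u32 x) { *(uae_u32 *)target = x; target += 4; }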
181 +
182 + LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
183 + {
184 + #if defined(__x86_64__)
185 +        PUSHQr(r);
186 + #else
187 +        PUSHLr(r);
188 + #endif
189 + }
190 + LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
191 +
192 + LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
193 + {
194 + #if defined(__x86_64__)
195 +        POPQr(r);
196 + #else
197 +        POPLr(r);
198 + #endif
199 + }
200 + LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
201 +
202 + LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
203 + {
204 +        BTLir(i, r);
205 + }
206 + LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
207 +
208 + LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
209 + {
210 +        BTLrr(b, r);
211 + }
212 + LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
213 +
214 + LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
215 + {
216 +        BTCLir(i, r);
217 + }
218 + LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
219 +
220 + LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
221 + {
222 +        BTCLrr(b, r);
223 + }
224 + LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
225 +
226 + LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
227 + {
228 +        BTRLir(i, r);
229 + }
230 + LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
231 +
232 + LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
233 + {
234 +        BTRLrr(b, r);
235 + }
236 + LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
237 +
238 + LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
239 + {
240 +        BTSLir(i, r);
241 + }
242 + LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
243 +
244 + LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
245 + {
246 +        BTSLrr(b, r);
247 + }
248 + LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
249 +
250 + LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
251 + {
252 +        SUBWir(i, d);
253 + }
254 + LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
255 +
256 + LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
257 + {
258 +        MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
259 + }
260 + LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
261 +
262 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
263 + {
264 +        MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
265 + }
266 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
267 +
268 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
269 + {
270 +        MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
271 + }
272 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
273 +
274 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
275 + {
276 +        MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
277 + }
278 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
279 +
280 + LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
281 + {
282 +        ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
283 + }
284 + LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
285 +
286 + LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
287 + {
288 +        ROLBir(i, r);
289 + }
290 + LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
291 +
292 + LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
293 + {
294 +        ROLWir(i, r);
295 + }
296 + LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
297 +
298 + LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
299 + {
300 +        ROLLir(i, r);
301 + }
302 + LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
303 +
304 + LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
305 + {
306 +        ROLLrr(r, d);
307 + }
308 + LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
309 +
310 + LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
311 + {
312 +        ROLWrr(r, d);
313 + }
314 + LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
315 +
316 + LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
317 + {
318 +        ROLBrr(r, d);
319 + }
320 + LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
321 +
322 + LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
323 + {
324 +        SHLLrr(r, d);
325 + }
326 + LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
327 +
328 + LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
329 + {
330 +        SHLWrr(r, d);
331 + }
332 + LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
333 +
334 + LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
335 + {
336 +        SHLBrr(r, d);
337 + }
338 + LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
339 +
340 + LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
341 + {
342 +        RORBir(i, r);
343 + }
344 + LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
345 +
346 + LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
347 + {
348 +        RORWir(i, r);
349 + }
350 + LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
351 +
352 + LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
353 + {
354 +        ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
355 + }
356 + LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
357 +
358 + LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
359 + {
360 +        RORLir(i, r);
361 + }
362 + LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
363 +
364 + LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
365 + {
366 +        RORLrr(r, d);
367 + }
368 + LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
369 +
370 + LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
371 + {
372 +        RORWrr(r, d);
373 + }
374 + LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
375 +
376 + LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
377 + {
378 +        RORBrr(r, d);
379 + }
380 + LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
381 +
382 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
383 + {
384 +        SHRLrr(r, d);
385 + }
386 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
387 +
388 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
389 + {
390 +        SHRWrr(r, d);
391 + }
392 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
393 +
394 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
395 + {
396 +        SHRBrr(r, d);
397 + }
398 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
399 +
400 + LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
401 + {
402 +        SARLrr(r, d);
403 + }
404 + LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
405 +
406 + LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
407 + {
408 +        SARWrr(r, d);
409 + }
410 + LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
411 +
412 + LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
413 + {
414 +        SARBrr(r, d);
415 + }
416 + LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
417 +
418 + LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
419 + {
420 +        SHLLir(i, r);
421 + }
422 + LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
423 +
424 + LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
425 + {
426 +        SHLWir(i, r);
427 + }
428 + LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
429 +
430 + LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
431 + {
432 +        SHLBir(i, r);
433 + }
434 + LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
435 +
436 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
437 + {
438 +        SHRLir(i, r);
439 + }
440 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
441 +
442 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
443 + {
444 +        SHRWir(i, r);
445 + }
446 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
447 +
448 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
449 + {
450 +        SHRBir(i, r);
451 + }
452 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
453 +
454 + LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
455 + {
456 +        SARLir(i, r);
457 + }
458 + LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
459 +
460 + LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
461 + {
462 +        SARWir(i, r);
463 + }
464 + LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
465 +
466 + LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
467 + {
468 +        SARBir(i, r);
469 + }
470 + LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
471 +
472 + LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
473 + {
474 +        SAHF();
475 + }
476 + LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
477 +
478 + LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
479 + {
480 +        CPUID();
481 + }
482 + LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
483 +
484 + LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
485 + {
486 +        LAHF();
487 + }
488 + LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
489 +
490 + LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
491 + {
492 +        SETCCir(cc, d);
493 + }
494 + LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
495 +
496 + LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
497 + {
498 +        SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
499 + }
500 + LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
501 +
502 + LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
503 + {
504 +        if (have_cmov)
505 +                CMOVLrr(cc, s, d);
506 +        else { /* replacement using branch and mov */
507 + #if defined(__x86_64__)
508 +                write_log("x86-64 implementations are bound to have CMOV!\n");
509 +                abort();
510 + #endif
511 +                JCCSii(cc^1, 2);
512 +                MOVLrr(s, d);
513 +        }
514 + }
515 + LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
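
The pre-P6 fallback works because MOVLrr(s, d) encodes to exactly two bytes: JCCSii(cc^1, 2) emits a short conditional jump on the inverted condition that skips just the move, and the x86-64 path can safely abort since CMOV is architectural on every 64-bit processor. Roughly, the replacement sequence is:

    /*  cmovCC %s,%d  is rewritten as:
     *      jNCC  .skip     ; short Jcc, inverted condition, rel8 = 2
     *      movl  %s,%d     ; 2-byte register-to-register move
     *  .skip:
     */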
516 +
517 + LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
518 + {
519 +        BSFLrr(s, d);
520 + }
521 + LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
522 +
523 + LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
524 + {
525 +        MOVSLQrr(s, d);
526 + }
527 + LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
528 +
529 + LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
530 + {
531 +        MOVSWLrr(s, d);
532 + }
533 + LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
534 +
535 + LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
536 + {
537 +        MOVSBLrr(s, d);
538 + }
539 + LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
540 +
541 + LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
542 + {
543 +        MOVZWLrr(s, d);
544 + }
545 + LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
546 +
547 + LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
548 + {
549 +        MOVZBLrr(s, d);
550 + }
551 + LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
552 +
553 + LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
554 + {
555 +        IMULLrr(s, d);
556 + }
557 + LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
558 +
559 + LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
560 + {
561 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
562 +        write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
563 +        abort();
564 +        }
565 +        IMULLr(s);
566 + }
567 + LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
568 +
569 + LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
570 + {
571 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
572 +        write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
573 +        abort();
574 +        }
575 +        MULLr(s);
576 + }
577 + LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
578 +
579 + LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
580 + {
581 +        abort(); /* %^$&%^$%#^ x86! */
582 + }
583 + LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
584 +
585 + LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
586 + {
587 +        MOVBrr(s, d);
588 + }
589 + LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
590 +
591 + LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
592 + {
593 +        MOVWrr(s, d);
594 + }
595 + LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
596 +
597 + LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
598 + {
599 +        MOVLmr(0, baser, index, factor, d);
600 + }
601 + LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
602 +
603 + LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
604 + {
605 +        MOVWmr(0, baser, index, factor, d);
606 + }
607 + LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
608 +
609 + LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
610 + {
611 +        MOVBmr(0, baser, index, factor, d);
612 + }
613 + LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
614 +
615 + LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
616 + {
617 +        MOVLrm(s, 0, baser, index, factor);
618 + }
619 + LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
620 +
621 + LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
622 + {
623 +        MOVWrm(s, 0, baser, index, factor);
624 + }
625 + LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
626 +
627 + LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
628 + {
629 +        MOVBrm(s, 0, baser, index, factor);
630 + }
631 + LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
632 +
633 + LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
634 + {
635 +        MOVLrm(s, base, baser, index, factor);
636 + }
637 + LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
638 +
639 + LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
640 + {
641 +        MOVWrm(s, base, baser, index, factor);
642 + }
643 + LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
644 +
645 + LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
646 + {
647 +        MOVBrm(s, base, baser, index, factor);
648 + }
649 + LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
650 +
651 + LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
652 + {
653 +        MOVLmr(base, baser, index, factor, d);
654 + }
655 + LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
656 +
657 + LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
658 + {
659 +        MOVWmr(base, baser, index, factor, d);
660 + }
661 + LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
662 +
663 + LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
664 + {
665 +        MOVBmr(base, baser, index, factor, d);
666 + }
667 + LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
668 +
669 + LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
670 + {
671 +        MOVLmr(base, X86_NOREG, index, factor, d);
672 + }
673 + LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
674 +
675 + LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
676 + {
677 +        if (have_cmov)
678 +                CMOVLmr(cond, base, X86_NOREG, index, factor, d);
679 +        else { /* replacement using branch and mov */
680 + #if defined(__x86_64__)
681 +                write_log("x86-64 implementations are bound to have CMOV!\n");
682 +                abort();
683 + #endif
684 +                JCCSii(cond^1, 7);
685 +                MOVLmr(base, X86_NOREG, index, factor, d);
686 +        }
687 + }
688 + LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
689 +
690 + LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
691 + {
692 +        if (have_cmov)
693 +                CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
694 +        else { /* replacement using branch and mov */
695 + #if defined(__x86_64__)
696 +                write_log("x86-64 implementations are bound to have CMOV!\n");
697 +                abort();
698 + #endif
699 +                JCCSii(cond^1, 6);
700 +                MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
701 +        }
702 + }
703 + LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
704 +
705 + LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
706 + {
707 +        MOVLmr(offset, s, X86_NOREG, 1, d);
708 + }
709 + LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
710 +
711 + LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
712 + {
713 +        MOVWmr(offset, s, X86_NOREG, 1, d);
714 + }
715 + LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
716 +
717 + LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
718 + {
719 +        MOVBmr(offset, s, X86_NOREG, 1, d);
720 + }
721 + LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
722 +
723 + LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
724 + {
725 +        MOVLmr(offset, s, X86_NOREG, 1, d);
726 + }
727 + LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
728 +
729 + LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
730 + {
731 +        MOVWmr(offset, s, X86_NOREG, 1, d);
732 + }
733 + LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
734 +
735 + LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
736 + {
737 +        MOVBmr(offset, s, X86_NOREG, 1, d);
738 + }
739 + LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
740 +
741 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
742 + {
743 +        MOVLim(i, offset, d, X86_NOREG, 1);
744 + }
745 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
746 +
747 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
748 + {
749 +        MOVWim(i, offset, d, X86_NOREG, 1);
750 + }
751 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
752 +
753 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
754 + {
755 +        MOVBim(i, offset, d, X86_NOREG, 1);
756 + }
757 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
758 +
759 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
760 + {
761 +        MOVLrm(s, offset, d, X86_NOREG, 1);
762 + }
763 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
764 +
765 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
766 + {
767 +        MOVWrm(s, offset, d, X86_NOREG, 1);
768 + }
769 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
770 +
771 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
772 + {
773 +        MOVBrm(s, offset, d, X86_NOREG, 1);
774 + }
775 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
776 +
777 + LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
778 + {
779 +        LEALmr(offset, s, X86_NOREG, 1, d);
780 + }
781 + LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
782 +
783 + LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
784 + {
785 +        LEALmr(offset, s, index, factor, d);
786 + }
787 + LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
788 +
789 + LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
790 + {
791 +        LEALmr(0, s, index, factor, d);
792 + }
793 + LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
794 +
795 + LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
796 + {
797 +        MOVLrm(s, offset, d, X86_NOREG, 1);
798 + }
799 + LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
800 +
801 + LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
802 + {
803 +        MOVWrm(s, offset, d, X86_NOREG, 1);
804 + }
805 + LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
806 +
807 + LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
808 + {
809 +        MOVBrm(s, offset, d, X86_NOREG, 1);
810 + }
811 + LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
812 +
813 + LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
814 + {
815 +        BSWAPLr(r);
816 + }
817 + LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
818 +
819 + LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
820 + {
821 +        ROLWir(8, r);
822 + }
823 + LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
824 +
825 + LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
826 + {
827 +        MOVLrr(s, d);
828 + }
829 + LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
830 +
831 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
832 + {
833 +        MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
834 + }
835 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
836 +
837 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
838 + {
839 +        MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
840 + }
841 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
842 +
843 + LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
844 + {
845 +        MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
846 + }
847 + LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
848 +
849 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
850 + {
851 +        MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
852 + }
853 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
854 +
855 + LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
856 + {
857 +        MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
858 + }
859 + LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
860 +
861 + LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
862 + {
863 +        MOVLir(s, d);
864 + }
865 + LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
866 +
867 + LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
868 + {
869 +        MOVWir(s, d);
870 + }
871 + LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
872 +
873 + LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
874 + {
875 +        MOVBir(s, d);
876 + }
877 + LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
878 +
879 + LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
880 + {
881 +        ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
882 + }
883 + LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
884 +
885 + LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
886 + {
887 +        ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
888 + }
889 + LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
890 +
891 + LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
892 + {
893 +        ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
894 + }
895 + LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
896 +
897 + LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
898 + {
899 +        ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
900 + }
901 + LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
902 +
903 + LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
904 + {
905 +        TESTLir(i, d);
906 + }
907 + LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
908 +
909 + LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
910 + {
911 +        TESTLrr(s, d);
912 + }
913 + LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
914 +
915 + LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
916 + {
917 +        TESTWrr(s, d);
918 + }
919 + LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
920 +
921 + LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
922 + {
923 +        TESTBrr(s, d);
924 + }
925 + LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
926 +
927 + LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
928 + {
929 +        ANDLir(i, d);
930 + }
931 + LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
932 +
933 + LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
934 + {
935 +        ANDWir(i, d);
936 + }
937 + LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
938 +
939 + LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
940 + {
941 +        ANDLrr(s, d);
942 + }
943 + LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
944 +
945 + LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
946 + {
947 +        ANDWrr(s, d);
948 + }
949 + LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
950 +
951 + LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
952 + {
953 +        ANDBrr(s, d);
954 + }
955 + LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
956 +
957 + LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
958 + {
959 +        ORLir(i, d);
960 + }
961 + LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
962 +
963 + LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
964 + {
965 +        ORLrr(s, d);
966 + }
967 + LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
968 +
969 + LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
970 + {
971 +        ORWrr(s, d);
972 + }
973 + LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
974 +
975 + LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
976 + {
977 +        ORBrr(s, d);
978 + }
979 + LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
980 +
981 + LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
982 + {
983 +        ADCLrr(s, d);
984 + }
985 + LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
986 +
987 + LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
988 + {
989 +        ADCWrr(s, d);
990 + }
991 + LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
992 +
993 + LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
994 + {
995 +        ADCBrr(s, d);
996 + }
997 + LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
998 +
999 + LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1000 + {
1001 +        ADDLrr(s, d);
1002 + }
1003 + LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1004 +
1005 + LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1006 + {
1007 +        ADDWrr(s, d);
1008 + }
1009 + LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1010 +
1011 + LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1012 + {
1013 +        ADDBrr(s, d);
1014 + }
1015 + LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1016 +
1017 + LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1018 + {
1019 +        SUBLir(i, d);
1020 + }
1021 + LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1022 +
1023 + LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1024 + {
1025 +        SUBBir(i, d);
1026 + }
1027 + LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1028 +
1029 + LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1030 + {
1031 +        ADDLir(i, d);
1032 + }
1033 + LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1034 +
1035 + LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1036 + {
1037 +        ADDWir(i, d);
1038 + }
1039 + LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1040 +
1041 + LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1042 + {
1043 +        ADDBir(i, d);
1044 + }
1045 + LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1046 +
1047 + LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1048 + {
1049 +        SBBLrr(s, d);
1050 + }
1051 + LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1052 +
1053 + LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1054 + {
1055 +        SBBWrr(s, d);
1056 + }
1057 + LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1058 +
1059 + LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1060 + {
1061 +        SBBBrr(s, d);
1062 + }
1063 + LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1064 +
1065 + LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1066 + {
1067 +        SUBLrr(s, d);
1068 + }
1069 + LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1070 +
1071 + LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1072 + {
1073 +        SUBWrr(s, d);
1074 + }
1075 + LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1076 +
1077 + LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1078 + {
1079 +        SUBBrr(s, d);
1080 + }
1081 + LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1082 +
1083 + LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1084 + {
1085 +        CMPLrr(s, d);
1086 + }
1087 + LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1088 +
1089 + LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1090 + {
1091 +        CMPLir(i, r);
1092 + }
1093 + LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1094 +
1095 + LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1096 + {
1097 +        CMPWrr(s, d);
1098 + }
1099 + LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1100 +
1101 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1102 + {
1103 +        CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1104 + }
1105 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1106 +
1107 + LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1108 + {
1109 +        CMPBir(i, d);
1110 + }
1111 + LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1112 +
1113 + LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1114 + {
1115 +        CMPBrr(s, d);
1116 + }
1117 + LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1118 +
1119 + LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1120 + {
1121 +        CMPLmr(offset, X86_NOREG, index, factor, d);
1122 + }
1123 + LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1124 +
1125 + LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1126 + {
1127 +        XORLrr(s, d);
1128 + }
1129 + LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1130 +
1131 + LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1132 + {
1133 +        XORWrr(s, d);
1134 + }
1135 + LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1136 +
1137 + LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1138 + {
1139 +        XORBrr(s, d);
1140 + }
1141 + LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1142 +
1143 + LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1144 + {
1145 +        SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1146 + }
1147 + LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1148 +
1149 + LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1150 + {
1151 +        CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1152 + }
1153 + LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1154 +
1155 + LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1156 + {
1157 +        XCHGLrr(r2, r1);
1158 + }
1159 + LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1160 +
1161 + LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1162 + {
1163 +        PUSHF();
1164 + }
1165 + LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1166 +
1167 + LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1168 + {
1169 +        POPF();
1170 + }
1171 + LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1172 +
1173 + #else
1174 +
1175   const bool optimize_accum               = true;
1176   const bool optimize_imm8                = true;
1177   const bool optimize_shift_once  = true;
# Line 1071 | Line 2121 | LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d
2121  
2122   LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2123   {
2124 +        Dif(!isbyte(offset)) abort();
2125      emit_byte(0x8b);
2126      emit_byte(0x40+8*d+s);
2127      emit_byte(offset);
# Line 1079 | Line 2130 | LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d,
2130  
2131   LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2132   {
2133 +        Dif(!isbyte(offset)) abort();
2134      emit_byte(0x66);
2135      emit_byte(0x8b);
2136      emit_byte(0x40+8*d+s);
# Line 1088 | Line 2140 | LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d,
2140  
2141   LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2142   {
2143 +        Dif(!isbyte(offset)) abort();
2144      emit_byte(0x8a);
2145      emit_byte(0x40+8*d+s);
2146      emit_byte(offset);
# Line 1121 | Line 2174 | LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d
2174  
2175   LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2176   {
2177 +        Dif(!isbyte(offset)) abort();
2178      emit_byte(0xc7);
2179      emit_byte(0x40+d);
2180      emit_byte(offset);
# Line 1130 | Line 2184 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d
2184  
2185   LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2186   {
2187 +        Dif(!isbyte(offset)) abort();
2188      emit_byte(0x66);
2189      emit_byte(0xc7);
2190      emit_byte(0x40+d);
# Line 1140 | Line 2195 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d
2195  
2196   LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2197   {
2198 +        Dif(!isbyte(offset)) abort();
2199      emit_byte(0xc6);
2200      emit_byte(0x40+d);
2201      emit_byte(offset);
# Line 1149 | Line 2205 | LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d
2205  
2206   LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2207   {
2208 +        Dif(!isbyte(offset)) abort();
2209      emit_byte(0x89);
2210      emit_byte(0x40+8*s+d);
2211      emit_byte(offset);
# Line 1157 | Line 2214 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d
2214  
2215   LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2216   {
2217 +        Dif(!isbyte(offset)) abort();
2218      emit_byte(0x66);
2219      emit_byte(0x89);
2220      emit_byte(0x40+8*s+d);
# Line 1166 | Line 2224 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d
2224  
2225   LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2226   {
2227 +        Dif(!isbyte(offset)) abort();
2228      emit_byte(0x88);
2229      emit_byte(0x40+8*s+d);
2230      emit_byte(offset);
# Line 1856 | Line 2915 | LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r
2915   LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2916  
2917   /*************************************************************************
1859 * FIXME: string-related instructions                                    *
1860 *************************************************************************/
1861
1862 LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1863 {
1864        emit_byte(0xfc);
1865 }
1866 LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1867
1868 LOWFUNC(WRITE,NONE,0,raw_std,(void))
1869 {
1870        emit_byte(0xfd);
1871 }
1872 LENDFUNC(WRITE,NONE,0,raw_std,(void))
1873
1874 LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1875 {
1876        emit_byte(0xa4);
1877 }
1878 LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1879
1880 LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1881 {
1882        emit_byte(0xa5);
1883 }
1884 LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1885
1886 LOWFUNC(NONE,RMW,0,raw_rep,(void))
1887 {
1888        emit_byte(0xf3);
1889 }
1890 LENDFUNC(NONE,RMW,0,raw_rep,(void))
1891
1892 LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1893 {
1894        raw_rep();
1895        raw_movs_b();
1896 }
1897 LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1898
1899 LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1900 {
1901        raw_rep();
1902        raw_movs_l();
1903 }
1904 LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1905
1906 /*************************************************************************
2918   * FIXME: mem access modes probably wrong                                *
2919   *************************************************************************/
2920  
# Line 1919 | Line 2930 | LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2930   }
2931   LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2932  
2933 + #endif
2934 +
2935   /*************************************************************************
2936   * Unoptimizable stuff --- jump                                          *
2937   *************************************************************************/
2938  
2939   static __inline__ void raw_call_r(R4 r)
2940   {
2941 + #if USE_NEW_RTASM
2942 +    CALLsr(r);
2943 + #else
2944      emit_byte(0xff);
2945      emit_byte(0xd0+r);
2946 + #endif
2947   }
2948  
2949   static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2950   {
2951 + #if USE_NEW_RTASM
2952 +    CALLsm(base, X86_NOREG, r, m);
2953 + #else
2954      int mu;
2955      switch(m) {
2956       case 1: mu=0; break;
# Line 1943 | Line 2963 | static __inline__ void raw_call_m_indexe
2963      emit_byte(0x14);
2964      emit_byte(0x05+8*r+0x40*mu);
2965      emit_long(base);
2966 + #endif
2967   }
2968  
2969   static __inline__ void raw_jmp_r(R4 r)
2970   {
2971 + #if USE_NEW_RTASM
2972 +    JMPsr(r);
2973 + #else
2974      emit_byte(0xff);
2975      emit_byte(0xe0+r);
2976 + #endif
2977   }
2978  
2979   static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2980   {
2981 + #if USE_NEW_RTASM
2982 +    JMPsm(base, X86_NOREG, r, m);
2983 + #else
2984      int mu;
2985      switch(m) {
2986       case 1: mu=0; break;
# Line 1965 | Line 2993 | static __inline__ void raw_jmp_m_indexed
2993      emit_byte(0x24);
2994      emit_byte(0x05+8*r+0x40*mu);
2995      emit_long(base);
2996 + #endif
2997   }
2998  
2999   static __inline__ void raw_jmp_m(uae_u32 base)
# Line 1977 | Line 3006 | static __inline__ void raw_jmp_m(uae_u32
3006  
3007   static __inline__ void raw_call(uae_u32 t)
3008   {
3009 + #if USE_NEW_RTASM
3010 +    CALLm(t);
3011 + #else
3012      emit_byte(0xe8);
3013      emit_long(t-(uae_u32)target-4);
3014 + #endif
3015   }
3016  
3017   static __inline__ void raw_jmp(uae_u32 t)
3018   {
3019 + #if USE_NEW_RTASM
3020 +    JMPm(t);
3021 + #else
3022      emit_byte(0xe9);
3023      emit_long(t-(uae_u32)target-4);
3024 + #endif
3025   }
3026  
3027   static __inline__ void raw_jl(uae_u32 t)
3028   {
3029      emit_byte(0x0f);
3030      emit_byte(0x8c);
3031 <    emit_long(t-(uae_u32)target-4);
3031 >    emit_long(t-(uintptr)target-4);
3032   }
3033  
3034   static __inline__ void raw_jz(uae_u32 t)
3035   {
3036      emit_byte(0x0f);
3037      emit_byte(0x84);
3038 <    emit_long(t-(uae_u32)target-4);
3038 >    emit_long(t-(uintptr)target-4);
3039   }
3040  
3041   static __inline__ void raw_jnz(uae_u32 t)
3042   {
3043      emit_byte(0x0f);
3044      emit_byte(0x85);
3045 <    emit_long(t-(uae_u32)target-4);
3045 >    emit_long(t-(uintptr)target-4);
3046   }
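
These direct jumps and calls all encode a rel32 displacement measured from the end of the instruction, hence the -4 accounting for the four displacement bytes that follow the opcode; casting through uintptr instead of uae_u32 keeps the pointer from being truncated before the subtraction on 64-bit hosts, where target need not fit in 32 bits. Spelled out:

    /* rel32 for E9 (jmp) / E8 (call) / 0F 8x (jcc): 'target' points at the
       displacement field when emit_long runs, and the CPU adds rel32 to the
       address of the *next* instruction, i.e. target + 4. */
    emit_long(t - (uintptr)target - 4);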
3047  
3048   static __inline__ void raw_jnz_l_oponly(void)
# Line 2055 | Line 3092 | static __inline__ void raw_nop(void)
3092      emit_byte(0x90);
3093   }
3094  
3095 + static __inline__ void raw_emit_nop_filler(int nbytes)
3096 + {
3097 +  /* Source: GNU Binutils 2.12.90.0.15 */
3098 +  /* Various efficient no-op patterns for aligning code labels.
3099 +     Note: Don't try to assemble the instructions in the comments.
3100 +     0L and 0w are not legal.  */
3101 +  static const uae_u8 f32_1[] =
3102 +    {0x90};                                                                     /* nop                                  */
3103 +  static const uae_u8 f32_2[] =
3104 +    {0x89,0xf6};                                                        /* movl %esi,%esi               */
3105 +  static const uae_u8 f32_3[] =
3106 +    {0x8d,0x76,0x00};                                           /* leal 0(%esi),%esi    */
3107 +  static const uae_u8 f32_4[] =
3108 +    {0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3109 +  static const uae_u8 f32_5[] =
3110 +    {0x90,                                                                      /* nop                                  */
3111 +     0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3112 +  static const uae_u8 f32_6[] =
3113 +    {0x8d,0xb6,0x00,0x00,0x00,0x00};            /* leal 0L(%esi),%esi   */
3114 +  static const uae_u8 f32_7[] =
3115 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3116 +  static const uae_u8 f32_8[] =
3117 +    {0x90,                                                                      /* nop                                  */
3118 +     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3119 +  static const uae_u8 f32_9[] =
3120 +    {0x89,0xf6,                                                         /* movl %esi,%esi               */
3121 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3122 +  static const uae_u8 f32_10[] =
3123 +    {0x8d,0x76,0x00,                                            /* leal 0(%esi),%esi    */
3124 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3125 +  static const uae_u8 f32_11[] =
3126 +    {0x8d,0x74,0x26,0x00,                                       /* leal 0(%esi,1),%esi  */
3127 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3128 +  static const uae_u8 f32_12[] =
3129 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3130 +     0x8d,0xbf,0x00,0x00,0x00,0x00};            /* leal 0L(%edi),%edi   */
3131 +  static const uae_u8 f32_13[] =
3132 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3133 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3134 +  static const uae_u8 f32_14[] =
3135 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,        /* leal 0L(%esi,1),%esi */
3136 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3137 +  static const uae_u8 f32_15[] =
3138 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3139 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3140 +  static const uae_u8 f32_16[] =
3141 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3142 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3143 +  static const uae_u8 *const f32_patt[] = {
3144 +    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3145 +    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3146 +  };
3147 +  static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3148 +
3149 + #if defined(__x86_64__)
3150 +  /* The recommended way to pad 64bit code is to use NOPs preceded by
3151 +     maximally four 0x66 prefixes.  Balance the size of nops.  */
3152 +  if (nbytes == 0)
3153 +          return;
3154 +
3155 +  int i;
3156 +  int nnops = (nbytes + 3) / 4;
3157 +  int len = nbytes / nnops;
3158 +  int remains = nbytes - nnops * len;
3159 +
3160 +  for (i = 0; i < remains; i++) {
3161 +          emit_block(prefixes, len);
3162 +          raw_nop();
3163 +  }
3164 +  for (; i < nnops; i++) {
3165 +          emit_block(prefixes, len - 1);
3166 +          raw_nop();
3167 +  }
3168 + #else
3169 +  int nloops = nbytes / 16;
3170 +  while (nloops-- > 0)
3171 +        emit_block(f32_16, sizeof(f32_16));
3172 +
3173 +  nbytes %= 16;
3174 +  if (nbytes)
3175 +        emit_block(f32_patt[nbytes - 1], nbytes);
3176 + #endif
3177 + }
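
On x86-64 the padding loop splits nbytes into NOPs whose lengths differ by at most one byte, each built from 0x66 prefixes in front of 0x90. For example, nbytes == 10 gives:

    int nnops   = (10 + 3) / 4;   /* 3 NOPs                         */
    int len     = 10 / 3;         /* 3                              */
    int remains = 10 - 3 * 3;     /* 1 NOP gets the extra byte      */
    /* emitted: (66 66 66 90) (66 66 90) (66 66 90) = 4+3+3 = 10 bytes */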
3178 +
3179  
3180   /*************************************************************************
3181   * Flag handling, to and fro UAE flag register                           *
# Line 2068 | Line 3189 | static __inline__ void raw_flags_to_reg(
3189   {
3190    raw_lahf(0);  /* Most flags in AH */
3191    //raw_setcc(r,0); /* V flag in AL */
3192 <  raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
3192 >  raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3193    
3194   #if 1   /* Let's avoid those nasty partial register stalls */
3195 <  //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
3196 <  raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3195 >  //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3196 >  raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3197    //live.state[FLAGTMP].status=CLEAN;
3198    live.state[FLAGTMP].status=INMEM;
3199    live.state[FLAGTMP].realreg=-1;
# Line 2099 | Line 3220 | static __inline__ void raw_flags_to_reg(
3220   {
3221          raw_pushfl();
3222          raw_pop_l_r(r);
3223 <        raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3223 >        raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3224   //      live.state[FLAGTMP].status=CLEAN;
3225          live.state[FLAGTMP].status=INMEM;
3226          live.state[FLAGTMP].realreg=-1;
# Line 2125 | Line 3246 | static __inline__ void raw_reg_to_flags(
3246   static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3247   {
3248   #if 1
3249 <    raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3249 >    raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3250   #else
3251 <    raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3252 <    raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3251 >    raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3252 >    raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3253   #endif
3254   }
3255  
# Line 2136 | Line 3257 | static __inline__ void raw_load_flagreg(
3257   static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3258   {
3259      if (live.nat[target].canbyte)
3260 <        raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3260 >        raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3261      else if (live.nat[target].canword)
3262 <        raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3262 >        raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3263      else
3264 <        raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3264 >        raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3265   }
3266  
3267 + #define NATIVE_FLAG_Z 0x40
3268 + static __inline__ void raw_flags_set_zero(int f, int r, int t)
3269 + {
3270 +        // FIXME: this is really suboptimal
3271 +        raw_pushfl();
3272 +        raw_pop_l_r(f);
3273 +        raw_and_l_ri(f,~NATIVE_FLAG_Z);
3274 +        raw_test_l_rr(r,r);
3275 +        raw_mov_l_ri(r,0);
3276 +        raw_mov_l_ri(t,NATIVE_FLAG_Z);
3277 +        raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3278 +        raw_or_l(f,r);
3279 +        raw_push_l_r(f);
3280 +        raw_popfl();
3281 + }
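
The helper rewrites only the Z bit of the live flags, which is why the FIXME calls it suboptimal: it round-trips all of EFLAGS through the stack just to update one bit. Step by step:

    /* f, r, t are scratch registers:
     *   f = EFLAGS;            pushfl / pop f
     *   f &= ~Z;               clear bit 6 (NATIVE_FLAG_Z)
     *   ZF' = (r == 0);        test r,r  (mov does not alter flags)
     *   r = ZF' ? Z : 0;       mov r,0 ; mov t,Z ; cmovE r,t
     *   EFLAGS = f | r;        or f,r ; push f / popfl
     */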
3282  
3283   static __inline__ void raw_inc_sp(int off)
3284   {
# Line 2305 | Line 3441 | static void vec(int x, struct sigcontext
3441                  for (i=0;i<5;i++)
3442                      vecbuf[i]=target[i];
3443                  emit_byte(0xe9);
3444 <                emit_long((uae_u32)veccode-(uae_u32)target-4);
3444 >                emit_long((uintptr)veccode-(uintptr)target-4);
3445                  write_log("Create jump to %p\n",veccode);
3446              
3447                  write_log("Handled one access!\n");
# Line 2332 | Line 3468 | static void vec(int x, struct sigcontext
3468                  }
3469                  for (i=0;i<5;i++)
3470                      raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3471 <                raw_mov_l_mi((uae_u32)&in_handler,0);
3471 >                raw_mov_l_mi((uintptr)&in_handler,0);
3472                  emit_byte(0xe9);
3473 <                emit_long(sc.eip+len-(uae_u32)target-4);
3473 >                emit_long(sc.eip+len-(uintptr)target-4);
3474                  in_handler=1;
3475                  target=tmp;
3476              }
# Line 2429 | Line 3565 | enum {
3565    X86_PROCESSOR_K6,
3566    X86_PROCESSOR_ATHLON,
3567    X86_PROCESSOR_PENTIUM4,
3568 +  X86_PROCESSOR_K8,
3569    X86_PROCESSOR_max
3570   };
3571  
# Line 2439 | Line 3576 | static const char * x86_processor_string
3576    "PentiumPro",
3577    "K6",
3578    "Athlon",
3579 <  "Pentium4"
3579 >  "Pentium4",
3580 >  "K8"
3581   };
3582  
3583   static struct ptt {
# Line 2456 | Line 3594 | x86_alignments[X86_PROCESSOR_max] = {
3594    { 16, 15, 16,  7, 16 },
3595    { 32,  7, 32,  7, 32 },
3596    { 16,  7, 16,  7, 16 },
3597 <  {  0,  0,  0,  0,  0 }
3597 >  {  0,  0,  0,  0,  0 },
3598 >  { 16,  7, 16,  7, 16 }
3599   };
3600  
3601   static void
# Line 2491 | Line 3630 | static void
3630   cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3631   {
3632    static uae_u8 cpuid_space[256];  
3633 +  static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3634    uae_u8* tmp=get_target();
3635  
3636 +  s_op = op;
3637    set_target(cpuid_space);
3638    raw_push_l_r(0); /* eax */
3639    raw_push_l_r(1); /* ecx */
3640    raw_push_l_r(2); /* edx */
3641    raw_push_l_r(3); /* ebx */
3642 <  raw_mov_l_rm(0,(uae_u32)&op);
3642 >  raw_mov_l_rm(0,(uintptr)&s_op);
3643    raw_cpuid(0);
3644 <  if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3645 <  if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3646 <  if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3647 <  if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3644 >  raw_mov_l_mr((uintptr)&s_eax,0);
3645 >  raw_mov_l_mr((uintptr)&s_ebx,3);
3646 >  raw_mov_l_mr((uintptr)&s_ecx,1);
3647 >  raw_mov_l_mr((uintptr)&s_edx,2);
3648    raw_pop_l_r(3);
3649    raw_pop_l_r(2);
3650    raw_pop_l_r(1);
# Line 2512 | Line 3653 | cpuid(uae_u32 op, uae_u32 *eax, uae_u32
3653    set_target(tmp);
3654  
3655    ((cpuop_func*)cpuid_space)(0);
3656 +  if (eax != NULL) *eax = s_eax;
3657 +  if (ebx != NULL) *ebx = s_ebx;
3658 +  if (ecx != NULL) *ecx = s_ecx;
3659 +  if (edx != NULL) *edx = s_edx;
3660   }
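
Routing the leaf number and results through the static s_op/s_eax/s_ebx/s_ecx/s_edx variables matters for the 64-bit build: the stub assembled into cpuid_space reaches its operands with 32-bit absolute memory operands, which in the default small code model cannot hold the address of a stack variable on a 64-bit host, whereas static storage remains addressable from the generated code. Callers are unaffected; for example, CPUID leaf 0 returns the 12-byte vendor string in EBX, EDX, ECX:

    uae_u32 level, v[3];
    char vendor[13];
    cpuid(0x00000000, &level, &v[0], &v[2], &v[1]);  /* v = { ebx, edx, ecx } */
    memcpy(vendor, v, 12);
    vendor[12] = '\0';                               /* e.g. "GenuineIntel" */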
3661  
3662   static void
# Line 2520 | Line 3665 | raw_init_cpu(void)
3665    struct cpuinfo_x86 *c = &cpuinfo;
3666  
3667    /* Defaults */
3668 +  c->x86_processor = X86_PROCESSOR_max;
3669    c->x86_vendor = X86_VENDOR_UNKNOWN;
3670    c->cpuid_level = -1;                          /* CPUID not detected */
3671    c->x86_model = c->x86_mask = 0;       /* So far unknown... */
# Line 2555 | Line 3701 | raw_init_cpu(void)
3701          c->x86 = 4;
3702    }
3703  
3704 +  /* AMD-defined flags: level 0x80000001 */
3705 +  uae_u32 xlvl;
3706 +  cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3707 +  if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3708 +        if ( xlvl >= 0x80000001 ) {
3709 +          uae_u32 features;
3710 +          cpuid(0x80000001, NULL, NULL, NULL, &features);
3711 +          if (features & (1 << 29)) {
3712 +                /* Assume x86-64 if long mode is supported */
3713 +                c->x86_processor = X86_PROCESSOR_K8;
3714 +          }
3715 +        }
3716 +  }
3717 +          
3718    /* Canonicalize processor ID */
2559  c->x86_processor = X86_PROCESSOR_max;
3719    switch (c->x86) {
3720    case 3:
3721          c->x86_processor = X86_PROCESSOR_I386;
# Line 2578 | Line 3737 | raw_init_cpu(void)
3737          break;
3738    case 15:
3739          if (c->x86_vendor == X86_VENDOR_INTEL) {
3740 <          /*  Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
3740 >          /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3741            if (c->x86_brand_id >= 8)
3742                  c->x86_processor = X86_PROCESSOR_PENTIUM4;
3743          }
3744 +        if (c->x86_vendor == X86_VENDOR_AMD) {
3745 +          /* Assume an Athlon processor if family == 15 and it was not
3746 +             detected as an x86-64 so far */
3747 +          if (c->x86_processor == X86_PROCESSOR_max)
3748 +                c->x86_processor = X86_PROCESSOR_ATHLON;
3749 +        }
3750          break;
3751    }
3752    if (c->x86_processor == X86_PROCESSOR_max) {
# Line 2589 | Line 3754 | raw_init_cpu(void)
3754          fprintf(stderr, "  Family  : %d\n", c->x86);
3755          fprintf(stderr, "  Model   : %d\n", c->x86_model);
3756          fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
3757 +        fprintf(stderr, "  Vendor  : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3758          if (c->x86_brand_id)
3759            fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
3760          abort();
3761    }
3762  
3763    /* Have CMOV support? */
3764 <  have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3764 >  have_cmov = c->x86_hwcap & (1 << 15);
3765  
3766    /* Can the host CPU suffer from partial register stalls? */
3767    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
# Line 2618 | Line 3784 | raw_init_cpu(void)
3784                          x86_processor_string_table[c->x86_processor]);
3785   }
3786  
3787 + static bool target_check_bsf(void)
3788 + {
3789 +        bool mismatch = false;
3790 +        for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3791 +        for (int g_CF = 0; g_CF <= 1; g_CF++) {
3792 +        for (int g_OF = 0; g_OF <= 1; g_OF++) {
3793 +        for (int g_SF = 0; g_SF <= 1; g_SF++) {
3794 +                for (int value = -1; value <= 1; value++) {
3795 +                        int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3796 +                        int tmp = value;
3797 +                        __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3798 +                                                                  : "+r" (flags), "+r" (tmp) : : "cc");
3799 +                        int OF = (flags >> 11) & 1;
3800 +                        int SF = (flags >>  7) & 1;
3801 +                        int ZF = (flags >>  6) & 1;
3802 +                        int CF = flags & 1;
3803 +                        tmp = (value == 0);
3804 +                        if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3805 +                                mismatch = true;
3806 +                }
3807 +        }}}}
3808 +        if (mismatch)
3809 +                write_log("Target CPU defines all flags on BSF instruction\n");
3810 +        return !mismatch;
3811 + }
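
BSF leaves every flag except ZF architecturally undefined; the probe drives all 16 combinations of Z/C/O/S through a real BSF on the values -1, 0 and 1 and returns true only when the other flags survive untouched (ZF itself must equal value == 0). The expectation being tested, in summary:

    /* Expected per operand value:
     *   value != 0 : ZF = 0, CF/OF/SF unchanged (destination = bit index)
     *   value == 0 : ZF = 1, CF/OF/SF unchanged (destination undefined)
     * Any deviation sets 'mismatch' and the probe returns false.
     */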
3812 +
3813  
3814   /*************************************************************************
3815   * FPU stuff                                                             *
# Line 3052 | Line 4244 | LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d
4244      emit_byte(0xf0);  /* f2xm1 */
4245      emit_byte(0xdc);
4246      emit_byte(0x05);
4247 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4247 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4248      emit_byte(0xd9);
4249      emit_byte(0xfd);  /* and scale it */
4250      emit_byte(0xdd);
# Line 3086 | Line 4278 | LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d,
4278      emit_byte(0xf0);  /* f2xm1 */
4279      emit_byte(0xdc);
4280      emit_byte(0x05);
4281 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4281 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4282      emit_byte(0xd9);
4283      emit_byte(0xfd);  /* and scale it */
4284      emit_byte(0xdd);

Diff Legend

  (old line number only)  Removed lines
  +  Added lines
  <  Changed lines (revision 1.6 side)
  >  Changed lines (revision 1.21 side)