root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp

Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z vs.
Revision 1.36 by gbeauche, 2007-01-14T12:23:29Z

# Line 3 | Line 3
3   *
4   *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5   *
6 < *  Adaptation for Basilisk II and improvements, copyright 2000-2002
6 > *  Adaptation for Basilisk II and improvements, copyright 2000-2005
7   *    Gwenole Beauchesne
8   *
9 < *  Basilisk II (C) 1997-2002 Christian Bauer
9 > *  Basilisk II (C) 1997-2005 Christian Bauer
10 > *
11 > *  Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12   *  
13   *  This program is free software; you can redistribute it and/or modify
14   *  it under the terms of the GNU General Public License as published by
# Line 40 | Line 42
42   #define EBP_INDEX 5
43   #define ESI_INDEX 6
44   #define EDI_INDEX 7
45 + #if defined(__x86_64__)
46 + #define R8_INDEX  8
47 + #define R9_INDEX  9
48 + #define R10_INDEX 10
49 + #define R11_INDEX 11
50 + #define R12_INDEX 12
51 + #define R13_INDEX 13
52 + #define R14_INDEX 14
53 + #define R15_INDEX 15
54 + #endif
55 + /* XXX this has to match X86_Reg8H_Base + 4 */
56 + #define AH_INDEX (0x10+4+EAX_INDEX)
57 + #define CH_INDEX (0x10+4+ECX_INDEX)
58 + #define DH_INDEX (0x10+4+EDX_INDEX)
59 + #define BH_INDEX (0x10+4+EBX_INDEX)
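
These synthetic indices let the high-byte registers share the ordinary GPR numbering; the low nibble carries the hardware encoding. A minimal sketch of the decode (the helper name is hypothetical, not from this file), matching the (s&0xf) masks the emitters use further down:

    static inline int x86_byte_reg_encoding(int r)
    {
        /* AH_INDEX..BH_INDEX are 0x10+4+base, so masking off the 0x10 marker
           yields 4..7 -- the ModRM encodings of %ah, %ch, %dh and %bh */
        return r & 0xf;
    }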
60  
61   /* The register in which subroutines return an integer return value */
62 < #define REG_RESULT 0
62 > #define REG_RESULT EAX_INDEX
63  
64   /* The registers subroutines take their first and second argument in */
65   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66   /* Handle the _fastcall parameters of ECX and EDX */
67 < #define REG_PAR1 1
68 < #define REG_PAR2 2
67 > #define REG_PAR1 ECX_INDEX
68 > #define REG_PAR2 EDX_INDEX
69 > #elif defined(__x86_64__)
70 > #define REG_PAR1 EDI_INDEX
71 > #define REG_PAR2 ESI_INDEX
72   #else
73 < #define REG_PAR1 0
74 < #define REG_PAR2 2
73 > #define REG_PAR1 EAX_INDEX
74 > #define REG_PAR2 EDX_INDEX
75   #endif
76  
77 < /* Three registers that are not used for any of the above */
58 < #define REG_NOPAR1 6
59 < #define REG_NOPAR2 5
60 < #define REG_NOPAR3 3
61 <
62 < #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
77 > #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
78   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 < #define REG_PC_TMP 0
79 > #define REG_PC_TMP EAX_INDEX
80   #else
81 < #define REG_PC_TMP 1 /* Another register that is not the above */
81 > #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82   #endif
83  
84 < #define SHIFTCOUNT_NREG 1  /* Register that can be used for shiftcount.
84 > #define SHIFTCOUNT_NREG ECX_INDEX  /* Register that can be used for shiftcount.
85                                -1 if any reg will do */
86 < #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
87 < #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
86 > #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87 > #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
88 >
89 > #define STACK_ALIGN             16
90 > #define STACK_OFFSET    sizeof(void *)
91  
92   uae_s8 always_used[]={4,-1};
93 + #if defined(__x86_64__)
94 + uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95 + uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96 + #else
97   uae_s8 can_byte[]={0,1,2,3,-1};
98   uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 + #endif
100  
101 + #if USE_OPTIMIZED_CALLS
102 + /* Make sure interpretive core does not use cpuopti */
103 + uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 + #error FIXME: code not ready
105 + #else
106   /* cpuopti mutates instruction handlers to assume registers are saved
107     by the caller */
108 < uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
108 > uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 > #endif
110  
111   /* This *should* be the same as call_saved. But:
112     - We might not really know which registers are saved, and which aren't,
# Line 86 | Line 115 | uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
115     - Special registers (such as the stack pointer) should not be "preserved"
116       by pushing, even though they are "saved" across function calls
117   */
118 < uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
118 > #if defined(__x86_64__)
119 > /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 > /* preserve r11 because it's generally used to hold pointers to functions */
121 > static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 > #else
123 > /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124 > static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 > #endif
126  
127   /* Whether classes of instructions do or don't clobber the native flags */
128   #define CLOBBER_MOV
# Line 111 | Line 147 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
147   #define CLOBBER_TEST clobber_flags()
148   #define CLOBBER_CL16
149   #define CLOBBER_CL8  
150 + #define CLOBBER_SE32
151   #define CLOBBER_SE16
152   #define CLOBBER_SE8
153 + #define CLOBBER_ZE32
154   #define CLOBBER_ZE16
155   #define CLOBBER_ZE8
156   #define CLOBBER_SW16 clobber_flags()
# Line 122 | Line 160 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
160   #define CLOBBER_BT   clobber_flags()
161   #define CLOBBER_BSF  clobber_flags()
162  
163 + /* FIXME: disabled until that's proofread.  */
164 + #if defined(__x86_64__)
165 + #define USE_NEW_RTASM 1
166 + #endif
167 +
168 + #if USE_NEW_RTASM
169 +
170 + #if defined(__x86_64__)
171 + #define X86_TARGET_64BIT                1
172 + /* The address override prefix causes a 5-cycle penalty on Intel Core
173 +   processors. Another solution would be to decompose the load into an LEA,
174 +   MOV (to zero-extend), MOV (from memory): is that better? */
175 + #define ADDR32                                  x86_emit_byte(0x67),
176 + #else
177 + #define ADDR32                                  /**/
178 + #endif
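
For reference, one reading of the decomposition suggested in the ADDR32 comment above, written with the macros from codegen_x86.h included below (purely illustrative; tmp is an assumed free scratch register):

    LEALmr(0, baser, index, factor, tmp);   /* compute the effective address */
    MOVLrr(tmp, tmp);                       /* 32-bit mov truncates/zero-extends it */
    MOVLmr(0, tmp, X86_NOREG, 1, d);        /* plain load, no 0x67 address-size prefix */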
179 + #define X86_FLAT_REGISTERS              0
180 + #define X86_OPTIMIZE_ALU                1
181 + #define X86_OPTIMIZE_ROTSHI             1
182 + #include "codegen_x86.h"
183 +
184 + #define x86_emit_byte(B)                emit_byte(B)
185 + #define x86_emit_word(W)                emit_word(W)
186 + #define x86_emit_long(L)                emit_long(L)
187 + #define x86_emit_quad(Q)                emit_quad(Q)
188 + #define x86_get_target()                get_target()
189 + #define x86_emit_failure(MSG)   jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
190 +
191 + static void jit_fail(const char *msg, const char *file, int line, const char *function)
192 + {
193 +        fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
194 +                        function, file, line, msg);
195 +        abort();
196 + }
197 +
198 + LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
199 + {
200 + #if defined(__x86_64__)
201 +        PUSHQr(r);
202 + #else
203 +        PUSHLr(r);
204 + #endif
205 + }
206 + LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
207 +
208 + LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
209 + {
210 + #if defined(__x86_64__)
211 +        POPQr(r);
212 + #else
213 +        POPLr(r);
214 + #endif
215 + }
216 + LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
217 +
218 + LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
219 + {
220 + #if defined(__x86_64__)
221 +        POPQm(d, X86_NOREG, X86_NOREG, 1);
222 + #else
223 +        POPLm(d, X86_NOREG, X86_NOREG, 1);
224 + #endif
225 + }
226 + LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
227 +
228 + LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
229 + {
230 +        BTLir(i, r);
231 + }
232 + LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
233 +
234 + LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
235 + {
236 +        BTLrr(b, r);
237 + }
238 + LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
239 +
240 + LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
241 + {
242 +        BTCLir(i, r);
243 + }
244 + LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
245 +
246 + LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
247 + {
248 +        BTCLrr(b, r);
249 + }
250 + LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
251 +
252 + LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
253 + {
254 +        BTRLir(i, r);
255 + }
256 + LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
257 +
258 + LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
259 + {
260 +        BTRLrr(b, r);
261 + }
262 + LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
263 +
264 + LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
265 + {
266 +        BTSLir(i, r);
267 + }
268 + LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
269 +
270 + LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
271 + {
272 +        BTSLrr(b, r);
273 + }
274 + LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
275 +
276 + LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
277 + {
278 +        SUBWir(i, d);
279 + }
280 + LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
281 +
282 + LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
283 + {
284 +        MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
285 + }
286 + LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
287 +
288 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
289 + {
290 +        MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
291 + }
292 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
293 +
294 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
295 + {
296 +        MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
297 + }
298 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
299 +
300 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
301 + {
302 +        MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
303 + }
304 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
305 +
306 + LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
307 + {
308 +        ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
309 + }
310 + LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
311 +
312 + LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
313 + {
314 +        ROLBir(i, r);
315 + }
316 + LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317 +
318 + LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319 + {
320 +        ROLWir(i, r);
321 + }
322 + LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
323 +
324 + LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
325 + {
326 +        ROLLir(i, r);
327 + }
328 + LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
329 +
330 + LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
331 + {
332 +        ROLLrr(r, d);
333 + }
334 + LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
335 +
336 + LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
337 + {
338 +        ROLWrr(r, d);
339 + }
340 + LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
341 +
342 + LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
343 + {
344 +        ROLBrr(r, d);
345 + }
346 + LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
347 +
348 + LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
349 + {
350 +        SHLLrr(r, d);
351 + }
352 + LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
353 +
354 + LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
355 + {
356 +        SHLWrr(r, d);
357 + }
358 + LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
359 +
360 + LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
361 + {
362 +        SHLBrr(r, d);
363 + }
364 + LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
365 +
366 + LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
367 + {
368 +        RORBir(i, r);
369 + }
370 + LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
371 +
372 + LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
373 + {
374 +        RORWir(i, r);
375 + }
376 + LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
377 +
378 + LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
379 + {
380 +        ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
381 + }
382 + LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
383 +
384 + LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
385 + {
386 +        RORLir(i, r);
387 + }
388 + LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
389 +
390 + LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
391 + {
392 +        RORLrr(r, d);
393 + }
394 + LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
395 +
396 + LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
397 + {
398 +        RORWrr(r, d);
399 + }
400 + LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
401 +
402 + LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
403 + {
404 +        RORBrr(r, d);
405 + }
406 + LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
407 +
408 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
409 + {
410 +        SHRLrr(r, d);
411 + }
412 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
413 +
414 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
415 + {
416 +        SHRWrr(r, d);
417 + }
418 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
419 +
420 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
421 + {
422 +        SHRBrr(r, d);
423 + }
424 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
425 +
426 + LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
427 + {
428 +        SARLrr(r, d);
429 + }
430 + LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
431 +
432 + LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
433 + {
434 +        SARWrr(r, d);
435 + }
436 + LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
437 +
438 + LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
439 + {
440 +        SARBrr(r, d);
441 + }
442 + LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
443 +
444 + LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
445 + {
446 +        SHLLir(i, r);
447 + }
448 + LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
449 +
450 + LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
451 + {
452 +        SHLWir(i, r);
453 + }
454 + LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
455 +
456 + LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
457 + {
458 +        SHLBir(i, r);
459 + }
460 + LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
461 +
462 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
463 + {
464 +        SHRLir(i, r);
465 + }
466 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
467 +
468 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
469 + {
470 +        SHRWir(i, r);
471 + }
472 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
473 +
474 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
475 + {
476 +        SHRBir(i, r);
477 + }
478 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
479 +
480 + LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
481 + {
482 +        SARLir(i, r);
483 + }
484 + LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
485 +
486 + LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
487 + {
488 +        SARWir(i, r);
489 + }
490 + LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
491 +
492 + LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
493 + {
494 +        SARBir(i, r);
495 + }
496 + LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
497 +
498 + LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
499 + {
500 +        SAHF();
501 + }
502 + LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
503 +
504 + LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
505 + {
506 +        CPUID();
507 + }
508 + LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
509 +
510 + LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
511 + {
512 +        LAHF();
513 + }
514 + LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
515 +
516 + LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
517 + {
518 +        SETCCir(cc, d);
519 + }
520 + LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
521 +
522 + LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
523 + {
524 +        SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
525 + }
526 + LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
527 +
528 + LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
529 + {
530 +        if (have_cmov)
531 +                CMOVLrr(cc, s, d);
532 +        else { /* replacement using branch and mov */
533 + #if defined(__x86_64__)
534 +                write_log("x86-64 implementations are bound to have CMOV!\n");
535 +                abort();
536 + #endif
537 +                JCCSii(cc^1, 2);
538 +                MOVLrr(s, d);
539 +        }
540 + }
541 + LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
542 +
543 + LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
544 + {
545 +        BSFLrr(s, d);
546 + }
547 + LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
548 +
549 + LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
550 + {
551 +        MOVSLQrr(s, d);
552 + }
553 + LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
554 +
555 + LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
556 + {
557 +        MOVSWLrr(s, d);
558 + }
559 + LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
560 +
561 + LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
562 + {
563 +        MOVSBLrr(s, d);
564 + }
565 + LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
566 +
567 + LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
568 + {
569 +        MOVZWLrr(s, d);
570 + }
571 + LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
572 +
573 + LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
574 + {
575 +        MOVZBLrr(s, d);
576 + }
577 + LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
578 +
579 + LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
580 + {
581 +        IMULLrr(s, d);
582 + }
583 + LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
584 +
585 + LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
586 + {
587 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
588 +        write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
589 +        abort();
590 +        }
591 +        IMULLr(s);
592 + }
593 + LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
594 +
595 + LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
596 + {
597 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
598 +        write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
599 +        abort();
600 +        }
601 +        MULLr(s);
602 + }
603 + LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
604 +
605 + LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
606 + {
607 +        abort(); /* %^$&%^$%#^ x86! */
608 + }
609 + LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
610 +
611 + LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
612 + {
613 +        MOVBrr(s, d);
614 + }
615 + LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
616 +
617 + LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
618 + {
619 +        MOVWrr(s, d);
620 + }
621 + LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
622 +
623 + LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
624 + {
625 +        ADDR32 MOVLmr(0, baser, index, factor, d);
626 + }
627 + LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
628 +
629 + LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
630 + {
631 +        ADDR32 MOVWmr(0, baser, index, factor, d);
632 + }
633 + LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
634 +
635 + LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
636 + {
637 +        ADDR32 MOVBmr(0, baser, index, factor, d);
638 + }
639 + LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
640 +
641 + LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
642 + {
643 +        ADDR32 MOVLrm(s, 0, baser, index, factor);
644 + }
645 + LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
646 +
647 + LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
648 + {
649 +        ADDR32 MOVWrm(s, 0, baser, index, factor);
650 + }
651 + LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
652 +
653 + LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
654 + {
655 +        ADDR32 MOVBrm(s, 0, baser, index, factor);
656 + }
657 + LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
658 +
659 + LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
660 + {
661 +        ADDR32 MOVLrm(s, base, baser, index, factor);
662 + }
663 + LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
664 +
665 + LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
666 + {
667 +        ADDR32 MOVWrm(s, base, baser, index, factor);
668 + }
669 + LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
670 +
671 + LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
672 + {
673 +        ADDR32 MOVBrm(s, base, baser, index, factor);
674 + }
675 + LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
676 +
677 + LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
678 + {
679 +        ADDR32 MOVLmr(base, baser, index, factor, d);
680 + }
681 + LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
682 +
683 + LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
684 + {
685 +        ADDR32 MOVWmr(base, baser, index, factor, d);
686 + }
687 + LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
688 +
689 + LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
690 + {
691 +        ADDR32 MOVBmr(base, baser, index, factor, d);
692 + }
693 + LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
694 +
695 + LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
696 + {
697 +        ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
698 + }
699 + LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
700 +
701 + LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
702 + {
703 +        if (have_cmov)
704 +                ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
705 +        else { /* replacement using branch and mov */
706 + #if defined(__x86_64__)
707 +                write_log("x86-64 implementations are bound to have CMOV!\n");
708 +                abort();
709 + #endif
710 +                JCCSii(cond^1, 7);
711 +                ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
712 +        }
713 + }
714 + LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
715 +
716 + LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
717 + {
718 +        if (have_cmov)
719 +                CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
720 +        else { /* replacement using branch and mov */
721 + #if defined(__x86_64__)
722 +                write_log("x86-64 implementations are bound to have CMOV!\n");
723 +                abort();
724 + #endif
725 +                JCCSii(cond^1, 6);
726 +                MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
727 +        }
728 + }
729 + LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
730 +
731 + LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
732 + {
733 +        ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
734 + }
735 + LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
736 +
737 + LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
738 + {
739 +        ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
740 + }
741 + LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
742 +
743 + LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
744 + {
745 +        ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
746 + }
747 + LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
748 +
749 + LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
750 + {
751 +        ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
752 + }
753 + LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
754 +
755 + LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
756 + {
757 +        ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
758 + }
759 + LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
760 +
761 + LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
762 + {
763 +        ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
764 + }
765 + LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
766 +
767 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
768 + {
769 +        ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
770 + }
771 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
772 +
773 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
774 + {
775 +        ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
776 + }
777 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
778 +
779 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
780 + {
781 +        ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
782 + }
783 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
784 +
785 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
786 + {
787 +        ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
788 + }
789 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
790 +
791 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
792 + {
793 +        ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
794 + }
795 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
796 +
797 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
798 + {
799 +        ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
800 + }
801 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
802 +
803 + LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
804 + {
805 +        LEALmr(offset, s, X86_NOREG, 1, d);
806 + }
807 + LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
808 +
809 + LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
810 + {
811 +        LEALmr(offset, s, index, factor, d);
812 + }
813 + LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
814 +
815 + LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
816 + {
817 +        LEALmr(0, s, index, factor, d);
818 + }
819 + LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
820 +
821 + LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
822 + {
823 +        LEALmr(0, X86_NOREG, index, factor, d);
824 + }
825 + LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
826 +
827 + LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
828 + {
829 +        ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
830 + }
831 + LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
832 +
833 + LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
834 + {
835 +        ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
836 + }
837 + LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
838 +
839 + LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
840 + {
841 +        ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
842 + }
843 + LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
844 +
845 + LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
846 + {
847 +        BSWAPLr(r);
848 + }
849 + LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
850 +
851 + LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
852 + {
853 +        ROLWir(8, r);
854 + }
855 + LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
856 +
857 + LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
858 + {
859 +        MOVLrr(s, d);
860 + }
861 + LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
862 +
863 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
864 + {
865 +        MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
866 + }
867 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
868 +
869 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
870 + {
871 +        MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
872 + }
873 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
874 +
875 + LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
876 + {
877 +        MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
878 + }
879 + LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
880 +
881 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
882 + {
883 +        MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
884 + }
885 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
886 +
887 + LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
888 + {
889 +        MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
890 + }
891 + LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
892 +
893 + LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
894 + {
895 +        MOVLir(s, d);
896 + }
897 + LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
898 +
899 + LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
900 + {
901 +        MOVWir(s, d);
902 + }
903 + LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
904 +
905 + LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
906 + {
907 +        MOVBir(s, d);
908 + }
909 + LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
910 +
911 + LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
912 + {
913 +        ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
914 + }
915 + LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
916 +
917 + LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
918 + {
919 +        ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
920 + }
921 + LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
922 +
923 + LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
924 + {
925 +        ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
926 + }
927 + LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
928 +
929 + LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
930 + {
931 +        ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
932 + }
933 + LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
934 +
935 + LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
936 + {
937 +        TESTLir(i, d);
938 + }
939 + LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
940 +
941 + LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
942 + {
943 +        TESTLrr(s, d);
944 + }
945 + LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
946 +
947 + LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
948 + {
949 +        TESTWrr(s, d);
950 + }
951 + LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
952 +
953 + LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
954 + {
955 +        TESTBrr(s, d);
956 + }
957 + LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
958 +
959 + LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
960 + {
961 +        XORLir(i, d);
962 + }
963 + LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
964 +
965 + LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
966 + {
967 +        ANDLir(i, d);
968 + }
969 + LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
970 +
971 + LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
972 + {
973 +        ANDWir(i, d);
974 + }
975 + LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
976 +
977 + LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
978 + {
979 +        ANDLrr(s, d);
980 + }
981 + LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
982 +
983 + LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
984 + {
985 +        ANDWrr(s, d);
986 + }
987 + LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
988 +
989 + LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
990 + {
991 +        ANDBrr(s, d);
992 + }
993 + LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
994 +
995 + LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
996 + {
997 +        ORLir(i, d);
998 + }
999 + LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1000 +
1001 + LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1002 + {
1003 +        ORLrr(s, d);
1004 + }
1005 + LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1006 +
1007 + LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1008 + {
1009 +        ORWrr(s, d);
1010 + }
1011 + LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1012 +
1013 + LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1014 + {
1015 +        ORBrr(s, d);
1016 + }
1017 + LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1018 +
1019 + LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1020 + {
1021 +        ADCLrr(s, d);
1022 + }
1023 + LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1024 +
1025 + LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1026 + {
1027 +        ADCWrr(s, d);
1028 + }
1029 + LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1030 +
1031 + LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1032 + {
1033 +        ADCBrr(s, d);
1034 + }
1035 + LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1036 +
1037 + LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1038 + {
1039 +        ADDLrr(s, d);
1040 + }
1041 + LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1042 +
1043 + LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1044 + {
1045 +        ADDWrr(s, d);
1046 + }
1047 + LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1048 +
1049 + LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1050 + {
1051 +        ADDBrr(s, d);
1052 + }
1053 + LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1054 +
1055 + LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1056 + {
1057 +        SUBLir(i, d);
1058 + }
1059 + LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1060 +
1061 + LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1062 + {
1063 +        SUBBir(i, d);
1064 + }
1065 + LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1066 +
1067 + LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1068 + {
1069 +        ADDLir(i, d);
1070 + }
1071 + LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1072 +
1073 + LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1074 + {
1075 +        ADDWir(i, d);
1076 + }
1077 + LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1078 +
1079 + LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1080 + {
1081 +        ADDBir(i, d);
1082 + }
1083 + LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1084 +
1085 + LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1086 + {
1087 +        SBBLrr(s, d);
1088 + }
1089 + LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1090 +
1091 + LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1092 + {
1093 +        SBBWrr(s, d);
1094 + }
1095 + LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1096 +
1097 + LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1098 + {
1099 +        SBBBrr(s, d);
1100 + }
1101 + LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1102 +
1103 + LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1104 + {
1105 +        SUBLrr(s, d);
1106 + }
1107 + LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1108 +
1109 + LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1110 + {
1111 +        SUBWrr(s, d);
1112 + }
1113 + LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1114 +
1115 + LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1116 + {
1117 +        SUBBrr(s, d);
1118 + }
1119 + LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1120 +
1121 + LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1122 + {
1123 +        CMPLrr(s, d);
1124 + }
1125 + LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1126 +
1127 + LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1128 + {
1129 +        CMPLir(i, r);
1130 + }
1131 + LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1132 +
1133 + LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1134 + {
1135 +        CMPWrr(s, d);
1136 + }
1137 + LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1138 +
1139 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1140 + {
1141 +        CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1142 + }
1143 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1144 +
1145 + LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1146 + {
1147 +        CMPBir(i, d);
1148 + }
1149 + LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1150 +
1151 + LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1152 + {
1153 +        CMPBrr(s, d);
1154 + }
1155 + LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1156 +
1157 + LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1158 + {
1159 +        ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
1160 + }
1161 + LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1162 +
1163 + LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1164 + {
1165 +        XORLrr(s, d);
1166 + }
1167 + LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1168 +
1169 + LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1170 + {
1171 +        XORWrr(s, d);
1172 + }
1173 + LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1174 +
1175 + LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1176 + {
1177 +        XORBrr(s, d);
1178 + }
1179 + LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1180 +
1181 + LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1182 + {
1183 +        SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1184 + }
1185 + LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1186 +
1187 + LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1188 + {
1189 +        CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1190 + }
1191 + LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1192 +
1193 + LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1194 + {
1195 +        XCHGLrr(r2, r1);
1196 + }
1197 + LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1198 +
1199 + LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1200 + {
1201 +        XCHGBrr(r2, r1);
1202 + }
1203 + LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1204 +
1205 + LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1206 + {
1207 +        PUSHF();
1208 + }
1209 + LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1210 +
1211 + LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1212 + {
1213 +        POPF();
1214 + }
1215 + LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1216 +
1217 + /* Generate floating-point instructions */
1218 + static inline void x86_fadd_m(MEMR s)
1219 + {
1220 +        FADDLm(s,X86_NOREG,X86_NOREG,1);
1221 + }
1222 +
1223 + #else
1224 +
1225   const bool optimize_accum               = true;
1226   const bool optimize_imm8                = true;
1227   const bool optimize_shift_once  = true;
# Line 157 | Line 1257 | LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1257   }
1258   LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1259  
1260 + LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1261 + {
1262 +        emit_byte(0x8f);
1263 +        emit_byte(0x05);
1264 +        emit_long(d);
1265 + }
1266 + LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1267 +
1268   LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1269   {
1270          emit_byte(0x0f);
# Line 1071 | Line 2179 | LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d
2179  
2180   LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2181   {
2182 +        Dif(!isbyte(offset)) abort();
2183      emit_byte(0x8b);
2184      emit_byte(0x40+8*d+s);
2185      emit_byte(offset);
# Line 1079 | Line 2188 | LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d,
2188  
2189   LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2190   {
2191 +        Dif(!isbyte(offset)) abort();
2192      emit_byte(0x66);
2193      emit_byte(0x8b);
2194      emit_byte(0x40+8*d+s);
# Line 1088 | Line 2198 | LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d,
2198  
2199   LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2200   {
2201 +        Dif(!isbyte(offset)) abort();
2202      emit_byte(0x8a);
2203      emit_byte(0x40+8*d+s);
2204      emit_byte(offset);
# Line 1121 | Line 2232 | LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d
2232  
2233   LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2234   {
2235 +        Dif(!isbyte(offset)) abort();
2236      emit_byte(0xc7);
2237      emit_byte(0x40+d);
2238      emit_byte(offset);
# Line 1130 | Line 2242 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d
2242  
2243   LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2244   {
2245 +        Dif(!isbyte(offset)) abort();
2246      emit_byte(0x66);
2247      emit_byte(0xc7);
2248      emit_byte(0x40+d);
# Line 1140 | Line 2253 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d
2253  
2254   LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2255   {
2256 +        Dif(!isbyte(offset)) abort();
2257      emit_byte(0xc6);
2258      emit_byte(0x40+d);
2259      emit_byte(offset);
# Line 1149 | Line 2263 | LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d
2263  
2264   LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2265   {
2266 +        Dif(!isbyte(offset)) abort();
2267      emit_byte(0x89);
2268      emit_byte(0x40+8*s+d);
2269      emit_byte(offset);
# Line 1157 | Line 2272 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d
2272  
2273   LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2274   {
2275 +        Dif(!isbyte(offset)) abort();
2276      emit_byte(0x66);
2277      emit_byte(0x89);
2278      emit_byte(0x40+8*s+d);
# Line 1166 | Line 2282 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d
2282  
2283   LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2284   {
2285 +        Dif(!isbyte(offset)) abort();
2286      emit_byte(0x88);
2287      emit_byte(0x40+8*s+d);
2288      emit_byte(offset);
# Line 1326 | Line 2443 | LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d,
2443   LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2444   {
2445      emit_byte(0x88);
2446 <    emit_byte(0x05+8*s);
2446 >    emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2447      emit_long(d);
2448   }
2449   LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
# Line 1440 | Line 2557 | LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d
2557   }
2558   LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2559  
2560 + LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2561 + {
2562 +    emit_byte(0x81);
2563 +    emit_byte(0xf0+d);
2564 +    emit_long(i);
2565 + }
2566 + LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2567 +
2568   LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2569   {
2570          if (optimize_imm8 && isbyte(i)) {
# Line 1855 | Line 2980 | LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r
2980   }
2981   LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2982  
2983 < /*************************************************************************
1859 < * FIXME: string-related instructions                                    *
1860 < *************************************************************************/
1861 <
1862 < LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1863 < {
1864 <        emit_byte(0xfc);
1865 < }
1866 < LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1867 <
1868 < LOWFUNC(WRITE,NONE,0,raw_std,(void))
1869 < {
1870 <        emit_byte(0xfd);
1871 < }
1872 < LENDFUNC(WRITE,NONE,0,raw_std,(void))
1873 <
1874 < LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1875 < {
1876 <        emit_byte(0xa4);
1877 < }
1878 < LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1879 <
1880 < LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1881 < {
1882 <        emit_byte(0xa5);
1883 < }
1884 < LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1885 <
1886 < LOWFUNC(NONE,RMW,0,raw_rep,(void))
1887 < {
1888 <        emit_byte(0xf3);
1889 < }
1890 < LENDFUNC(NONE,RMW,0,raw_rep,(void))
1891 <
1892 < LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1893 < {
1894 <        raw_rep();
1895 <        raw_movs_b();
1896 < }
1897 < LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1898 <
1899 < LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
2983 > LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
2984   {
2985 <        raw_rep();
2986 <        raw_movs_l();
2985 >  emit_byte(0x86);
2986 >  emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
2987   }
2988 < LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
2988 > LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
2989  
2990   /*************************************************************************
2991   * FIXME: mem access modes probably wrong                                *
# Line 1919 | Line 3003 | LOWFUNC(WRITE,READ,0,raw_popfl,(void))
3003   }
3004   LENDFUNC(WRITE,READ,0,raw_popfl,(void))
3005  
3006 + /* Generate floating-point instructions */
3007 + static inline void x86_fadd_m(MEMR s)
3008 + {
3009 +        emit_byte(0xdc);
3010 +        emit_byte(0x05);
3011 +        emit_long(s);
3012 + }
3013 +
3014 + #endif
3015 +
3016   /*************************************************************************
3017   * Unoptimizable stuff --- jump                                          *
3018   *************************************************************************/
3019  
3020   static __inline__ void raw_call_r(R4 r)
3021   {
3022 + #if USE_NEW_RTASM
3023 +    CALLsr(r);
3024 + #else
3025      emit_byte(0xff);
3026      emit_byte(0xd0+r);
3027 + #endif
3028   }
3029  
3030   static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3031   {
3032 + #if USE_NEW_RTASM
3033 +    CALLsm(base, X86_NOREG, r, m);
3034 + #else
3035      int mu;
3036      switch(m) {
3037       case 1: mu=0; break;
# Line 1943 | Line 3044 | static __inline__ void raw_call_m_indexe
3044      emit_byte(0x14);
3045      emit_byte(0x05+8*r+0x40*mu);
3046      emit_long(base);
3047 + #endif
3048   }
3049  
3050   static __inline__ void raw_jmp_r(R4 r)
3051   {
3052 + #if USE_NEW_RTASM
3053 +    JMPsr(r);
3054 + #else
3055      emit_byte(0xff);
3056      emit_byte(0xe0+r);
3057 + #endif
3058   }
3059  
3060   static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3061   {
3062 + #if USE_NEW_RTASM
3063 +    JMPsm(base, X86_NOREG, r, m);
3064 + #else
3065      int mu;
3066      switch(m) {
3067       case 1: mu=0; break;
# Line 1965 | Line 3074 | static __inline__ void raw_jmp_m_indexed
3074      emit_byte(0x24);
3075      emit_byte(0x05+8*r+0x40*mu);
3076      emit_long(base);
3077 + #endif
3078   }
3079  
3080   static __inline__ void raw_jmp_m(uae_u32 base)
# Line 1977 | Line 3087 | static __inline__ void raw_jmp_m(uae_u32
3087  
3088   static __inline__ void raw_call(uae_u32 t)
3089   {
3090 + #if USE_NEW_RTASM
3091 +    CALLm(t);
3092 + #else
3093      emit_byte(0xe8);
3094      emit_long(t-(uae_u32)target-4);
3095 + #endif
3096   }
3097  
3098   static __inline__ void raw_jmp(uae_u32 t)
3099   {
3100 + #if USE_NEW_RTASM
3101 +    JMPm(t);
3102 + #else
3103      emit_byte(0xe9);
3104      emit_long(t-(uae_u32)target-4);
3105 + #endif
3106   }
3107  
3108   static __inline__ void raw_jl(uae_u32 t)
3109   {
3110      emit_byte(0x0f);
3111      emit_byte(0x8c);
3112 <    emit_long(t-(uae_u32)target-4);
3112 >    emit_long(t-(uintptr)target-4);
3113   }
3114  
3115   static __inline__ void raw_jz(uae_u32 t)
3116   {
3117      emit_byte(0x0f);
3118      emit_byte(0x84);
3119 <    emit_long(t-(uae_u32)target-4);
3119 >    emit_long(t-(uintptr)target-4);
3120   }
3121  
3122   static __inline__ void raw_jnz(uae_u32 t)
3123   {
3124      emit_byte(0x0f);
3125      emit_byte(0x85);
3126 <    emit_long(t-(uae_u32)target-4);
3126 >    emit_long(t-(uintptr)target-4);
3127   }
3128  
3129   static __inline__ void raw_jnz_l_oponly(void)
# Line 2055 | Line 3173 | static __inline__ void raw_nop(void)
3173      emit_byte(0x90);
3174   }
3175  
3176 + static __inline__ void raw_emit_nop_filler(int nbytes)
3177 + {
3178 +  /* Source: GNU Binutils 2.12.90.0.15 */
3179 +  /* Various efficient no-op patterns for aligning code labels.
3180 +     Note: Don't try to assemble the instructions in the comments.
3181 +     0L and 0w are not legal.  */
3182 +  static const uae_u8 f32_1[] =
3183 +    {0x90};                                                                     /* nop                                  */
3184 +  static const uae_u8 f32_2[] =
3185 +    {0x89,0xf6};                                                        /* movl %esi,%esi               */
3186 +  static const uae_u8 f32_3[] =
3187 +    {0x8d,0x76,0x00};                                           /* leal 0(%esi),%esi    */
3188 +  static const uae_u8 f32_4[] =
3189 +    {0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3190 +  static const uae_u8 f32_5[] =
3191 +    {0x90,                                                                      /* nop                                  */
3192 +     0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3193 +  static const uae_u8 f32_6[] =
3194 +    {0x8d,0xb6,0x00,0x00,0x00,0x00};            /* leal 0L(%esi),%esi   */
3195 +  static const uae_u8 f32_7[] =
3196 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3197 +  static const uae_u8 f32_8[] =
3198 +    {0x90,                                                                      /* nop                                  */
3199 +     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3200 +  static const uae_u8 f32_9[] =
3201 +    {0x89,0xf6,                                                         /* movl %esi,%esi               */
3202 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3203 +  static const uae_u8 f32_10[] =
3204 +    {0x8d,0x76,0x00,                                            /* leal 0(%esi),%esi    */
3205 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3206 +  static const uae_u8 f32_11[] =
3207 +    {0x8d,0x74,0x26,0x00,                                       /* leal 0(%esi,1),%esi  */
3208 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3209 +  static const uae_u8 f32_12[] =
3210 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3211 +     0x8d,0xbf,0x00,0x00,0x00,0x00};            /* leal 0L(%edi),%edi   */
3212 +  static const uae_u8 f32_13[] =
3213 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3214 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3215 +  static const uae_u8 f32_14[] =
3216 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,        /* leal 0L(%esi,1),%esi */
3217 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3218 +  static const uae_u8 f32_15[] =
3219 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3220 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3221 +  static const uae_u8 f32_16[] =
3222 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3223 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3224 +  static const uae_u8 *const f32_patt[] = {
3225 +    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3226 +    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3227 +  };
3228 +  static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3229 +
3230 + #if defined(__x86_64__)
3231 +  /* The recommended way to pad 64-bit code is to use NOPs preceded by
3232 +     at most four 0x66 prefixes.  Balance the sizes of the NOPs.  */
3233 +  if (nbytes == 0)
3234 +          return;
3235 +
3236 +  int i;
3237 +  int nnops = (nbytes + 3) / 4;
3238 +  int len = nbytes / nnops;
3239 +  int remains = nbytes - nnops * len;
3240 +
3241 +  for (i = 0; i < remains; i++) {
3242 +          emit_block(prefixes, len);
3243 +          raw_nop();
3244 +  }
3245 +  for (; i < nnops; i++) {
3246 +          emit_block(prefixes, len - 1);
3247 +          raw_nop();
3248 +  }
3249 + #else
3250 +  int nloops = nbytes / 16;
3251 +  while (nloops-- > 0)
3252 +        emit_block(f32_16, sizeof(f32_16));
3253 +
3254 +  nbytes %= 16;
3255 +  if (nbytes)
3256 +        emit_block(f32_patt[nbytes - 1], nbytes);
3257 + #endif
3258 + }
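
A hedged usage example (the 16-byte alignment target is illustrative, not taken from this diff): padding the emitted stream up to the next 16-byte boundary would look something like

    raw_emit_nop_filler((16 - ((uintptr)get_target() & 15)) & 15);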
3259 +
3260  
3261   /*************************************************************************
3262   * Flag handling, to and fro UAE flag register                           *
3263   *************************************************************************/
3264  
3265 < #ifdef SAHF_SETO_PROFITABLE
2064 <
2065 < #define FLAG_NREG1 0  /* Set to -1 if any register will do */
2066 <
2067 < static __inline__ void raw_flags_to_reg(int r)
3265 > static __inline__ void raw_flags_evicted(int r)
3266   {
2069 <  raw_lahf(0);  /* Most flags in AH */
2070 <  //raw_setcc(r,0); /* V flag in AL */
2071 <  raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2072 <  
2073 < #if 1   /* Let's avoid those nasty partial register stalls */
2074 <  //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2075 <  raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3267    //live.state[FLAGTMP].status=CLEAN;
3268    live.state[FLAGTMP].status=INMEM;
3269    live.state[FLAGTMP].realreg=-1;
# Line 2082 | Line 3273 | static __inline__ void raw_flags_to_reg(
3273        abort();
3274    }
3275    live.nat[r].nholds=0;
3276 + }
3277 +
3278 + #define FLAG_NREG1_FLAGREG 0  /* Set to -1 if any register will do */
3279 + static __inline__ void raw_flags_to_reg_FLAGREG(int r)
3280 + {
3281 +  raw_lahf(0);  /* Most flags in AH */
3282 +  //raw_setcc(r,0); /* V flag in AL */
3283 +  raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3284 +  
3285 + #if 1   /* Let's avoid those nasty partial register stalls */
3286 +  //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3287 +  raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3288 +  raw_flags_evicted(r);
3289   #endif
3290   }
3291  
3292 < #define FLAG_NREG2 0  /* Set to -1 if any register will do */
3293 < static __inline__ void raw_reg_to_flags(int r)
3292 > #define FLAG_NREG2_FLAGREG 0  /* Set to -1 if any register will do */
3293 > static __inline__ void raw_reg_to_flags_FLAGREG(int r)
3294   {
3295    raw_cmp_b_ri(r,-127); /* set V */
3296    raw_sahf(0);
3297   }
3298  
3299 < #else
3299 > #define FLAG_NREG3_FLAGREG 0  /* Set to -1 if any register will do */
3300 > static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
3301 > {
3302 >    raw_mov_l_rr(tmp,s);
3303 >    raw_lahf(s); /* flags into ah */
3304 >    raw_and_l_ri(s,0xffffbfff);
3305 >    raw_and_l_ri(tmp,0x00004000);
3306 >    raw_xor_l_ri(tmp,0x00004000);
3307 >    raw_or_l(s,tmp);
3308 >    raw_sahf(s);
3309 > }
3310 >
3311 > static __inline__ void raw_flags_init_FLAGREG(void) { }
3312  
3313 < #define FLAG_NREG1 -1  /* Set to -1 if any register will do */
3314 < static __inline__ void raw_flags_to_reg(int r)
3313 > #define FLAG_NREG1_FLAGSTK -1  /* Set to -1 if any register will do */
3314 > static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
3315   {
3316          raw_pushfl();
3317          raw_pop_l_r(r);
3318 <        raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3319 < //      live.state[FLAGTMP].status=CLEAN;
2104 <        live.state[FLAGTMP].status=INMEM;
2105 <        live.state[FLAGTMP].realreg=-1;
2106 <        /* We just "evicted" FLAGTMP. */
2107 <        if (live.nat[r].nholds!=1) {
2108 <          /* Huh? */
2109 <          abort();
2110 <        }
2111 <        live.nat[r].nholds=0;
3318 >        raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3319 >        raw_flags_evicted(r);
3320   }
3321  
3322 < #define FLAG_NREG2 -1  /* Set to -1 if any register will do */
3323 < static __inline__ void raw_reg_to_flags(int r)
3322 > #define FLAG_NREG2_FLAGSTK -1  /* Set to -1 if any register will do */
3323 > static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
3324   {
3325          raw_push_l_r(r);
3326          raw_popfl();
3327   }
3328  
3329 + #define FLAG_NREG3_FLAGSTK -1  /* Set to -1 if any register will do */
3330 + static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
3331 + {
3332 +    raw_mov_l_rr(tmp,s);
3333 +    raw_pushfl();
3334 +    raw_pop_l_r(s);
3335 +    raw_and_l_ri(s,0xffffffbf);
3336 +    raw_and_l_ri(tmp,0x00000040);
3337 +    raw_xor_l_ri(tmp,0x00000040);
3338 +    raw_or_l(s,tmp);
3339 +    raw_push_l_r(s);
3340 +    raw_popfl();
3341 + }
3342 +
3343 + static __inline__ void raw_flags_init_FLAGSTK(void) { }
3344 +
3345 + #if defined(__x86_64__)
3346 + /* Try to use the LAHF/SETO method on x86_64 since it is faster.
3347 +   This can't be the default because some older CPUs don't support
3348 +   LAHF/SAHF in long mode.  */
3349 + static int FLAG_NREG1_FLAGGEN = 0;
3350 + static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
3351 + {
3352 +        if (have_lahf_lm) {
3353 +                // NOTE: the interpreter uses the normal EFLAGS layout
3354 +                //   pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
3355 +                //   sahf/lahf  CF(8) ZF(14) SF(15) OF( 0)
3356 +                assert(r == 0);
3357 +                raw_setcc(r,0);                                 /* V flag in AL */
3358 +                raw_lea_l_r_scaled(0,0,8);              /* *8 puts the SETO bit at bit 3; stored at mem+1 below, that is EFLAGS bit 11 (OF) */
3359 +                raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
3360 +                raw_lahf(0);                                    /* most flags in AH */
3361 +                raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
3362 +                raw_flags_evicted(r);
3363 +        }
3364 +        else
3365 +                raw_flags_to_reg_FLAGSTK(r);
3366 + }
3367 +
3368 + static int FLAG_NREG2_FLAGGEN = 0;
3369 + static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
3370 + {
3371 +        if (have_lahf_lm) {
3372 +                raw_xchg_b_rr(0,AH_INDEX);
3373 +                raw_cmp_b_ri(r,-120); /* set V */
3374 +                raw_sahf(0);
3375 +        }
3376 +        else
3377 +                raw_reg_to_flags_FLAGSTK(r);
3378 + }
3379 +
3380 + static int FLAG_NREG3_FLAGGEN = 0;
3381 + static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
3382 + {
3383 +        if (have_lahf_lm)
3384 +                raw_flags_set_zero_FLAGREG(s, tmp);
3385 +        else
3386 +                raw_flags_set_zero_FLAGSTK(s, tmp);
3387 + }
3388 +
3389 + static __inline__ void raw_flags_init_FLAGGEN(void)
3390 + {
3391 +        if (have_lahf_lm) {
3392 +                FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
3393 +                FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
3394 +                FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGREG;
3395 +        }
3396 +        else {
3397 +                FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
3398 +                FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
3399 +                FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGSTK;
3400 +        }
3401 + }
3402   #endif
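As an aside, the NOTE in raw_flags_to_reg_FLAGGEN above is the whole trick: the LAHF/SETO path and the PUSHF/POPF path carry the same four flags, only at different bit positions. A minimal sketch of that mapping (illustration only; the helper name is made up and does not exist in this file):

    static inline uae_u32 lahf_seto_to_pushf(uae_u32 w)
    {
        /* LAHF leaves CF/ZF/SF in bits 8/14/15 of AX, SETO leaves OF in bit 0 (AL) */
        uae_u32 f = 0;
        if (w & (1 <<  8)) f |= 1 << 0;   /* CF */
        if (w & (1 << 14)) f |= 1 << 6;   /* ZF */
        if (w & (1 << 15)) f |= 1 << 7;   /* SF */
        if (w & (1 <<  0)) f |= 1 << 11;  /* OF */
        return f;                         /* EFLAGS layout: CF(0) ZF(6) SF(7) OF(11) */
    }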
3403  
3404 + #ifdef SAHF_SETO_PROFITABLE
3405 + #define FLAG_SUFFIX FLAGREG
3406 + #elif defined __x86_64__
3407 + #define FLAG_SUFFIX FLAGGEN
3408 + #else
3409 + #define FLAG_SUFFIX FLAGSTK
3410 + #endif
3411 +
3412 + #define FLAG_GLUE_2(x, y)               x ## _ ## y
3413 + #define FLAG_GLUE_1(x, y)               FLAG_GLUE_2(x, y)
3414 + #define FLAG_GLUE(x)                    FLAG_GLUE_1(x, FLAG_SUFFIX)
3415 +
3416 + #define raw_flags_init                  FLAG_GLUE(raw_flags_init)
3417 + #define FLAG_NREG1                              FLAG_GLUE(FLAG_NREG1)
3418 + #define raw_flags_to_reg                FLAG_GLUE(raw_flags_to_reg)
3419 + #define FLAG_NREG2                              FLAG_GLUE(FLAG_NREG2)
3420 + #define raw_reg_to_flags                FLAG_GLUE(raw_reg_to_flags)
3421 + #define FLAG_NREG3                              FLAG_GLUE(FLAG_NREG3)
3422 + #define raw_flags_set_zero              FLAG_GLUE(raw_flags_set_zero)
3423 +
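To make the selection concrete (illustration only, not part of the file): with FLAG_SUFFIX defined as FLAGSTK, the two-level glue forces FLAG_SUFFIX to be expanded before token pasting, so

    /* raw_reg_to_flags(r)
         -> FLAG_GLUE(raw_reg_to_flags)(r)
         -> FLAG_GLUE_1(raw_reg_to_flags, FLAGSTK)(r)
         -> FLAG_GLUE_2(raw_reg_to_flags, FLAGSTK)(r)
         -> raw_reg_to_flags_FLAGSTK(r)                 */

and FLAG_NREG2 likewise becomes FLAG_NREG2_FLAGSTK, so the rest of the compiler never needs to know which flag backend was picked.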
3424   /* Apparently, there are enough instructions between flag store and
3425     flag reload to avoid the partial memory stall */
3426   static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3427   {
3428   #if 1
3429 <    raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3429 >    raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3430   #else
3431 <    raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3432 <    raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3431 >    raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3432 >    raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3433   #endif
3434   }
3435  
# Line 2136 | Line 3437 | static __inline__ void raw_load_flagreg(
3437   static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3438   {
3439      if (live.nat[target].canbyte)
3440 <        raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3440 >        raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3441      else if (live.nat[target].canword)
3442 <        raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3442 >        raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3443      else
3444 <        raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3444 >        raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3445   }
3446  
3447 + static __inline__ void raw_dec_sp(int off)
3448 + {
3449 +    if (off) raw_sub_l_ri(ESP_INDEX,off);
3450 + }
3451  
3452   static __inline__ void raw_inc_sp(int off)
3453   {
3454 <    raw_add_l_ri(ESP_INDEX,off);
3454 >    if (off) raw_add_l_ri(ESP_INDEX,off);
3455   }
3456  
3457   /*************************************************************************
# Line 2305 | Line 3610 | static void vec(int x, struct sigcontext
3610                  for (i=0;i<5;i++)
3611                      vecbuf[i]=target[i];
3612                  emit_byte(0xe9);
3613 <                emit_long((uae_u32)veccode-(uae_u32)target-4);
3613 >                emit_long((uintptr)veccode-(uintptr)target-4);
3614                  write_log("Create jump to %p\n",veccode);
3615              
3616                  write_log("Handled one access!\n");
# Line 2332 | Line 3637 | static void vec(int x, struct sigcontext
3637                  }
3638                  for (i=0;i<5;i++)
3639                      raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3640 <                raw_mov_l_mi((uae_u32)&in_handler,0);
3640 >                raw_mov_l_mi((uintptr)&in_handler,0);
3641                  emit_byte(0xe9);
3642 <                emit_long(sc.eip+len-(uae_u32)target-4);
3642 >                emit_long(sc.eip+len-(uintptr)target-4);
3643                  in_handler=1;
3644                  target=tmp;
3645              }
# Line 2429 | Line 3734 | enum {
3734    X86_PROCESSOR_K6,
3735    X86_PROCESSOR_ATHLON,
3736    X86_PROCESSOR_PENTIUM4,
3737 +  X86_PROCESSOR_X86_64,
3738    X86_PROCESSOR_max
3739   };
3740  
# Line 2439 | Line 3745 | static const char * x86_processor_string
3745    "PentiumPro",
3746    "K6",
3747    "Athlon",
3748 <  "Pentium4"
3748 >  "Pentium4",
3749 >  "x86-64"
3750   };
3751  
3752   static struct ptt {
# Line 2456 | Line 3763 | x86_alignments[X86_PROCESSOR_max] = {
3763    { 16, 15, 16,  7, 16 },
3764    { 32,  7, 32,  7, 32 },
3765    { 16,  7, 16,  7, 16 },
3766 <  {  0,  0,  0,  0,  0 }
3766 >  {  0,  0,  0,  0,  0 },
3767 >  { 16,  7, 16,  7, 16 }
3768   };
3769  
3770   static void
# Line 2490 | Line 3798 | x86_get_cpu_vendor(struct cpuinfo_x86 *c
3798   static void
3799   cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3800   {
3801 <  static uae_u8 cpuid_space[256];  
3801 >  const int CPUID_SPACE = 4096;
3802 >  uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3803 >  if (cpuid_space == VM_MAP_FAILED)
3804 >    abort();
3805 >  vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3806 >
3807 >  static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3808    uae_u8* tmp=get_target();
3809  
3810 +  s_op = op;
3811    set_target(cpuid_space);
3812    raw_push_l_r(0); /* eax */
3813    raw_push_l_r(1); /* ecx */
3814    raw_push_l_r(2); /* edx */
3815    raw_push_l_r(3); /* ebx */
3816 <  raw_mov_l_rm(0,(uae_u32)&op);
3816 >  raw_mov_l_rm(0,(uintptr)&s_op);
3817    raw_cpuid(0);
3818 <  if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3819 <  if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3820 <  if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3821 <  if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3818 >  raw_mov_l_mr((uintptr)&s_eax,0);
3819 >  raw_mov_l_mr((uintptr)&s_ebx,3);
3820 >  raw_mov_l_mr((uintptr)&s_ecx,1);
3821 >  raw_mov_l_mr((uintptr)&s_edx,2);
3822    raw_pop_l_r(3);
3823    raw_pop_l_r(2);
3824    raw_pop_l_r(1);
# Line 2512 | Line 3827 | cpuid(uae_u32 op, uae_u32 *eax, uae_u32
3827    set_target(tmp);
3828  
3829    ((cpuop_func*)cpuid_space)(0);
3830 +  if (eax != NULL) *eax = s_eax;
3831 +  if (ebx != NULL) *ebx = s_ebx;
3832 +  if (ecx != NULL) *ecx = s_ecx;
3833 +  if (edx != NULL) *edx = s_edx;
3834 +
3835 +  vm_release(cpuid_space, CPUID_SPACE);
3836   }
3837  
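A minimal use of this helper, assuming only what is visible above plus the standard CPUID leaf-0 convention (maximum standard level in EAX, vendor string in EBX/EDX/ECX), could look like this; it mirrors how raw_init_cpu() below queries leaves 0x00000001 and 0x80000000:

    uae_u32 level, vendor[3];
    cpuid(0x00000000, &level, &vendor[0], &vendor[2], &vendor[1]);
    /* vendor[] now spells e.g. "GenuineIntel"; level is the highest
       standard CPUID leaf the processor supports */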
3838   static void
# Line 2520 | Line 3841 | raw_init_cpu(void)
3841    struct cpuinfo_x86 *c = &cpuinfo;
3842  
3843    /* Defaults */
3844 +  c->x86_processor = X86_PROCESSOR_max;
3845    c->x86_vendor = X86_VENDOR_UNKNOWN;
3846    c->cpuid_level = -1;                          /* CPUID not detected */
3847    c->x86_model = c->x86_mask = 0;       /* So far unknown... */
# Line 2541 | Line 3863 | raw_init_cpu(void)
3863          uae_u32 tfms, brand_id;
3864          cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3865          c->x86 = (tfms >> 8) & 15;
3866 +        if (c->x86 == 0xf)
3867 +                c->x86 += (tfms >> 20) & 0xff; /* extended family */
3868          c->x86_model = (tfms >> 4) & 15;
3869 +        if (c->x86_model == 0xf)
3870 +                c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3871          c->x86_brand_id = brand_id & 0xff;
2546        if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2547                 (c->x86 == 0xf)) {
2548          /* AMD Extended Family and Model Values */
2549          c->x86 += (tfms >> 20) & 0xff;
2550          c->x86_model += (tfms >> 12) & 0xf0;
2551        }
3872          c->x86_mask = tfms & 15;
3873    } else {
3874          /* Have CPUID level 0 only - unheard of */
3875          c->x86 = 4;
3876    }
3877  
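A worked example of the decode above (the signature value is made up, but typical of an early AMD K8):

    uae_u32 tfms = 0x00000f4a;           /* stepping 0xa, model 4, family 0xf */
    int family   = (tfms >> 8) & 15;     /* 0xf, so the extended field is added */
    family      += (tfms >> 20) & 0xff;  /* + 0  -> family 15 */
    int model    = (tfms >> 4) & 15;     /* 4: below 0xf, no extended model merged */
    int stepping = tfms & 15;            /* 0xa */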
3878 +  /* AMD-defined flags: level 0x80000001 */
3879 +  uae_u32 xlvl;
3880 +  cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3881 +  if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3882 +        if ( xlvl >= 0x80000001 ) {
3883 +          uae_u32 features, extra_features;
3884 +          cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3885 +          if (features & (1 << 29)) {
3886 +                /* Assume x86-64 if long mode is supported */
3887 +                c->x86_processor = X86_PROCESSOR_X86_64;
3888 +          }
3889 +          if (extra_features & (1 << 0))
3890 +                  have_lahf_lm = true;
3891 +        }
3892 +  }
3893 +          
3894    /* Canonicalize processor ID */
2559  c->x86_processor = X86_PROCESSOR_max;
3895    switch (c->x86) {
3896    case 3:
3897          c->x86_processor = X86_PROCESSOR_I386;
# Line 2577 | Line 3912 | raw_init_cpu(void)
3912            c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3913          break;
3914    case 15:
3915 <        if (c->x86_vendor == X86_VENDOR_INTEL) {
3916 <          /*  Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3917 <          if (c->x86_brand_id >= 8)
3918 <                c->x86_processor = X86_PROCESSOR_PENTIUM4;
3919 <        }
3920 <        break;
3915 >          if (c->x86_processor == X86_PROCESSOR_max) {
3916 >                  switch (c->x86_vendor) {
3917 >                  case X86_VENDOR_INTEL:
3918 >                          c->x86_processor = X86_PROCESSOR_PENTIUM4;
3919 >                          break;
3920 >                  case X86_VENDOR_AMD:
3921 >                          /* Assume a 32-bit Athlon processor if not in long mode */
3922 >                          c->x86_processor = X86_PROCESSOR_ATHLON;
3923 >                          break;
3924 >                  }
3925 >          }
3926 >          break;
3927    }
3928    if (c->x86_processor == X86_PROCESSOR_max) {
3929 <        fprintf(stderr, "Error: unknown processor type\n");
3929 >        c->x86_processor = X86_PROCESSOR_I386;
3930 >        fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3931          fprintf(stderr, "  Family  : %d\n", c->x86);
3932          fprintf(stderr, "  Model   : %d\n", c->x86_model);
3933          fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
3934 +        fprintf(stderr, "  Vendor  : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3935          if (c->x86_brand_id)
3936            fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
2594        abort();
3937    }
3938  
3939    /* Have CMOV support? */
3940 <  have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3940 >  have_cmov = c->x86_hwcap & (1 << 15);
3941  
3942    /* Can the host CPU suffer from partial register stalls? */
3943    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
# Line 2616 | Line 3958 | raw_init_cpu(void)
3958    write_log("Max CPUID level=%d Processor is %s [%s]\n",
3959                          c->cpuid_level, c->x86_vendor_id,
3960                          x86_processor_string_table[c->x86_processor]);
3961 +
3962 +  raw_flags_init();
3963 + }
3964 +
3965 + static bool target_check_bsf(void)
3966 + {
3967 +        bool mismatch = false;
3968 +        for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3969 +        for (int g_CF = 0; g_CF <= 1; g_CF++) {
3970 +        for (int g_OF = 0; g_OF <= 1; g_OF++) {
3971 +        for (int g_SF = 0; g_SF <= 1; g_SF++) {
3972 +                for (int value = -1; value <= 1; value++) {
3973 +                        unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3974 +                        unsigned long tmp = value;
3975 +                        __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3976 +                                                                  : "+r" (flags), "+r" (tmp) : : "cc");
3977 +                        int OF = (flags >> 11) & 1;
3978 +                        int SF = (flags >>  7) & 1;
3979 +                        int ZF = (flags >>  6) & 1;
3980 +                        int CF = flags & 1;
3981 +                        tmp = (value == 0);
3982 +                        if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3983 +                                mismatch = true;
3984 +                }
3985 +        }}}}
3986 +        if (mismatch)
3987 +                write_log("Target CPU defines all flags on BSF instruction\n");
3988 +        return !mismatch;
3989   }
3990  
3991  
# Line 2740 | Line 4110 | static __inline__ void tos_make(int r)
4110      emit_byte(0xd8+(live.tos+1)-live.spos[r]);  /* store top of stack in reg,
4111                                           and pop it*/
4112   }
4113 <    
4114 <        
4113 >
4114 > /* FP helper functions */
4115 > #if USE_NEW_RTASM
4116 > #define DEFINE_OP(NAME, GEN)                    \
4117 > static inline void raw_##NAME(uint32 m)         \
4118 > {                                               \
4119 >    GEN(m, X86_NOREG, X86_NOREG, 1);            \
4120 > }
4121 > DEFINE_OP(fstl,  FSTLm);
4122 > DEFINE_OP(fstpl, FSTPLm);
4123 > DEFINE_OP(fldl,  FLDLm);
4124 > DEFINE_OP(fildl, FILDLm);
4125 > DEFINE_OP(fistl, FISTLm);
4126 > DEFINE_OP(flds,  FLDSm);
4127 > DEFINE_OP(fsts,  FSTSm);
4128 > DEFINE_OP(fstpt, FSTPTm);
4129 > DEFINE_OP(fldt,  FLDTm);
4130 > #else
4131 > #define DEFINE_OP(NAME, OP1, OP2)               \
4132 > static inline void raw_##NAME(uint32 m)         \
4133 > {                                               \
4134 >    emit_byte(OP1);                             \
4135 >    emit_byte(OP2);                             \
4136 >    emit_long(m);                               \
4137 > }
4138 > DEFINE_OP(fstl,  0xdd, 0x15);
4139 > DEFINE_OP(fstpl, 0xdd, 0x1d);
4140 > DEFINE_OP(fldl,  0xdd, 0x05);
4141 > DEFINE_OP(fildl, 0xdb, 0x05);
4142 > DEFINE_OP(fistl, 0xdb, 0x15);
4143 > DEFINE_OP(flds,  0xd9, 0x05);
4144 > DEFINE_OP(fsts,  0xd9, 0x15);
4145 > DEFINE_OP(fstpt, 0xdb, 0x3d);
4146 > DEFINE_OP(fldt,  0xdb, 0x2d);
4147 > #endif
4148 > #undef DEFINE_OP
4149 >
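For reference (illustration only), DEFINE_OP(fstl, 0xdd, 0x15) in the fallback branch expands to exactly the byte sequence the call sites below used to emit by hand:

    static inline void raw_fstl(uint32 m)
    {
        emit_byte(0xdd);   /* FST m64fp opcode */
        emit_byte(0x15);   /* ModRM /2 with disp32 addressing */
        emit_long(m);      /* absolute address of the operand */
    }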
4150   LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4151   {
4152      make_tos(r);
4153 <    emit_byte(0xdd);
2749 <    emit_byte(0x15);
2750 <    emit_long(m);
4153 >    raw_fstl(m);
4154   }
4155   LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4156  
4157   LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4158   {
4159      make_tos(r);
4160 <    emit_byte(0xdd);
2758 <    emit_byte(0x1d);
2759 <    emit_long(m);
4160 >    raw_fstpl(m);
4161      live.onstack[live.tos]=-1;
4162      live.tos--;
4163      live.spos[r]=-2;
# Line 2765 | Line 4166 | LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW
4166  
4167   LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4168   {
4169 <    emit_byte(0xdd);
2769 <    emit_byte(0x05);
2770 <    emit_long(m);
4169 >    raw_fldl(m);
4170      tos_make(r);
4171   }
4172   LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4173  
4174   LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4175   {
4176 <    emit_byte(0xdb);
2778 <    emit_byte(0x05);
2779 <    emit_long(m);
4176 >    raw_fildl(m);
4177      tos_make(r);
4178   }
4179   LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
# Line 2784 | Line 4181 | LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r,
4181   LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4182   {
4183      make_tos(r);
4184 <    emit_byte(0xdb);
2788 <    emit_byte(0x15);
2789 <    emit_long(m);
4184 >    raw_fistl(m);
4185   }
4186   LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4187  
4188   LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4189   {
4190 <    emit_byte(0xd9);
2796 <    emit_byte(0x05);
2797 <    emit_long(m);
4190 >    raw_flds(m);
4191      tos_make(r);
4192   }
4193   LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
# Line 2802 | Line 4195 | LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r,
4195   LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4196   {
4197      make_tos(r);
4198 <    emit_byte(0xd9);
2806 <    emit_byte(0x15);
2807 <    emit_long(m);
4198 >    raw_fsts(m);
4199   }
4200   LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4201  
# Line 2819 | Line 4210 | LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(ME
4210      emit_byte(0xd9);     /* Get a copy to the top of stack */
4211      emit_byte(0xc0+rs);
4212  
4213 <    emit_byte(0xdb);  /* store and pop it */
2823 <    emit_byte(0x3d);
2824 <    emit_long(m);
4213 >    raw_fstpt(m);       /* store and pop it */
4214   }
4215   LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4216  
# Line 2830 | Line 4219 | LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_dro
4219      int rs;
4220  
4221      make_tos(r);
4222 <    emit_byte(0xdb);  /* store and pop it */
2834 <    emit_byte(0x3d);
2835 <    emit_long(m);
4222 >    raw_fstpt(m);       /* store and pop it */
4223      live.onstack[live.tos]=-1;
4224      live.tos--;
4225      live.spos[r]=-2;
# Line 2841 | Line 4228 | LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(M
4228  
4229   LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4230   {
4231 <    emit_byte(0xdb);
2845 <    emit_byte(0x2d);
2846 <    emit_long(m);
4231 >    raw_fldt(m);
4232      tos_make(r);
4233   }
4234   LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
# Line 3030 | Line 4415 | LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, F
4415   }
4416   LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4417  
4418 < double one=1;
4418 > static const double one=1;
4419   LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4420   {
4421      int ds;
# Line 3050 | Line 4435 | LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d
4435      emit_byte(0xe1);  /* subtract rounded from original */
4436      emit_byte(0xd9);
4437      emit_byte(0xf0);  /* f2xm1 */
4438 <    emit_byte(0xdc);
3054 <    emit_byte(0x05);
3055 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4438 >    x86_fadd_m((uintptr)&one);  /* Add '1' without using extra stack space */
4439      emit_byte(0xd9);
4440      emit_byte(0xfd);  /* and scale it */
4441      emit_byte(0xdd);
# Line 3084 | Line 4467 | LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d,
4467      emit_byte(0xe1);  /* subtract rounded from original */
4468      emit_byte(0xd9);
4469      emit_byte(0xf0);  /* f2xm1 */
4470 <    emit_byte(0xdc);
3088 <    emit_byte(0x05);
3089 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4470 >    x86_fadd_m((uintptr)&one);  /* Add '1' without using extra stack space */
4471      emit_byte(0xd9);
4472      emit_byte(0xfd);  /* and scale it */
4473      emit_byte(0xdd);
