root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp

Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z vs.
Revision 1.32 by gbeauche, 2006-01-16T21:31:41Z

# Line 3 | Line 3
3   *
4   *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5   *
6 < *  Adaptation for Basilisk II and improvements, copyright 2000-2002
6 > *  Adaptation for Basilisk II and improvements, copyright 2000-2005
7   *    Gwenole Beauchesne
8   *
9 < *  Basilisk II (C) 1997-2002 Christian Bauer
9 > *  Basilisk II (C) 1997-2005 Christian Bauer
10 > *
11 > *  Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12   *  
13   *  This program is free software; you can redistribute it and/or modify
14   *  it under the terms of the GNU General Public License as published by
# Line 40 | Line 42
42   #define EBP_INDEX 5
43   #define ESI_INDEX 6
44   #define EDI_INDEX 7
45 + #if defined(__x86_64__)
46 + #define R8_INDEX  8
47 + #define R9_INDEX  9
48 + #define R10_INDEX 10
49 + #define R11_INDEX 11
50 + #define R12_INDEX 12
51 + #define R13_INDEX 13
52 + #define R14_INDEX 14
53 + #define R15_INDEX 15
54 + #endif
55  
56   /* The register in which subroutines return an integer return value */
57 < #define REG_RESULT 0
57 > #define REG_RESULT EAX_INDEX
58  
59   /* The registers subroutines take their first and second argument in */
60   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61   /* Handle the _fastcall parameters of ECX and EDX */
62 < #define REG_PAR1 1
63 < #define REG_PAR2 2
62 > #define REG_PAR1 ECX_INDEX
63 > #define REG_PAR2 EDX_INDEX
64 > #elif defined(__x86_64__)
65 > #define REG_PAR1 EDI_INDEX
66 > #define REG_PAR2 ESI_INDEX
67   #else
68 < #define REG_PAR1 0
69 < #define REG_PAR2 2
68 > #define REG_PAR1 EAX_INDEX
69 > #define REG_PAR2 EDX_INDEX
70   #endif
71  
72 < /* Three registers that are not used for any of the above */
58 < #define REG_NOPAR1 6
59 < #define REG_NOPAR2 5
60 < #define REG_NOPAR3 3
61 <
62 < #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
72 > #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73   #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 < #define REG_PC_TMP 0
74 > #define REG_PC_TMP EAX_INDEX
75   #else
76 < #define REG_PC_TMP 1 /* Another register that is not the above */
76 > #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77   #endif
78  
79 < #define SHIFTCOUNT_NREG 1  /* Register that can be used for shiftcount.
79 > #define SHIFTCOUNT_NREG ECX_INDEX  /* Register that can be used for shiftcount.
80                                -1 if any reg will do */
81 < #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
82 < #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
81 > #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82 > #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 >
84 > #define STACK_ALIGN             16
85 > #define STACK_OFFSET    sizeof(void *)
86  
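STACK_ALIGN and STACK_OFFSET encode the 16-byte stack alignment the x86-64 SysV ABI requires at call sites; STACK_OFFSET is the misalignment already present on entry (the pushed return address). A minimal sketch of the padding computation these constants imply; the helper name is illustrative, not part of the file:

    /* Sketch: bytes to subtract from the stack pointer so that, after
       pushing `pushed` registers and reserving `locals` bytes, the
       stack is STACK_ALIGN-aligned again at the next call site. */
    static int stack_padding(int pushed, int locals)
    {
        int used = STACK_OFFSET + pushed * sizeof(void *) + locals;
        return (STACK_ALIGN - (used % STACK_ALIGN)) % STACK_ALIGN;
    }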
87   uae_s8 always_used[]={4,-1};
88 + #if defined(__x86_64__)
89 + uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
90 + uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
91 + #else
92   uae_s8 can_byte[]={0,1,2,3,-1};
93   uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
94 + #endif
95  
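The can_byte/can_word tables reflect an encoding restriction: without a REX prefix only %eax through %edx have addressable low-byte subregisters, hence the IA-32 byte table stops at index 3, while on x86-64 every register except %esp (index 4, already in always_used) qualifies. A hedged sketch of the membership test such -1 terminated tables support; the helper is illustrative:

    /* Sketch: true if register r appears in a -1 terminated list
       such as can_byte[] or can_word[]. */
    static bool reg_in_list(int r, const uae_s8 *list)
    {
        for (int i = 0; list[i] != -1; i++)
            if (list[i] == r)
                return true;
        return false;
    }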
96 + #if USE_OPTIMIZED_CALLS
97 + /* Make sure interpretive core does not use cpuopti */
98 + uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
99 + #error FIXME: code not ready
100 + #else
101 + /* cpuopti mutates instruction handlers to assume registers are saved
102     by the caller */
103 < uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
103 > uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
104 > #endif
105  
106   /* This *should* be the same as call_saved. But:
107     - We might not really know which registers are saved, and which aren't,
# Line 86 | Line 110 | uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
110     - Special registers (such like the stack pointer) should not be "preserved"
111       by pushing, even though they are "saved" across function calls
112   */
113 < uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
113 > #if defined(__x86_64__)
114 > /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
115 > /* preserve r11 because it's generally used to hold pointers to functions */
116 > static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
117 > #else
118 > /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
119 > static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
120 > #endif
121  
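need_to_preserve drives the prologue and epilogue of generated blocks: every register whose entry is 1 is pushed before JIT-compiled code runs and popped afterwards, in mirror order. A minimal sketch under that assumption; N_NATIVE_REGS and the two helper names are illustrative:

    /* Sketch: save/restore callee-saved registers around generated
       code.  N_NATIVE_REGS would be 16 on x86-64, 8 on IA-32. */
    static void push_regs_to_preserve(void) {
        for (int i = N_NATIVE_REGS - 1; i >= 0; i--)
            if (need_to_preserve[i])
                raw_push_l_r(i);
    }
    static void pop_preserved_regs(void) {
        for (int i = 0; i < N_NATIVE_REGS; i++)
            if (need_to_preserve[i])
                raw_pop_l_r(i);
    }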
122   /* Whether classes of instructions do or don't clobber the native flags */
123   #define CLOBBER_MOV
# Line 111 | Line 142 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
142   #define CLOBBER_TEST clobber_flags()
143   #define CLOBBER_CL16
144   #define CLOBBER_CL8  
145 + #define CLOBBER_SE32
146   #define CLOBBER_SE16
147   #define CLOBBER_SE8
148 + #define CLOBBER_ZE32
149   #define CLOBBER_ZE16
150   #define CLOBBER_ZE8
151   #define CLOBBER_SW16 clobber_flags()
# Line 122 | Line 155 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
155   #define CLOBBER_BT   clobber_flags()
156   #define CLOBBER_BSF  clobber_flags()
157  
158 + /* FIXME: disabled until that's proofread.  */
159 + #if defined(__x86_64__)
160 + #define USE_NEW_RTASM 1
161 + #endif
162 +
163 + #if USE_NEW_RTASM
164 +
165 + #if defined(__x86_64__)
166 + #define X86_TARGET_64BIT                1
167 + #endif
168 + #define X86_FLAT_REGISTERS              0
169 + #define X86_OPTIMIZE_ALU                1
170 + #define X86_OPTIMIZE_ROTSHI             1
171 + #include "codegen_x86.h"
172 +
173 + #define x86_emit_byte(B)                emit_byte(B)
174 + #define x86_emit_word(W)                emit_word(W)
175 + #define x86_emit_long(L)                emit_long(L)
176 + #define x86_emit_quad(Q)                emit_quad(Q)
177 + #define x86_get_target()                get_target()
178 + #define x86_emit_failure(MSG)   jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
179 +
180 + static void jit_fail(const char *msg, const char *file, int line, const char *function)
181 + {
182 +        fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
183 +                        function, file, line, msg);
184 +        abort();
185 + }
186 +
187 + LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
188 + {
189 + #if defined(__x86_64__)
190 +        PUSHQr(r);
191 + #else
192 +        PUSHLr(r);
193 + #endif
194 + }
195 + LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
196 +
197 + LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
198 + {
199 + #if defined(__x86_64__)
200 +        POPQr(r);
201 + #else
202 +        POPLr(r);
203 + #endif
204 + }
205 + LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
206 +
207 + LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
208 + {
209 + #if defined(__x86_64__)
210 +        POPQm(d, X86_NOREG, X86_NOREG, 1);
211 + #else
212 +        POPLm(d, X86_NOREG, X86_NOREG, 1);
213 + #endif
214 + }
215 + LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
216 +
217 + LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
218 + {
219 +        BTLir(i, r);
220 + }
221 + LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
222 +
223 + LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
224 + {
225 +        BTLrr(b, r);
226 + }
227 + LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
228 +
229 + LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
230 + {
231 +        BTCLir(i, r);
232 + }
233 + LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
234 +
235 + LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
236 + {
237 +        BTCLrr(b, r);
238 + }
239 + LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
240 +
241 + LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
242 + {
243 +        BTRLir(i, r);
244 + }
245 + LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
246 +
247 + LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
248 + {
249 +        BTRLrr(b, r);
250 + }
251 + LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
252 +
253 + LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
254 + {
255 +        BTSLir(i, r);
256 + }
257 + LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
258 +
259 + LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
260 + {
261 +        BTSLrr(b, r);
262 + }
263 + LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
264 +
265 + LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
266 + {
267 +        SUBWir(i, d);
268 + }
269 + LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
270 +
271 + LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
272 + {
273 +        MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
274 + }
275 + LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
276 +
277 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
278 + {
279 +        MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
280 + }
281 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
282 +
283 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
284 + {
285 +        MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
286 + }
287 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
288 +
289 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
290 + {
291 +        MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
292 + }
293 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
294 +
295 + LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
296 + {
297 +        ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
298 + }
299 + LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
300 +
301 + LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
302 + {
303 +        ROLBir(i, r);
304 + }
305 + LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
306 +
307 + LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
308 + {
309 +        ROLWir(i, r);
310 + }
311 + LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
312 +
313 + LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
314 + {
315 +        ROLLir(i, r);
316 + }
317 + LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
318 +
319 + LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320 + {
321 +        ROLLrr(r, d);
322 + }
323 + LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
324 +
325 + LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326 + {
327 +        ROLWrr(r, d);
328 + }
329 + LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
330 +
331 + LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
332 + {
333 +        ROLBrr(r, d);
334 + }
335 + LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
336 +
337 + LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
338 + {
339 +        SHLLrr(r, d);
340 + }
341 + LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
342 +
343 + LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344 + {
345 +        SHLWrr(r, d);
346 + }
347 + LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
348 +
349 + LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350 + {
351 +        SHLBrr(r, d);
352 + }
353 + LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
354 +
355 + LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
356 + {
357 +        RORBir(i, r);
358 + }
359 + LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
360 +
361 + LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
362 + {
363 +        RORWir(i, r);
364 + }
365 + LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
366 +
367 + LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
368 + {
369 +        ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
370 + }
371 + LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
372 +
373 + LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
374 + {
375 +        RORLir(i, r);
376 + }
377 + LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
378 +
379 + LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
380 + {
381 +        RORLrr(r, d);
382 + }
383 + LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
384 +
385 + LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
386 + {
387 +        RORWrr(r, d);
388 + }
389 + LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
390 +
391 + LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
392 + {
393 +        RORBrr(r, d);
394 + }
395 + LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
396 +
397 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
398 + {
399 +        SHRLrr(r, d);
400 + }
401 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
402 +
403 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
404 + {
405 +        SHRWrr(r, d);
406 + }
407 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
408 +
409 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
410 + {
411 +        SHRBrr(r, d);
412 + }
413 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
414 +
415 + LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
416 + {
417 +        SARLrr(r, d);
418 + }
419 + LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
420 +
421 + LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
422 + {
423 +        SARWrr(r, d);
424 + }
425 + LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
426 +
427 + LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
428 + {
429 +        SARBrr(r, d);
430 + }
431 + LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
432 +
433 + LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
434 + {
435 +        SHLLir(i, r);
436 + }
437 + LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
438 +
439 + LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
440 + {
441 +        SHLWir(i, r);
442 + }
443 + LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
444 +
445 + LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
446 + {
447 +        SHLBir(i, r);
448 + }
449 + LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
450 +
451 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
452 + {
453 +        SHRLir(i, r);
454 + }
455 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
456 +
457 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
458 + {
459 +        SHRWir(i, r);
460 + }
461 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
462 +
463 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
464 + {
465 +        SHRBir(i, r);
466 + }
467 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
468 +
469 + LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
470 + {
471 +        SARLir(i, r);
472 + }
473 + LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
474 +
475 + LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
476 + {
477 +        SARWir(i, r);
478 + }
479 + LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
480 +
481 + LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
482 + {
483 +        SARBir(i, r);
484 + }
485 + LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
486 +
487 + LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
488 + {
489 +        SAHF();
490 + }
491 + LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
492 +
493 + LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
494 + {
495 +        CPUID();
496 + }
497 + LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
498 +
499 + LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
500 + {
501 +        LAHF();
502 + }
503 + LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
504 +
505 + LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
506 + {
507 +        SETCCir(cc, d);
508 + }
509 + LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
510 +
511 + LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
512 + {
513 +        SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
514 + }
515 + LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
516 +
517 + LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
518 + {
519 +        if (have_cmov)
520 +                CMOVLrr(cc, s, d);
521 +        else { /* replacement using branch and mov */
522 + #if defined(__x86_64__)
523 +                write_log("x86-64 implementations are bound to have CMOV!\n");
524 +                abort();
525 + #endif
526 +                JCCSii(cc^1, 2);
527 +                MOVLrr(s, d);
528 +        }
529 + }
530 + LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
531 +
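The fallback in raw_cmov_l_rr above depends on a fixed instruction length: JCCSii(cc^1, 2) emits a short jump on the inverted condition that skips exactly the 2 bytes of the register-to-register MOVLrr, so the move runs only when the original condition holds. The memory-operand variants further down use skip counts of 7 and 6 bytes for the same reason. A sketch of the emitted pattern, in comment form:

    /* Sketch of the non-CMOV path (AT&T syntax):
     *     j<!cc>  .+2        ; 2-byte short jump over the mov
     *     movl    %s, %d     ; 2-byte reg-to-reg mov (89 /r)
     * which is semantically: if (cc) d = s;
     */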
532 + LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
533 + {
534 +        BSFLrr(s, d);
535 + }
536 + LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
537 +
538 + LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
539 + {
540 +        MOVSLQrr(s, d);
541 + }
542 + LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
543 +
544 + LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
545 + {
546 +        MOVSWLrr(s, d);
547 + }
548 + LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
549 +
550 + LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
551 + {
552 +        MOVSBLrr(s, d);
553 + }
554 + LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
555 +
556 + LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
557 + {
558 +        MOVZWLrr(s, d);
559 + }
560 + LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
561 +
562 + LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
563 + {
564 +        MOVZBLrr(s, d);
565 + }
566 + LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
567 +
568 + LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
569 + {
570 +        IMULLrr(s, d);
571 + }
572 + LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
573 +
574 + LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
575 + {
576 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
577 +        write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
578 +        abort();
579 +        }
580 +        IMULLr(s);
581 + }
582 + LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
583 +
584 + LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
585 + {
586 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
587 +        write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
588 +        abort();
589 +        }
590 +        MULLr(s);
591 + }
592 + LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
593 +
594 + LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
595 + {
596 +        abort(); /* %^$&%^$%#^ x86! */
597 + }
598 + LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
599 +
600 + LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
601 + {
602 +        MOVBrr(s, d);
603 + }
604 + LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
605 +
606 + LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
607 + {
608 +        MOVWrr(s, d);
609 + }
610 + LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
611 +
612 + LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
613 + {
614 +        MOVLmr(0, baser, index, factor, d);
615 + }
616 + LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
617 +
618 + LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
619 + {
620 +        MOVWmr(0, baser, index, factor, d);
621 + }
622 + LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
623 +
624 + LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
625 + {
626 +        MOVBmr(0, baser, index, factor, d);
627 + }
628 + LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
629 +
630 + LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
631 + {
632 +        MOVLrm(s, 0, baser, index, factor);
633 + }
634 + LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
635 +
636 + LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
637 + {
638 +        MOVWrm(s, 0, baser, index, factor);
639 + }
640 + LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
641 +
642 + LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
643 + {
644 +        MOVBrm(s, 0, baser, index, factor);
645 + }
646 + LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
647 +
648 + LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
649 + {
650 +        MOVLrm(s, base, baser, index, factor);
651 + }
652 + LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
653 +
654 + LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
655 + {
656 +        MOVWrm(s, base, baser, index, factor);
657 + }
658 + LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
659 +
660 + LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
661 + {
662 +        MOVBrm(s, base, baser, index, factor);
663 + }
664 + LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
665 +
666 + LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
667 + {
668 +        MOVLmr(base, baser, index, factor, d);
669 + }
670 + LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
671 +
672 + LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
673 + {
674 +        MOVWmr(base, baser, index, factor, d);
675 + }
676 + LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
677 +
678 + LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
679 + {
680 +        MOVBmr(base, baser, index, factor, d);
681 + }
682 + LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
683 +
684 + LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
685 + {
686 +        MOVLmr(base, X86_NOREG, index, factor, d);
687 + }
688 + LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
689 +
690 + LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
691 + {
692 +        if (have_cmov)
693 +                CMOVLmr(cond, base, X86_NOREG, index, factor, d);
694 +        else { /* replacement using branch and mov */
695 + #if defined(__x86_64__)
696 +                write_log("x86-64 implementations are bound to have CMOV!\n");
697 +                abort();
698 + #endif
699 +                JCCSii(cond^1, 7);
700 +                MOVLmr(base, X86_NOREG, index, factor, d);
701 +        }
702 + }
703 + LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
704 +
705 + LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
706 + {
707 +        if (have_cmov)
708 +                CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
709 +        else { /* replacement using branch and mov */
710 + #if defined(__x86_64__)
711 +                write_log("x86-64 implementations are bound to have CMOV!\n");
712 +                abort();
713 + #endif
714 +                JCCSii(cond^1, 6);
715 +                MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
716 +        }
717 + }
718 + LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
719 +
720 + LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
721 + {
722 +        MOVLmr(offset, s, X86_NOREG, 1, d);
723 + }
724 + LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
725 +
726 + LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
727 + {
728 +        MOVWmr(offset, s, X86_NOREG, 1, d);
729 + }
730 + LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
731 +
732 + LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
733 + {
734 +        MOVBmr(offset, s, X86_NOREG, 1, d);
735 + }
736 + LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
737 +
738 + LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
739 + {
740 +        MOVLmr(offset, s, X86_NOREG, 1, d);
741 + }
742 + LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
743 +
744 + LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
745 + {
746 +        MOVWmr(offset, s, X86_NOREG, 1, d);
747 + }
748 + LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
749 +
750 + LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
751 + {
752 +        MOVBmr(offset, s, X86_NOREG, 1, d);
753 + }
754 + LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
755 +
756 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
757 + {
758 +        MOVLim(i, offset, d, X86_NOREG, 1);
759 + }
760 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
761 +
762 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
763 + {
764 +        MOVWim(i, offset, d, X86_NOREG, 1);
765 + }
766 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
767 +
768 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
769 + {
770 +        MOVBim(i, offset, d, X86_NOREG, 1);
771 + }
772 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
773 +
774 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
775 + {
776 +        MOVLrm(s, offset, d, X86_NOREG, 1);
777 + }
778 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
779 +
780 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
781 + {
782 +        MOVWrm(s, offset, d, X86_NOREG, 1);
783 + }
784 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
785 +
786 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
787 + {
788 +        MOVBrm(s, offset, d, X86_NOREG, 1);
789 + }
790 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
791 +
792 + LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
793 + {
794 +        LEALmr(offset, s, X86_NOREG, 1, d);
795 + }
796 + LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
797 +
798 + LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
799 + {
800 +        LEALmr(offset, s, index, factor, d);
801 + }
802 + LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
803 +
804 + LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
805 + {
806 +        LEALmr(0, s, index, factor, d);
807 + }
808 + LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
809 +
810 + LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
811 + {
812 +        MOVLrm(s, offset, d, X86_NOREG, 1);
813 + }
814 + LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
815 +
816 + LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
817 + {
818 +        MOVWrm(s, offset, d, X86_NOREG, 1);
819 + }
820 + LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
821 +
822 + LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
823 + {
824 +        MOVBrm(s, offset, d, X86_NOREG, 1);
825 + }
826 + LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
827 +
828 + LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
829 + {
830 +        BSWAPLr(r);
831 + }
832 + LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
833 +
834 + LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
835 + {
836 +        ROLWir(8, r);
837 + }
838 + LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
839 +
840 + LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
841 + {
842 +        MOVLrr(s, d);
843 + }
844 + LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
845 +
846 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
847 + {
848 +        MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
849 + }
850 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
851 +
852 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
853 + {
854 +        MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
855 + }
856 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
857 +
858 + LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
859 + {
860 +        MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
861 + }
862 + LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
863 +
864 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
865 + {
866 +        MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
867 + }
868 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
869 +
870 + LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
871 + {
872 +        MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
873 + }
874 + LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
875 +
876 + LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
877 + {
878 +        MOVLir(s, d);
879 + }
880 + LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
881 +
882 + LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
883 + {
884 +        MOVWir(s, d);
885 + }
886 + LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
887 +
888 + LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
889 + {
890 +        MOVBir(s, d);
891 + }
892 + LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
893 +
894 + LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
895 + {
896 +        ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
897 + }
898 + LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
899 +
900 + LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
901 + {
902 +        ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
903 + }
904 + LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
905 +
906 + LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
907 + {
908 +        ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
909 + }
910 + LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
911 +
912 + LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
913 + {
914 +        ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
915 + }
916 + LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
917 +
918 + LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
919 + {
920 +        TESTLir(i, d);
921 + }
922 + LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
923 +
924 + LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
925 + {
926 +        TESTLrr(s, d);
927 + }
928 + LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
929 +
930 + LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
931 + {
932 +        TESTWrr(s, d);
933 + }
934 + LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
935 +
936 + LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
937 + {
938 +        TESTBrr(s, d);
939 + }
940 + LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
941 +
942 + LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
943 + {
944 +        XORLir(i, d);
945 + }
946 + LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
947 +
948 + LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
949 + {
950 +        ANDLir(i, d);
951 + }
952 + LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
953 +
954 + LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
955 + {
956 +        ANDWir(i, d);
957 + }
958 + LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
959 +
960 + LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
961 + {
962 +        ANDLrr(s, d);
963 + }
964 + LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
965 +
966 + LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
967 + {
968 +        ANDWrr(s, d);
969 + }
970 + LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
971 +
972 + LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
973 + {
974 +        ANDBrr(s, d);
975 + }
976 + LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
977 +
978 + LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
979 + {
980 +        ORLir(i, d);
981 + }
982 + LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
983 +
984 + LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
985 + {
986 +        ORLrr(s, d);
987 + }
988 + LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
989 +
990 + LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
991 + {
992 +        ORWrr(s, d);
993 + }
994 + LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
995 +
996 + LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
997 + {
998 +        ORBrr(s, d);
999 + }
1000 + LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1001 +
1002 + LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1003 + {
1004 +        ADCLrr(s, d);
1005 + }
1006 + LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1007 +
1008 + LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1009 + {
1010 +        ADCWrr(s, d);
1011 + }
1012 + LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1013 +
1014 + LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1015 + {
1016 +        ADCBrr(s, d);
1017 + }
1018 + LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1019 +
1020 + LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1021 + {
1022 +        ADDLrr(s, d);
1023 + }
1024 + LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1025 +
1026 + LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1027 + {
1028 +        ADDWrr(s, d);
1029 + }
1030 + LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1031 +
1032 + LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1033 + {
1034 +        ADDBrr(s, d);
1035 + }
1036 + LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1037 +
1038 + LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1039 + {
1040 +        SUBLir(i, d);
1041 + }
1042 + LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1043 +
1044 + LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1045 + {
1046 +        SUBBir(i, d);
1047 + }
1048 + LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1049 +
1050 + LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1051 + {
1052 +        ADDLir(i, d);
1053 + }
1054 + LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1055 +
1056 + LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1057 + {
1058 +        ADDWir(i, d);
1059 + }
1060 + LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1061 +
1062 + LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1063 + {
1064 +        ADDBir(i, d);
1065 + }
1066 + LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1067 +
1068 + LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1069 + {
1070 +        SBBLrr(s, d);
1071 + }
1072 + LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1073 +
1074 + LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1075 + {
1076 +        SBBWrr(s, d);
1077 + }
1078 + LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1079 +
1080 + LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1081 + {
1082 +        SBBBrr(s, d);
1083 + }
1084 + LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1085 +
1086 + LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1087 + {
1088 +        SUBLrr(s, d);
1089 + }
1090 + LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1091 +
1092 + LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1093 + {
1094 +        SUBWrr(s, d);
1095 + }
1096 + LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1097 +
1098 + LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1099 + {
1100 +        SUBBrr(s, d);
1101 + }
1102 + LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1103 +
1104 + LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1105 + {
1106 +        CMPLrr(s, d);
1107 + }
1108 + LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1109 +
1110 + LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1111 + {
1112 +        CMPLir(i, r);
1113 + }
1114 + LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1115 +
1116 + LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1117 + {
1118 +        CMPWrr(s, d);
1119 + }
1120 + LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1121 +
1122 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1123 + {
1124 +        CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1125 + }
1126 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1127 +
1128 + LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1129 + {
1130 +        CMPBir(i, d);
1131 + }
1132 + LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1133 +
1134 + LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1135 + {
1136 +        CMPBrr(s, d);
1137 + }
1138 + LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1139 +
1140 + LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1141 + {
1142 +        CMPLmr(offset, X86_NOREG, index, factor, d);
1143 + }
1144 + LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1145 +
1146 + LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1147 + {
1148 +        XORLrr(s, d);
1149 + }
1150 + LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1151 +
1152 + LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1153 + {
1154 +        XORWrr(s, d);
1155 + }
1156 + LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1157 +
1158 + LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1159 + {
1160 +        XORBrr(s, d);
1161 + }
1162 + LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1163 +
1164 + LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1165 + {
1166 +        SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1167 + }
1168 + LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1169 +
1170 + LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1171 + {
1172 +        CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1173 + }
1174 + LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1175 +
1176 + LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1177 + {
1178 +        XCHGLrr(r2, r1);
1179 + }
1180 + LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1181 +
1182 + LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1183 + {
1184 +        PUSHF();
1185 + }
1186 + LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1187 +
1188 + LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1189 + {
1190 +        POPF();
1191 + }
1192 + LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1193 +
1194 + #else
1195 +
1196   const bool optimize_accum               = true;
1197   const bool optimize_imm8                = true;
1198   const bool optimize_shift_once  = true;
# Line 157 | Line 1228 | LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1228   }
1229   LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1230  
1231 + LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1232 + {
1233 +        emit_byte(0x8f);
1234 +        emit_byte(0x05);
1235 +        emit_long(d);
1236 + }
1237 + LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1238 +
1239   LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1240   {
1241          emit_byte(0x0f);
# Line 1071 | Line 2150 | LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d
2150  
2151   LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2152   {
2153 +        Dif(!isbyte(offset)) abort();
2154      emit_byte(0x8b);
2155      emit_byte(0x40+8*d+s);
2156      emit_byte(offset);
# Line 1079 | Line 2159 | LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d,
2159  
2160   LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2161   {
2162 +        Dif(!isbyte(offset)) abort();
2163      emit_byte(0x66);
2164      emit_byte(0x8b);
2165      emit_byte(0x40+8*d+s);
# Line 1088 | Line 2169 | LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d,
2169  
2170   LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2171   {
2172 +        Dif(!isbyte(offset)) abort();
2173      emit_byte(0x8a);
2174      emit_byte(0x40+8*d+s);
2175      emit_byte(offset);
# Line 1121 | Line 2203 | LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d
2203  
2204   LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2205   {
2206 +        Dif(!isbyte(offset)) abort();
2207      emit_byte(0xc7);
2208      emit_byte(0x40+d);
2209      emit_byte(offset);
# Line 1130 | Line 2213 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d
2213  
2214   LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2215   {
2216 +        Dif(!isbyte(offset)) abort();
2217      emit_byte(0x66);
2218      emit_byte(0xc7);
2219      emit_byte(0x40+d);
# Line 1140 | Line 2224 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d
2224  
2225   LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2226   {
2227 +        Dif(!isbyte(offset)) abort();
2228      emit_byte(0xc6);
2229      emit_byte(0x40+d);
2230      emit_byte(offset);
# Line 1149 | Line 2234 | LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d
2234  
2235   LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2236   {
2237 +        Dif(!isbyte(offset)) abort();
2238      emit_byte(0x89);
2239      emit_byte(0x40+8*s+d);
2240      emit_byte(offset);
# Line 1157 | Line 2243 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d
2243  
2244   LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2245   {
2246 +        Dif(!isbyte(offset)) abort();
2247      emit_byte(0x66);
2248      emit_byte(0x89);
2249      emit_byte(0x40+8*s+d);
# Line 1166 | Line 2253 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d
2253  
2254   LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2255   {
2256 +        Dif(!isbyte(offset)) abort();
2257      emit_byte(0x88);
2258      emit_byte(0x40+8*s+d);
2259      emit_byte(offset);
# Line 1440 | Line 2528 | LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d
2528   }
2529   LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2530  
2531 + LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2532 + {
2533 +    emit_byte(0x81);
2534 +    emit_byte(0xf0+d);
2535 +    emit_long(i);
2536 + }
2537 + LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2538 +
2539   LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2540   {
2541          if (optimize_imm8 && isbyte(i)) {
# Line 1856 | Line 2952 | LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r
2952   LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2953  
2954   /*************************************************************************
1859 * FIXME: string-related instructions                                    *
1860 *************************************************************************/
1861
1862 LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1863 {
1864        emit_byte(0xfc);
1865 }
1866 LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1867
1868 LOWFUNC(WRITE,NONE,0,raw_std,(void))
1869 {
1870        emit_byte(0xfd);
1871 }
1872 LENDFUNC(WRITE,NONE,0,raw_std,(void))
1873
1874 LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1875 {
1876        emit_byte(0xa4);
1877 }
1878 LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1879
1880 LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1881 {
1882        emit_byte(0xa5);
1883 }
1884 LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1885
1886 LOWFUNC(NONE,RMW,0,raw_rep,(void))
1887 {
1888        emit_byte(0xf3);
1889 }
1890 LENDFUNC(NONE,RMW,0,raw_rep,(void))
1891
1892 LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1893 {
1894        raw_rep();
1895        raw_movs_b();
1896 }
1897 LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1898
1899 LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1900 {
1901        raw_rep();
1902        raw_movs_l();
1903 }
1904 LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1905
1906 /*************************************************************************
2955   * FIXME: mem access modes probably wrong                                *
2956   *************************************************************************/
2957  
# Line 1919 | Line 2967 | LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2967   }
2968   LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2969  
2970 + #endif
2971 +
2972   /*************************************************************************
2973   * Unoptimizable stuff --- jump                                          *
2974   *************************************************************************/
2975  
2976   static __inline__ void raw_call_r(R4 r)
2977   {
2978 + #if USE_NEW_RTASM
2979 +    CALLsr(r);
2980 + #else
2981      emit_byte(0xff);
2982      emit_byte(0xd0+r);
2983 + #endif
2984   }
2985  
2986   static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2987   {
2988 + #if USE_NEW_RTASM
2989 +    CALLsm(base, X86_NOREG, r, m);
2990 + #else
2991      int mu;
2992      switch(m) {
2993       case 1: mu=0; break;
# Line 1943 | Line 3000 | static __inline__ void raw_call_m_indexe
3000      emit_byte(0x14);
3001      emit_byte(0x05+8*r+0x40*mu);
3002      emit_long(base);
3003 + #endif
3004   }
3005  
3006   static __inline__ void raw_jmp_r(R4 r)
3007   {
3008 + #if USE_NEW_RTASM
3009 +    JMPsr(r);
3010 + #else
3011      emit_byte(0xff);
3012      emit_byte(0xe0+r);
3013 + #endif
3014   }
3015  
3016   static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3017   {
3018 + #if USE_NEW_RTASM
3019 +    JMPsm(base, X86_NOREG, r, m);
3020 + #else
3021      int mu;
3022      switch(m) {
3023       case 1: mu=0; break;
# Line 1965 | Line 3030 | static __inline__ void raw_jmp_m_indexed
3030      emit_byte(0x24);
3031      emit_byte(0x05+8*r+0x40*mu);
3032      emit_long(base);
3033 + #endif
3034   }
3035  
3036   static __inline__ void raw_jmp_m(uae_u32 base)
# Line 1977 | Line 3043 | static __inline__ void raw_jmp_m(uae_u32
3043  
3044   static __inline__ void raw_call(uae_u32 t)
3045   {
3046 + #if USE_NEW_RTASM
3047 +    CALLm(t);
3048 + #else
3049      emit_byte(0xe8);
3050      emit_long(t-(uae_u32)target-4);
3051 + #endif
3052   }
3053  
3054   static __inline__ void raw_jmp(uae_u32 t)
3055   {
3056 + #if USE_NEW_RTASM
3057 +    JMPm(t);
3058 + #else
3059      emit_byte(0xe9);
3060      emit_long(t-(uae_u32)target-4);
3061 + #endif
3062   }
3063  
3064   static __inline__ void raw_jl(uae_u32 t)
3065   {
3066      emit_byte(0x0f);
3067      emit_byte(0x8c);
3068 <    emit_long(t-(uae_u32)target-4);
3068 >    emit_long(t-(uintptr)target-4);
3069   }
3070  
3071   static __inline__ void raw_jz(uae_u32 t)
3072   {
3073      emit_byte(0x0f);
3074      emit_byte(0x84);
3075 <    emit_long(t-(uae_u32)target-4);
3075 >    emit_long(t-(uintptr)target-4);
3076   }
3077  
3078   static __inline__ void raw_jnz(uae_u32 t)
3079   {
3080      emit_byte(0x0f);
3081      emit_byte(0x85);
3082 <    emit_long(t-(uae_u32)target-4);
3082 >    emit_long(t-(uintptr)target-4);
3083   }
3084  
3085   static __inline__ void raw_jnz_l_oponly(void)
# Line 2055 | Line 3129 | static __inline__ void raw_nop(void)
3129      emit_byte(0x90);
3130   }
3131  
3132 + static __inline__ void raw_emit_nop_filler(int nbytes)
3133 + {
3134 +  /* Source: GNU Binutils 2.12.90.0.15 */
3135 +  /* Various efficient no-op patterns for aligning code labels.
3136 +     Note: Don't try to assemble the instructions in the comments.
3137 +     0L and 0w are not legal.  */
3138 +  static const uae_u8 f32_1[] =
3139 +    {0x90};                                                                     /* nop                                  */
3140 +  static const uae_u8 f32_2[] =
3141 +    {0x89,0xf6};                                                        /* movl %esi,%esi               */
3142 +  static const uae_u8 f32_3[] =
3143 +    {0x8d,0x76,0x00};                                           /* leal 0(%esi),%esi    */
3144 +  static const uae_u8 f32_4[] =
3145 +    {0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3146 +  static const uae_u8 f32_5[] =
3147 +    {0x90,                                                                      /* nop                                  */
3148 +     0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3149 +  static const uae_u8 f32_6[] =
3150 +    {0x8d,0xb6,0x00,0x00,0x00,0x00};            /* leal 0L(%esi),%esi   */
3151 +  static const uae_u8 f32_7[] =
3152 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3153 +  static const uae_u8 f32_8[] =
3154 +    {0x90,                                                                      /* nop                                  */
3155 +     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3156 +  static const uae_u8 f32_9[] =
3157 +    {0x89,0xf6,                                                         /* movl %esi,%esi               */
3158 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3159 +  static const uae_u8 f32_10[] =
3160 +    {0x8d,0x76,0x00,                                            /* leal 0(%esi),%esi    */
3161 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3162 +  static const uae_u8 f32_11[] =
3163 +    {0x8d,0x74,0x26,0x00,                                       /* leal 0(%esi,1),%esi  */
3164 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3165 +  static const uae_u8 f32_12[] =
3166 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3167 +     0x8d,0xbf,0x00,0x00,0x00,0x00};            /* leal 0L(%edi),%edi   */
3168 +  static const uae_u8 f32_13[] =
3169 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3170 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3171 +  static const uae_u8 f32_14[] =
3172 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,        /* leal 0L(%esi,1),%esi */
3173 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3174 +  static const uae_u8 f32_15[] =
3175 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3176 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3177 +  static const uae_u8 f32_16[] =
3178 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3179 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3180 +  static const uae_u8 *const f32_patt[] = {
3181 +    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3182 +    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3183 +  };
3184 +  static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3185 +
3186 + #if defined(__x86_64__)
3187 +  /* The recommended way to pad 64bit code is to use NOPs preceded by
3188 +     maximally four 0x66 prefixes.  Balance the size of nops.  */
3189 +  if (nbytes == 0)
3190 +          return;
3191 +
3192 +  int i;
3193 +  int nnops = (nbytes + 3) / 4;
3194 +  int len = nbytes / nnops;
3195 +  int remains = nbytes - nnops * len;
3196 +
3197 +  for (i = 0; i < remains; i++) {
3198 +          emit_block(prefixes, len);
3199 +          raw_nop();
3200 +  }
3201 +  for (; i < nnops; i++) {
3202 +          emit_block(prefixes, len - 1);
3203 +          raw_nop();
3204 +  }
3205 + #else
3206 +  int nloops = nbytes / 16;
3207 +  while (nloops-- > 0)
3208 +        emit_block(f32_16, sizeof(f32_16));
3209 +
3210 +  nbytes %= 16;
3211 +  if (nbytes)
3212 +        emit_block(f32_patt[nbytes - 1], nbytes);
3213 + #endif
3214 + }
3215 +
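raw_emit_nop_filler pads alignment gaps with sequences that decode as a small number of wide instructions rather than a run of single-byte 0x90 nops, which is cheaper to fetch and decode. A hedged usage sketch, assuming get_target() returns the current emit pointer as it does elsewhere in this file; the helper name is illustrative:

    /* Sketch: align the next emitted instruction to a 16-byte
       boundary using the filler above. */
    static void align_target_16(void)
    {
        uintptr t = (uintptr)get_target();
        raw_emit_nop_filler((16 - (t & 15)) & 15);
    }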
3216  
3217   /*************************************************************************
3218   * Flag handling, to and fro UAE flag register                           *
# Line 2068 | Line 3226 | static __inline__ void raw_flags_to_reg(
3226   {
3227    raw_lahf(0);  /* Most flags in AH */
3228    //raw_setcc(r,0); /* V flag in AL */
3229 <  raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
3229 >  raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3230    
3231   #if 1   /* Let's avoid those nasty partial register stalls */
3232 <  //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
3233 <  raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3232 >  //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3233 >  raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3234    //live.state[FLAGTMP].status=CLEAN;
3235    live.state[FLAGTMP].status=INMEM;
3236    live.state[FLAGTMP].realreg=-1;
# Line 2092 | Line 3250 | static __inline__ void raw_reg_to_flags(
3250    raw_sahf(0);
3251   }
3252  
3253 + #define FLAG_NREG3 0  /* Set to -1 if any register will do */
3254 + static __inline__ void raw_flags_set_zero(int s, int tmp)
3255 + {
3256 +    raw_mov_l_rr(tmp,s);
3257 +    raw_lahf(s); /* flags into ah */
3258 +    raw_and_l_ri(s,0xffffbfff);
3259 +    raw_and_l_ri(tmp,0x00004000);
3260 +    raw_xor_l_ri(tmp,0x00004000);
3261 +    raw_or_l(s,tmp);
3262 +    raw_sahf(s);
3263 + }
3264 +
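The masks in raw_flags_set_zero follow from where LAHF puts the flags: ZF is bit 6 of EFLAGS, and LAHF copies FLAGS[7:0] into %ah, i.e. bits 8..15 of the register, so the copied Z bit lands at bit 14, hence 0x00004000 and its complement 0xffffbfff. The raw_pushfl variant further down operates on EFLAGS directly and therefore uses bit 6 (0x40). The macro names below are illustrative, a sketch of the bit positions only:

    #define X86_ZF_EFLAGS  (1 << 6)    /* ZF in EFLAGS, pushfl variant */
    #define X86_ZF_LAHF    (1 << 14)   /* ZF after LAHF: bit 6 + 8     */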
3265   #else
3266  
3267   #define FLAG_NREG1 -1  /* Set to -1 if any register will do */
# Line 2099 | Line 3269 | static __inline__ void raw_flags_to_reg(
3269   {
3270          raw_pushfl();
3271          raw_pop_l_r(r);
3272 <        raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3272 >        raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3273   //      live.state[FLAGTMP].status=CLEAN;
3274          live.state[FLAGTMP].status=INMEM;
3275          live.state[FLAGTMP].realreg=-1;
# Line 2118 | Line 3288 | static __inline__ void raw_reg_to_flags(
3288          raw_popfl();
3289   }
3290  
3291 + #define FLAG_NREG3 -1  /* Set to -1 if any register will do */
3292 + static __inline__ void raw_flags_set_zero(int s, int tmp)
3293 + {
3294 +    raw_mov_l_rr(tmp,s);
3295 +    raw_pushfl();
3296 +    raw_pop_l_r(s);
3297 +    raw_and_l_ri(s,0xffffffbf);
3298 +    raw_and_l_ri(tmp,0x00000040);
3299 +    raw_xor_l_ri(tmp,0x00000040);
3300 +    raw_or_l(s,tmp);
3301 +    raw_push_l_r(s);
3302 +    raw_popfl();
3303 + }
3304   #endif
3305  
3306   /* Apparently, there are enough instructions between flag store and
# Line 2125 | Line 3308 | static __inline__ void raw_reg_to_flags(
3308   static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3309   {
3310   #if 1
3311 <    raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3311 >    raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3312   #else
3313 <    raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3314 <    raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3313 >    raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3314 >    raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3315   #endif
3316   }
3317  
# Line 2136 | Line 3319 | static __inline__ void raw_load_flagreg(
3319   static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3320   {
3321      if (live.nat[target].canbyte)
3322 <        raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3322 >        raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3323      else if (live.nat[target].canword)
3324 <        raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3324 >        raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3325      else
3326 <        raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3326 >        raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3327   }
3328  
3329 + static __inline__ void raw_dec_sp(int off)
3330 + {
3331 +    if (off) raw_sub_l_ri(ESP_INDEX,off);
3332 + }
3333  
3334   static __inline__ void raw_inc_sp(int off)
3335   {
3336 <    raw_add_l_ri(ESP_INDEX,off);
3336 >    if (off) raw_add_l_ri(ESP_INDEX,off);
3337   }
3338  
3339   /*************************************************************************
# Line 2305 | Line 3492 | static void vec(int x, struct sigcontext
3492                  for (i=0;i<5;i++)
3493                      vecbuf[i]=target[i];
3494                  emit_byte(0xe9);
3495 <                emit_long((uae_u32)veccode-(uae_u32)target-4);
3495 >                emit_long((uintptr)veccode-(uintptr)target-4);
3496                  write_log("Create jump to %p\n",veccode);
3497              
3498                  write_log("Handled one access!\n");
# Line 2332 | Line 3519 | static void vec(int x, struct sigcontext
3519                  }
3520                  for (i=0;i<5;i++)
3521                      raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3522 <                raw_mov_l_mi((uae_u32)&in_handler,0);
3522 >                raw_mov_l_mi((uintptr)&in_handler,0);
3523                  emit_byte(0xe9);
3524 <                emit_long(sc.eip+len-(uae_u32)target-4);
3524 >                emit_long(sc.eip+len-(uintptr)target-4);
3525                  in_handler=1;
3526                  target=tmp;
3527              }
# Line 2429 | Line 3616 | enum {
3616    X86_PROCESSOR_K6,
3617    X86_PROCESSOR_ATHLON,
3618    X86_PROCESSOR_PENTIUM4,
3619 +  X86_PROCESSOR_X86_64,
3620    X86_PROCESSOR_max
3621   };
3622  
# Line 2439 | Line 3627 | static const char * x86_processor_string
3627    "PentiumPro",
3628    "K6",
3629    "Athlon",
3630 <  "Pentium4"
3630 >  "Pentium4",
3631 >  "x86-64"
3632   };
3633  
3634   static struct ptt {
# Line 2456 | Line 3645 | x86_alignments[X86_PROCESSOR_max] = {
3645    { 16, 15, 16,  7, 16 },
3646    { 32,  7, 32,  7, 32 },
3647    { 16,  7, 16,  7, 16 },
3648 <  {  0,  0,  0,  0,  0 }
3648 >  {  0,  0,  0,  0,  0 },
3649 >  { 16,  7, 16,  7, 16 }
3650   };
3651  
3652   static void
# Line 2490 | Line 3680 | x86_get_cpu_vendor(struct cpuinfo_x86 *c
3680   static void
3681   cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3682   {
3683 <  static uae_u8 cpuid_space[256];  
3683 >  const int CPUID_SPACE = 4096;
3684 >  uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3685 >  if (cpuid_space == VM_MAP_FAILED)
3686 >    abort();
3687 >  vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3688 >
3689 >  static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3690    uae_u8* tmp=get_target();
3691  
3692 +  s_op = op;
3693    set_target(cpuid_space);
3694    raw_push_l_r(0); /* eax */
3695    raw_push_l_r(1); /* ecx */
3696    raw_push_l_r(2); /* edx */
3697    raw_push_l_r(3); /* ebx */
3698 <  raw_mov_l_rm(0,(uae_u32)&op);
3698 >  raw_mov_l_rm(0,(uintptr)&s_op);
3699    raw_cpuid(0);
3700 <  if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3701 <  if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3702 <  if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3703 <  if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3700 >  raw_mov_l_mr((uintptr)&s_eax,0);
3701 >  raw_mov_l_mr((uintptr)&s_ebx,3);
3702 >  raw_mov_l_mr((uintptr)&s_ecx,1);
3703 >  raw_mov_l_mr((uintptr)&s_edx,2);
3704    raw_pop_l_r(3);
3705    raw_pop_l_r(2);
3706    raw_pop_l_r(1);
# Line 2512 | Line 3709 | cpuid(uae_u32 op, uae_u32 *eax, uae_u32
3709    set_target(tmp);
3710  
3711    ((cpuop_func*)cpuid_space)(0);
3712 +  if (eax != NULL) *eax = s_eax;
3713 +  if (ebx != NULL) *ebx = s_ebx;
3714 +  if (ecx != NULL) *ecx = s_ecx;
3715 +  if (edx != NULL) *edx = s_edx;
3716 +
3717 +  vm_release(cpuid_space, CPUID_SPACE);
3718   }
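
The rewritten cpuid() round-trips its results through static s_* variables and, more importantly, emits the stub into memory obtained from vm_acquire() and explicitly marked executable, instead of a static buffer that may be non-executable under W^X page policies. For comparison, a hedged sketch of what the generated stub computes, written with GCC-style inline assembly (illustration only; the source deliberately emits and runs a stub instead):

    static void cpuid_inline(uae_u32 op,
                             uae_u32 *a, uae_u32 *b, uae_u32 *c, uae_u32 *d)
    {
        uae_u32 eax, ebx, ecx, edx;
        __asm__ __volatile__("cpuid"
                             : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                             : "a" (op), "c" (0));
        if (a) *a = eax;
        if (b) *b = ebx;
        if (c) *c = ecx;
        if (d) *d = edx;
    }
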
3719  
3720   static void
# Line 2520 | Line 3723 | raw_init_cpu(void)
3723    struct cpuinfo_x86 *c = &cpuinfo;
3724  
3725    /* Defaults */
3726 +  c->x86_processor = X86_PROCESSOR_max;
3727    c->x86_vendor = X86_VENDOR_UNKNOWN;
3728    c->cpuid_level = -1;                          /* CPUID not detected */
3729    c->x86_model = c->x86_mask = 0;       /* So far unknown... */
# Line 2541 | Line 3745 | raw_init_cpu(void)
3745          uae_u32 tfms, brand_id;
3746          cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3747          c->x86 = (tfms >> 8) & 15;
3748 +        if (c->x86 == 0xf)
3749 +                c->x86 += (tfms >> 20) & 0xff; /* extended family */
3750          c->x86_model = (tfms >> 4) & 15;
3751 +        if (c->x86_model == 0xf)
3752 +                c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3753          c->x86_brand_id = brand_id & 0xff;
2546        if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2547                 (c->x86 == 0xf)) {
2548          /* AMD Extended Family and Model Values */
2549          c->x86 += (tfms >> 20) & 0xff;
2550          c->x86_model += (tfms >> 12) & 0xf0;
2551        }
3754          c->x86_mask = tfms & 15;
3755    } else {
3756          /* Have CPUID level 0 only - unheard of */
3757          c->x86 = 4;
3758    }
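
The decode above now applies the extended family/model fields whenever the base field saturates at 0xf, rather than only for AMD parts. A hedged worked example (the leaf-1 EAX value is hypothetical; assumes <stdio.h>):

    static void decode_tfms_example(void)
    {
        const uae_u32 tfms = 0x00100f22;      /* hypothetical leaf-1 EAX */
        int family = (tfms >> 8) & 15;        /* 0xf -> extension applies */
        if (family == 0xf)
            family += (tfms >> 20) & 0xff;    /* + 1 -> family 16 */
        int model = (tfms >> 4) & 15;         /* 2; extends only when 0xf */
        if (model == 0xf)
            model |= (tfms >> 12) & 0xf0;
        int stepping = tfms & 15;             /* 2 */
        printf("family %d model %d stepping %d\n", family, model, stepping);
    }
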
3759  
3760 +  /* AMD-defined flags: level 0x80000001 */
3761 +  uae_u32 xlvl;
3762 +  cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3763 +  if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3764 +        if ( xlvl >= 0x80000001 ) {
3765 +          uae_u32 features, extra_features;
3766 +          cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3767 +          if (features & (1 << 29)) {
3768 +                /* Assume x86-64 if long mode is supported */
3769 +                c->x86_processor = X86_PROCESSOR_X86_64;
3770 +          }
3771 +          if (extra_features & (1 << 0))
3772 +                  have_lahf_lm = true;
3773 +        }
3774 +  }
3775 +          
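
The extended-leaf probe above is a two-step dance: leaf 0x80000000 returns the highest supported extended leaf in EAX (the 0xffff0000 mask guards against CPUs that echo garbage there), and leaf 0x80000001 carries the AMD-defined feature bits, where EDX bit 29 is long mode (x86-64) and ECX bit 0 is LAHF/SAHF availability in 64-bit mode. A hedged restatement as a standalone helper (the name is illustrative):

    static bool host_has_long_mode(void)
    {
        uae_u32 max_ext, edx;
        cpuid(0x80000000, &max_ext, NULL, NULL, NULL);
        if ((max_ext & 0xffff0000) != 0x80000000 || max_ext < 0x80000001)
            return false;                 /* no usable extended leaves */
        cpuid(0x80000001, NULL, NULL, NULL, &edx);
        return (edx >> 29) & 1;           /* EDX bit 29 = LM (long mode) */
    }
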
3776    /* Canonicalize processor ID */
2559  c->x86_processor = X86_PROCESSOR_max;
3777    switch (c->x86) {
3778    case 3:
3779          c->x86_processor = X86_PROCESSOR_I386;
# Line 2577 | Line 3794 | raw_init_cpu(void)
3794            c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3795          break;
3796    case 15:
3797 <        if (c->x86_vendor == X86_VENDOR_INTEL) {
3798 <          /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3799 <          if (c->x86_brand_id >= 8)
3800 <                c->x86_processor = X86_PROCESSOR_PENTIUM4;
3801 <        }
3802 <        break;
3797 >          if (c->x86_processor == X86_PROCESSOR_max) {
3798 >                  switch (c->x86_vendor) {
3799 >                  case X86_VENDOR_INTEL:
3800 >                          c->x86_processor = X86_PROCESSOR_PENTIUM4;
3801 >                          break;
3802 >                  case X86_VENDOR_AMD:
3803 >                          /* Assume a 32-bit Athlon processor if not in long mode */
3804 >                          c->x86_processor = X86_PROCESSOR_ATHLON;
3805 >                          break;
3806 >                  }
3807 >          }
3808 >          break;
3809    }
3810    if (c->x86_processor == X86_PROCESSOR_max) {
3811 <        fprintf(stderr, "Error: unknown processor type\n");
3811 >        c->x86_processor = X86_PROCESSOR_I386;
3812 >        fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3813          fprintf(stderr, "  Family  : %d\n", c->x86);
3814          fprintf(stderr, "  Model   : %d\n", c->x86_model);
3815          fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
3816 +        fprintf(stderr, "  Vendor  : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3817          if (c->x86_brand_id)
3818            fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
2594        abort();
3819    }
3820  
3821    /* Have CMOV support? */
3822 <  have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3822 >  have_cmov = c->x86_hwcap & (1 << 15);
3823  
3824    /* Can the host CPU suffer from partial register stalls? */
3825    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
# Line 2618 | Line 3842 | raw_init_cpu(void)
3842                          x86_processor_string_table[c->x86_processor]);
3843   }
3844  
3845 + static bool target_check_bsf(void)
3846 + {
3847 +        bool mismatch = false;
3848 +        for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3849 +        for (int g_CF = 0; g_CF <= 1; g_CF++) {
3850 +        for (int g_OF = 0; g_OF <= 1; g_OF++) {
3851 +        for (int g_SF = 0; g_SF <= 1; g_SF++) {
3852 +                for (int value = -1; value <= 1; value++) {
3853 +                        unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3854 +                        unsigned long tmp = value;
3855 +                        __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3856 +                                                                  : "+r" (flags), "+r" (tmp) : : "cc");
3857 +                        int OF = (flags >> 11) & 1;
3858 +                        int SF = (flags >>  7) & 1;
3859 +                        int ZF = (flags >>  6) & 1;
3860 +                        int CF = flags & 1;
3861 +                        tmp = (value == 0);
3862 +                        if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3863 +                                mismatch = true;
3864 +                }
3865 +        }}}}
3866 +        if (mismatch)
3867 +                write_log("Target CPU defines all flags on BSF instruction\n");
3868 +        return !mismatch;
3869 + }
3870 +
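
target_check_bsf() probes a documented gray area: Intel specifies only that BSF sets ZF when the source is zero and leaves the remaining arithmetic flags undefined, while some CPUs (notably AMD's) in practice preserve the untouched flags. The probe runs BSF under every combination of ZF/CF/OF/SF and a negative, zero, and positive input, and returns true only if the flags always come back as "ZF reflects a zero source, everything else preserved", in which case the JIT can skip extra flag fixups around BSF. A hypothetical usage sketch (the flag name is illustrative, not from the source):

    static bool have_lazy_bsf;            /* illustrative global */

    static void probe_host_quirks(void)
    {
        have_lazy_bsf = target_check_bsf();
    }
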
3871  
3872   /*************************************************************************
3873   * FPU stuff                                                             *
# Line 2740 | Line 3990 | static __inline__ void tos_make(int r)
3990      emit_byte(0xd8+(live.tos+1)-live.spos[r]);  /* store top of stack in reg,
3991                                           and pop it*/
3992   }
3993 <    
3994 <        
3993 >
3994 > /* FP helper functions */
3995 > #if USE_NEW_RTASM
3996 > #define DEFINE_OP(NAME, GEN)                    \
3997 > static inline void raw_##NAME(uint32 m)         \
3998 > {                                               \
3999 >    GEN(m, X86_NOREG, X86_NOREG, 1);            \
4000 > }
4001 > DEFINE_OP(fstl,  FSTLm);
4002 > DEFINE_OP(fstpl, FSTPLm);
4003 > DEFINE_OP(fldl,  FLDLm);
4004 > DEFINE_OP(fildl, FILDLm);
4005 > DEFINE_OP(fistl, FISTLm);
4006 > DEFINE_OP(flds,  FLDSm);
4007 > DEFINE_OP(fsts,  FSTSm);
4008 > DEFINE_OP(fstpt, FSTPTm);
4009 > DEFINE_OP(fldt,  FLDTm);
4010 > #else
4011 > #define DEFINE_OP(NAME, OP1, OP2)               \
4012 > static inline void raw_##NAME(uint32 m)         \
4013 > {                                               \
4014 >    emit_byte(OP1);                             \
4015 >    emit_byte(OP2);                             \
4016 >    emit_long(m);                               \
4017 > }
4018 > DEFINE_OP(fstl,  0xdd, 0x15);
4019 > DEFINE_OP(fstpl, 0xdd, 0x1d);
4020 > DEFINE_OP(fldl,  0xdd, 0x05);
4021 > DEFINE_OP(fildl, 0xdb, 0x05);
4022 > DEFINE_OP(fistl, 0xdb, 0x15);
4023 > DEFINE_OP(flds,  0xd9, 0x05);
4024 > DEFINE_OP(fsts,  0xd9, 0x15);
4025 > DEFINE_OP(fstpt, 0xdb, 0x3d);
4026 > DEFINE_OP(fldt,  0xdb, 0x2d);
4027 > #endif
4028 > #undef DEFINE_OP
4029 >
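
In the legacy (#else) branch, each opcode pair is an x87 escape byte followed by a ModRM byte that selects a 32-bit absolute memory operand, with the reg field carrying the operation's /digit. A hedged sketch of how those second bytes decompose (the helper name is illustrative):

    static inline unsigned char modrm_abs32(unsigned digit)
    {
        /* mod=00, r/m=101 selects [disp32]; reg holds the x87 /digit */
        return (unsigned char)((digit << 3) | 5);
    }
    /* e.g. modrm_abs32(2) == 0x15 (FST m64fp is DD /2),
            modrm_abs32(3) == 0x1d (FSTP m64fp is DD /3),
            modrm_abs32(7) == 0x3d (FSTP m80fp is DB /7) */
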
4030   LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4031   {
4032      make_tos(r);
4033 <    emit_byte(0xdd);
2749 <    emit_byte(0x15);
2750 <    emit_long(m);
4033 >    raw_fstl(m);
4034   }
4035   LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4036  
4037   LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4038   {
4039      make_tos(r);
4040 <    emit_byte(0xdd);
2758 <    emit_byte(0x1d);
2759 <    emit_long(m);
4040 >    raw_fstpl(m);
4041      live.onstack[live.tos]=-1;
4042      live.tos--;
4043      live.spos[r]=-2;
# Line 2765 | Line 4046 | LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW
4046  
4047   LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4048   {
4049 <    emit_byte(0xdd);
2769 <    emit_byte(0x05);
2770 <    emit_long(m);
4049 >    raw_fldl(m);
4050      tos_make(r);
4051   }
4052   LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4053  
4054   LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4055   {
4056 <    emit_byte(0xdb);
2778 <    emit_byte(0x05);
2779 <    emit_long(m);
4056 >    raw_fildl(m);
4057      tos_make(r);
4058   }
4059   LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
# Line 2784 | Line 4061 | LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r,
4061   LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4062   {
4063      make_tos(r);
4064 <    emit_byte(0xdb);
2788 <    emit_byte(0x15);
2789 <    emit_long(m);
4064 >    raw_fistl(m);
4065   }
4066   LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4067  
4068   LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4069   {
4070 <    emit_byte(0xd9);
2796 <    emit_byte(0x05);
2797 <    emit_long(m);
4070 >    raw_flds(m);
4071      tos_make(r);
4072   }
4073   LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
# Line 2802 | Line 4075 | LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r,
4075   LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4076   {
4077      make_tos(r);
4078 <    emit_byte(0xd9);
2806 <    emit_byte(0x15);
2807 <    emit_long(m);
4078 >    raw_fsts(m);
4079   }
4080   LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4081  
# Line 2819 | Line 4090 | LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(ME
4090      emit_byte(0xd9);     /* Get a copy to the top of stack */
4091      emit_byte(0xc0+rs);
4092  
4093 <    emit_byte(0xdb);  /* store and pop it */
2823 <    emit_byte(0x3d);
2824 <    emit_long(m);
4093 >    raw_fstpt(m);       /* store and pop it */
4094   }
4095   LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4096  
# Line 2830 | Line 4099 | LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_dro
4099      int rs;
4100  
4101      make_tos(r);
4102 <    emit_byte(0xdb);  /* store and pop it */
2834 <    emit_byte(0x3d);
2835 <    emit_long(m);
4102 >    raw_fstpt(m);       /* store and pop it */
4103      live.onstack[live.tos]=-1;
4104      live.tos--;
4105      live.spos[r]=-2;
# Line 2841 | Line 4108 | LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(M
4108  
4109   LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4110   {
4111 <    emit_byte(0xdb);
2845 <    emit_byte(0x2d);
2846 <    emit_long(m);
4111 >    raw_fldt(m);
4112      tos_make(r);
4113   }
4114   LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
# Line 3052 | Line 4317 | LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d
4317      emit_byte(0xf0);  /* f2xm1 */
4318      emit_byte(0xdc);
4319      emit_byte(0x05);
4320 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4320 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4321      emit_byte(0xd9);
4322      emit_byte(0xfd);  /* and scale it */
4323      emit_byte(0xdd);
# Line 3086 | Line 4351 | LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d,
4351      emit_byte(0xf0);  /* f2xm1 */
4352      emit_byte(0xdc);
4353      emit_byte(0x05);
4354 <    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
4354 >    emit_long((uintptr)&one);  /* Add '1' without using extra stack space */
4355      emit_byte(0xd9);
4356      emit_byte(0xfd);  /* and scale it */
4357      emit_byte(0xdd);
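
Both raw_ftwotox_rr and raw_fetox_rr above lean on the same identity: f2xm1 only accepts arguments in [-1,+1], so the exponent is split into integer and fractional parts, 2^frac is rebuilt as (2^frac - 1) + 1 via the fadd against `one`, and fscale multiplies in the 2^int factor (e^x simply pre-scales the argument by log2(e) first). A hedged C model of the sequence for 2^x (assumes <math.h>; names are illustrative):

    static double two_to_x(double x)
    {
        double ipart = nearbyint(x);                /* frndint */
        double frac  = x - ipart;                   /* |frac| <= 0.5 */
        double p2f   = expm1(frac * M_LN2) + 1.0;   /* f2xm1, then + one */
        return ldexp(p2f, (int)ipart);              /* fscale: * 2^ipart */
    }
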
