ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.23
Committed: 2004-11-02T23:28:19Z (19 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.22: +47 -32 lines
Log Message:
fix JIT FPU for x86_64

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 cebix 1.19 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 cebix 1.19 * Basilisk II (C) 1997-2004 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
56     /* The register in which subroutines return an integer return value */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__)
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
84     uae_s8 always_used[]={4,-1}; /* -1-terminated register index list; 4 is ESP_INDEX */
85 gbeauche 1.20 #if defined(__x86_64__)
86     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* -1-terminated; every index 0..15 except 4 (ESP_INDEX) */
87     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* same set as can_byte in the x86-64 build */
88     #else
89 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1}; /* EAX_INDEX, ECX_INDEX, EDX_INDEX, EBX_INDEX only */
90     uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; /* every index 0..7 except 4 (ESP_INDEX) */
91 gbeauche 1.20 #endif
92 gbeauche 1.1
93 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
94     /* Make sure interpretive core does not use cpuopti */
95     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 gbeauche 1.20 #error FIXME: code not ready
97 gbeauche 1.17 #else
98 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
99     by the caller */
100 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 gbeauche 1.17 #endif
102 gbeauche 1.1
103     /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
105     so we need to preserve some, but don't want to rely on everyone else
106     also saving those registers
107     - Special registers (such as the stack pointer) should not be "preserved"
108     by pushing, even though they are "saved" across function calls
109     */
110 gbeauche 1.21 #if defined(__x86_64__)
111     /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
113     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
114 gbeauche 1.21 #else
115     static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
116     #endif
117 gbeauche 1.1
118     /* Whether classes of instructions do or don't clobber the native flags */
119     #define CLOBBER_MOV
120     #define CLOBBER_LEA
121     #define CLOBBER_CMOV
122     #define CLOBBER_POP
123     #define CLOBBER_PUSH
124     #define CLOBBER_SUB clobber_flags()
125     #define CLOBBER_SBB clobber_flags()
126     #define CLOBBER_CMP clobber_flags()
127     #define CLOBBER_ADD clobber_flags()
128     #define CLOBBER_ADC clobber_flags()
129     #define CLOBBER_AND clobber_flags()
130     #define CLOBBER_OR clobber_flags()
131     #define CLOBBER_XOR clobber_flags()
132    
133     #define CLOBBER_ROL clobber_flags()
134     #define CLOBBER_ROR clobber_flags()
135     #define CLOBBER_SHLL clobber_flags()
136     #define CLOBBER_SHRL clobber_flags()
137     #define CLOBBER_SHRA clobber_flags()
138     #define CLOBBER_TEST clobber_flags()
139     #define CLOBBER_CL16
140     #define CLOBBER_CL8
141 gbeauche 1.20 #define CLOBBER_SE32
142 gbeauche 1.1 #define CLOBBER_SE16
143     #define CLOBBER_SE8
144 gbeauche 1.20 #define CLOBBER_ZE32
145 gbeauche 1.1 #define CLOBBER_ZE16
146     #define CLOBBER_ZE8
147     #define CLOBBER_SW16 clobber_flags()
148     #define CLOBBER_SW32
149     #define CLOBBER_SETCC
150     #define CLOBBER_MUL clobber_flags()
151     #define CLOBBER_BT clobber_flags()
152     #define CLOBBER_BSF clobber_flags()
153    
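154 gbeauche 1.13 /* FIXME: the new run-time assembler (USE_NEW_RTASM) is only force-enabled on x86-64 here; it stays opt-in elsewhere until it has been proofread. */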
155 gbeauche 1.20 #if defined(__x86_64__)
156     #define USE_NEW_RTASM 1
157     #endif
158    
159     #if USE_NEW_RTASM
160 gbeauche 1.13
161     #if defined(__x86_64__)
162     #define X86_TARGET_64BIT 1
163     #endif
164     #define X86_FLAT_REGISTERS 0
165 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
166     #define X86_OPTIMIZE_ROTSHI 1
167 gbeauche 1.13 #include "codegen_x86.h"
168    
169     #define x86_emit_byte(B) emit_byte(B)
170     #define x86_emit_word(W) emit_word(W)
171     #define x86_emit_long(L) emit_long(L)
172 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
173 gbeauche 1.13 #define x86_get_target() get_target()
174     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
175    
176     static void jit_fail(const char *msg, const char *file, int line, const char *function)
177     {
        /* Report an unrecoverable code-generation error and terminate.
           Prints msg together with the function name, file and line supplied
           by the caller to stderr, then calls abort(), so control never
           returns to the caller.  Reached through the x86_emit_failure()
           macro, which passes __FILE__, __LINE__ and __FUNCTION__. */
178     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
179     function, file, line, msg);
180     abort();
181     }
182    
183     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
184     {
        /* Emit a push of register r.  Despite the _l suffix, the x86-64
           build emits PUSHQr and only other builds emit PUSHLr.
           NOTE(review): presumably because 64-bit mode pushes full-width
           registers -- confirm against codegen_x86.h. */
185 gbeauche 1.20 #if defined(__x86_64__)
186     PUSHQr(r);
187     #else
188 gbeauche 1.13 PUSHLr(r);
189 gbeauche 1.20 #endif
190 gbeauche 1.13 }
191     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
192    
193     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
194     {
        /* Emit a pop into register r.  Despite the _l suffix, the x86-64
           build emits POPQr and only other builds emit POPLr.
           NOTE(review): r is declared R4 although the emitted instruction
           names it as the pop target -- confirm the operand class is
           intended. */
195 gbeauche 1.20 #if defined(__x86_64__)
196     POPQr(r);
197     #else
198 gbeauche 1.13 POPLr(r);
199 gbeauche 1.20 #endif
200 gbeauche 1.13 }
201     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
202    
203     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
204     {
205     BTLir(i, r);
206     }
207     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
208    
209     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
210     {
211     BTLrr(b, r);
212     }
213     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
214    
215     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
216     {
217     BTCLir(i, r);
218     }
219     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
220    
221     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
222     {
223     BTCLrr(b, r);
224     }
225     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
226    
227     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
228     {
229     BTRLir(i, r);
230     }
231     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
232    
233     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
234     {
235     BTRLrr(b, r);
236     }
237     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
238    
239     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
240     {
241     BTSLir(i, r);
242     }
243     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
244    
245     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
246     {
247     BTSLrr(b, r);
248     }
249     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
250    
251     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
252     {
253     SUBWir(i, d);
254     }
255     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
256    
257     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
258     {
259     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
260     }
261     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
262    
263     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
264     {
265     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
266     }
267     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
268    
269     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
270     {
271     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
272     }
273     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
274    
275     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
276     {
277     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
278     }
279     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
280    
281     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
282     {
283     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
284     }
285     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
286    
287     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
288     {
289     ROLBir(i, r);
290     }
291     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
292    
293     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
294     {
295     ROLWir(i, r);
296     }
297     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
298    
299     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
300     {
301     ROLLir(i, r);
302     }
303     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
304    
305     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
306     {
307     ROLLrr(r, d);
308     }
309     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
310    
311     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
312     {
313     ROLWrr(r, d);
314     }
315     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
316    
317     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
318     {
319     ROLBrr(r, d);
320     }
321     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
322    
323     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
324     {
325     SHLLrr(r, d);
326     }
327     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
328    
329     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
330     {
331     SHLWrr(r, d);
332     }
333     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
334    
335     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
336     {
337     SHLBrr(r, d);
338     }
339     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
340    
341     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
342     {
343     RORBir(i, r);
344     }
345     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
346    
347     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
348     {
349     RORWir(i, r);
350     }
351     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
352    
353     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
354     {
355     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
356     }
357     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
358    
359     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
360     {
361     RORLir(i, r);
362     }
363     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
364    
365     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
366     {
367     RORLrr(r, d);
368     }
369     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
370    
371     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
372     {
373     RORWrr(r, d);
374     }
375     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
376    
377     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
378     {
379     RORBrr(r, d);
380     }
381     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
382    
383     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
384     {
385     SHRLrr(r, d);
386     }
387     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
388    
389     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
390     {
391     SHRWrr(r, d);
392     }
393     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
394    
395     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
396     {
397     SHRBrr(r, d);
398     }
399     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
400    
401     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
402     {
403 gbeauche 1.14 SARLrr(r, d);
404 gbeauche 1.13 }
405     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
406    
407     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
408     {
409 gbeauche 1.14 SARWrr(r, d);
410 gbeauche 1.13 }
411     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
412    
413     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
414     {
415 gbeauche 1.14 SARBrr(r, d);
416 gbeauche 1.13 }
417     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
418    
419     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
420     {
421     SHLLir(i, r);
422     }
423     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
424    
425     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
426     {
427     SHLWir(i, r);
428     }
429     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
430    
431     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
432     {
433     SHLBir(i, r);
434     }
435     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
436    
437     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
438     {
439     SHRLir(i, r);
440     }
441     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
442    
443     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
444     {
445     SHRWir(i, r);
446     }
447     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
448    
449     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
450     {
451     SHRBir(i, r);
452     }
453     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
454    
455     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
456     {
457 gbeauche 1.14 SARLir(i, r);
458 gbeauche 1.13 }
459     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
460    
461     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
462     {
463 gbeauche 1.14 SARWir(i, r);
464 gbeauche 1.13 }
465     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
466    
467     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
468     {
469 gbeauche 1.14 SARBir(i, r);
470 gbeauche 1.13 }
471     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
472    
473     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
474     {
        /* Emit SAHF().  dummy_ah is never referenced in the body.
           NOTE(review): presumably the parameter exists only so the caller's
           register bookkeeping sees AH as an input -- confirm. */
475     SAHF();
476     }
477     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
478    
479     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
480     {
        /* Emit CPUID().  dummy_eax is never referenced in the body.
           NOTE(review): the CPUID instruction also writes EAX/EBX/ECX/EDX;
           nothing in this wrapper declares that -- confirm callers account
           for it. */
481     CPUID();
482     }
483     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
484    
485     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
486     {
        /* Emit LAHF().  dummy_ah is never referenced in the body.
           NOTE(review): presumably the parameter exists only so the caller's
           register bookkeeping sees AH as an output -- confirm. */
487     LAHF();
488     }
489     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
490    
491     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
492     {
493     SETCCir(cc, d);
494     }
495     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
496    
497     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
498     {
499     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
500     }
501     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
502    
503     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
504     {
        /* Conditional register-to-register move of s into d under condition
           code cc.  Emits CMOVLrr when have_cmov is set.  Otherwise it emits
           a short conditional jump on cc^1 followed by a plain MOVLrr, so
           the move is skipped when the jump is taken (flipping the low bit
           of an x86 condition code negates the condition).
           The displacement 2 is presumably the encoded size of the MOVLrr
           that follows -- keep the two in sync if the fallback changes.
           The x86-64 build logs and aborts instead of using the fallback. */
505 gbeauche 1.15 if (have_cmov)
506     CMOVLrr(cc, s, d);
507     else { /* replacement using branch and mov */
508     #if defined(__x86_64__)
509     write_log("x86-64 implementations are bound to have CMOV!\n");
510     abort();
511     #endif
512     JCCSii(cc^1, 2);
513     MOVLrr(s, d);
514     }
515 gbeauche 1.13 }
516     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
517    
518     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
519     {
520     BSFLrr(s, d);
521     }
522     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
523    
524 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
525     {
        /* Emit MOVSLQrr(s, d).
           NOTE(review): the macro name suggests a 32-to-64-bit sign
           extension, which would only be meaningful in the x86-64 build;
           this wrapper is not guarded by __x86_64__ here -- confirm how
           codegen_x86.h handles other builds. */
526     MOVSLQrr(s, d);
527     }
528     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
529    
530 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
531     {
532     MOVSWLrr(s, d);
533     }
534     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
535    
536     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
537     {
538     MOVSBLrr(s, d);
539     }
540     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
541    
542     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
543     {
544     MOVZWLrr(s, d);
545     }
546     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
547    
548     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
549     {
550     MOVZBLrr(s, d);
551     }
552     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
553    
554     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
555     {
556 gbeauche 1.14 IMULLrr(s, d);
557 gbeauche 1.13 }
558     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
559    
560     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
561     {
        /* Widening multiply via the one-operand IMULLr(s).
           Only the fixed register pair d == MUL_NREG1 (EAX_INDEX) and
           s == MUL_NREG2 (EDX_INDEX) is accepted; any other pair is logged
           and the process aborts before anything is emitted.  Per the
           MUL_NREG1/MUL_NREG2 definitions in this file, %eax holds the low
           32 bits and %edx the high 32 bits after the multiply. */
562 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
563     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
564 gbeauche 1.13 abort();
565 gbeauche 1.14 }
566     IMULLr(s);
567 gbeauche 1.13 }
568     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
569    
570     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
571     {
        /* Widening multiply via the one-operand MULLr(s).
           Only the fixed register pair d == MUL_NREG1 (EAX_INDEX) and
           s == MUL_NREG2 (EDX_INDEX) is accepted; any other pair is logged
           and the process aborts before anything is emitted.  Per the
           MUL_NREG1/MUL_NREG2 definitions in this file, %eax holds the low
           32 bits and %edx the high 32 bits after the multiply. */
572 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
573     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
574 gbeauche 1.13 abort();
575 gbeauche 1.14 }
576     MULLr(s);
577 gbeauche 1.13 }
578     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
579    
580     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
581     {
        /* Not implemented: calling this always abort()s and emits no code.
           Neither d nor s is used. */
582 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
583 gbeauche 1.13 }
584     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
585    
586     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
587     {
588     MOVBrr(s, d);
589     }
590     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
591    
592     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
593     {
594     MOVWrr(s, d);
595     }
596     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
597    
598     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
599     {
600     MOVLmr(0, baser, index, factor, d);
601     }
602     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
603    
604     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
605     {
606     MOVWmr(0, baser, index, factor, d);
607     }
608     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
609    
610     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
611     {
612     MOVBmr(0, baser, index, factor, d);
613     }
614     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
615    
616     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
617     {
618     MOVLrm(s, 0, baser, index, factor);
619     }
620     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
621    
622     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
623     {
624     MOVWrm(s, 0, baser, index, factor);
625     }
626     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
627    
628     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
629     {
630     MOVBrm(s, 0, baser, index, factor);
631     }
632     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
633    
634     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
635     {
636     MOVLrm(s, base, baser, index, factor);
637     }
638     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
639    
640     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
641     {
642     MOVWrm(s, base, baser, index, factor);
643     }
644     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
645    
646     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
647     {
648     MOVBrm(s, base, baser, index, factor);
649     }
650     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
651    
652     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
653     {
654     MOVLmr(base, baser, index, factor, d);
655     }
656     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
657    
658     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
659     {
660     MOVWmr(base, baser, index, factor, d);
661     }
662     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
663    
664     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
665     {
666     MOVBmr(base, baser, index, factor, d);
667     }
668     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
669    
670     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
671     {
672     MOVLmr(base, X86_NOREG, index, factor, d);
673     }
674     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
675    
676     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
677     {
        /* Conditional load into d from the operand (base, no base register,
           index, factor) under condition code cond.  Emits CMOVLmr when
           have_cmov is set.  Otherwise it emits a short conditional jump on
           cond^1 followed by a plain MOVLmr of the same operand, so the load
           is skipped when the jump is taken (flipping the low bit of an x86
           condition code negates the condition).
           The displacement 7 is presumably the encoded size of the MOVLmr
           that follows -- keep the two in sync if the fallback changes.
           The x86-64 build logs and aborts instead of using the fallback. */
678 gbeauche 1.15 if (have_cmov)
679     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
680     else { /* replacement using branch and mov */
681     #if defined(__x86_64__)
682     write_log("x86-64 implementations are bound to have CMOV!\n");
683     abort();
684     #endif
685     JCCSii(cond^1, 7);
686     MOVLmr(base, X86_NOREG, index, factor, d);
687     }
688 gbeauche 1.13 }
689     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
690    
691     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
692     {
        /* Conditional load into d from the operand at mem (no base or index
           register) under condition code cond.  Emits CMOVLmr when have_cmov
           is set.  Otherwise it emits a short conditional jump on cond^1
           followed by a plain MOVLmr of the same operand, so the load is
           skipped when the jump is taken (flipping the low bit of an x86
           condition code negates the condition).
           The displacement 6 is presumably the encoded size of the MOVLmr
           that follows -- keep the two in sync if the fallback changes.
           The x86-64 build logs and aborts instead of using the fallback. */
693 gbeauche 1.15 if (have_cmov)
694     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
695     else { /* replacement using branch and mov */
696     #if defined(__x86_64__)
697     write_log("x86-64 implementations are bound to have CMOV!\n");
698     abort();
699     #endif
700     JCCSii(cond^1, 6);
701     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
702     }
703 gbeauche 1.13 }
704     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
705    
706     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
707     {
708     MOVLmr(offset, s, X86_NOREG, 1, d);
709     }
710     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
711    
712     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
713     {
714     MOVWmr(offset, s, X86_NOREG, 1, d);
715     }
716     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
717    
718     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
719     {
720     MOVBmr(offset, s, X86_NOREG, 1, d);
721     }
722     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
723    
724     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
725     {
        /* Load d from the operand (offset, base register s, no index).
           The emitted MOVLmr call is identical to the one in raw_mov_l_rR.
           NOTE(review): the two entry points presumably differ only in how
           callers treat them -- confirm before merging. */
726     MOVLmr(offset, s, X86_NOREG, 1, d);
727     }
728     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
729    
730     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
731     {
732     MOVWmr(offset, s, X86_NOREG, 1, d);
733     }
734     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
735    
736     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
737     {
738     MOVBmr(offset, s, X86_NOREG, 1, d);
739     }
740     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
741    
742     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
743     {
744     MOVLim(i, offset, d, X86_NOREG, 1);
745     }
746     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
747    
748     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
749     {
750     MOVWim(i, offset, d, X86_NOREG, 1);
751     }
752     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
753    
754     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
755     {
756     MOVBim(i, offset, d, X86_NOREG, 1);
757     }
758     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
759    
760     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
761     {
762     MOVLrm(s, offset, d, X86_NOREG, 1);
763     }
764     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
765    
766     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
767     {
768     MOVWrm(s, offset, d, X86_NOREG, 1);
769     }
770     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
771    
772     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
773     {
774     MOVBrm(s, offset, d, X86_NOREG, 1);
775     }
776     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
777    
778     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
779     {
780     LEALmr(offset, s, X86_NOREG, 1, d);
781     }
782     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
783    
784     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
785     {
786     LEALmr(offset, s, index, factor, d);
787     }
788     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
789    
790     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
791     {
792     LEALmr(0, s, index, factor, d);
793     }
794     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
795    
796     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
797     {
        /* Store s to the operand (offset, base register d, no index).
           The emitted MOVLrm call is identical to the one in raw_mov_l_Rr.
           NOTE(review): the two entry points presumably differ only in how
           callers treat them -- confirm before merging. */
798     MOVLrm(s, offset, d, X86_NOREG, 1);
799     }
800     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
801    
802     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
803     {
804     MOVWrm(s, offset, d, X86_NOREG, 1);
805     }
806     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
807    
808     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
809     {
810     MOVBrm(s, offset, d, X86_NOREG, 1);
811     }
812     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
813    
814     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
815     {
816     BSWAPLr(r);
817     }
818     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
819    
820     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
821     {
        /* 16-bit byte swap of r, implemented as ROLWir(8, r) rather than a
           BSWAP form: rotating a 16-bit value by 8 exchanges its two
           bytes. */
822     ROLWir(8, r);
823     }
824     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
825    
826     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
827     {
828     MOVLrr(s, d);
829     }
830     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
831    
832     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
833     {
834     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
835     }
836     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
837    
838     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
839     {
840     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
841     }
842     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
843    
844     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
845     {
846     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
847     }
848     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
849    
850     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
851     {
852     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
853     }
854     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
855    
856     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
857     {
858     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
859     }
860     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
861    
862     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
863     {
864     MOVLir(s, d);
865     }
866     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
867    
868     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
869     {
870     MOVWir(s, d);
871     }
872     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
873    
874     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
875     {
876     MOVBir(s, d);
877     }
878     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
879    
880     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
881     {
882     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
883     }
884     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
885    
886     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
887     {
888     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
889     }
890     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
891    
892     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
893     {
894     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
895     }
896     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
897    
898     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
899     {
900     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
901     }
902     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
903    
904     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
905     {
906     TESTLir(i, d);
907     }
908     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
909    
910     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
911     {
912     TESTLrr(s, d);
913     }
914     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
915    
916     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
917     {
918     TESTWrr(s, d);
919     }
920     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
921    
922     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
923     {
924     TESTBrr(s, d);
925     }
926     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
927    
928     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
929     {
930     ANDLir(i, d);
931     }
932     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
933    
934     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
935     {
936     ANDWir(i, d);
937     }
938     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
939    
940     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
941     {
942     ANDLrr(s, d);
943     }
944     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
945    
946     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
947     {
948     ANDWrr(s, d);
949     }
950     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
951    
952     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
953     {
954     ANDBrr(s, d);
955     }
956     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
957    
958     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
959     {
960     ORLir(i, d);
961     }
962     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
963    
964     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
965     {
966     ORLrr(s, d);
967     }
968     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
969    
970     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
971     {
972     ORWrr(s, d);
973     }
974     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
975    
976     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
977     {
978     ORBrr(s, d);
979     }
980     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
981    
982     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
983     {
984     ADCLrr(s, d);
985     }
986     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
987    
988     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
989     {
990     ADCWrr(s, d);
991     }
992     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
993    
994     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
995     {
996     ADCBrr(s, d);
997     }
998     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
999    
1000     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1001     {
1002     ADDLrr(s, d);
1003     }
1004     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1005    
1006     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1007     {
1008     ADDWrr(s, d);
1009     }
1010     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1011    
1012     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1013     {
1014     ADDBrr(s, d);
1015     }
1016     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1017    
1018     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1019     {
1020     SUBLir(i, d);
1021     }
1022     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1023    
1024     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1025     {
1026     SUBBir(i, d);
1027     }
1028     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1029    
1030     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1031     {
1032     ADDLir(i, d);
1033     }
1034     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1035    
1036     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1037     {
1038     ADDWir(i, d);
1039     }
1040     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1041    
1042     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1043     {
1044     ADDBir(i, d);
1045     }
1046     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1047    
1048     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1049     {
1050     SBBLrr(s, d);
1051     }
1052     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1053    
1054     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1055     {
1056     SBBWrr(s, d);
1057     }
1058     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1059    
1060     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1061     {
1062     SBBBrr(s, d);
1063     }
1064     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1065    
1066     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1067     {
1068     SUBLrr(s, d);
1069     }
1070     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1071    
1072     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1073     {
1074     SUBWrr(s, d);
1075     }
1076     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1077    
1078     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1079     {
1080     SUBBrr(s, d);
1081     }
1082     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1083    
1084     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1085     {
1086     CMPLrr(s, d);
1087     }
1088     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1089    
1090     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1091     {
1092     CMPLir(i, r);
1093     }
1094     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1095    
1096     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1097     {
1098     CMPWrr(s, d);
1099     }
1100     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1101    
1102     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1103     {
1104     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1105     }
1106     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1107    
1108     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1109     {
1110     CMPBir(i, d);
1111     }
1112     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1113    
1114     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1115     {
1116     CMPBrr(s, d);
1117     }
1118     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1119    
1120     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1121     {
1122     CMPLmr(offset, X86_NOREG, index, factor, d);
1123     }
1124     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1125    
1126     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1127     {
1128     XORLrr(s, d);
1129     }
1130     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1131    
1132     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1133     {
1134     XORWrr(s, d);
1135     }
1136     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1137    
1138     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1139     {
1140     XORBrr(s, d);
1141     }
1142     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1143    
1144     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1145     {
1146     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1147     }
1148     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1149    
1150     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1151     {
1152     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1153     }
1154     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1155    
1156     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1157     {
1158     XCHGLrr(r2, r1);
1159     }
1160     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1161    
1162     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1163     {
1164 gbeauche 1.18 PUSHF();
1165 gbeauche 1.13 }
1166     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1167    
1168     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1169     {
1170 gbeauche 1.18 POPF();
1171 gbeauche 1.13 }
1172     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1173    
1174     #else
1175    
/* Encoder tuning switches for the hand-written emitters below. */
const bool optimize_accum      = true;	/* allow the short accumulator-only opcode forms */
const bool optimize_imm8       = true;	/* presumably: prefer sign-extended imm8 forms -- confirm at the use sites */
const bool optimize_shift_once = true;	/* use the "by 1" shift/rotate opcodes when the count is 1 */
1179    
1180     /*************************************************************************
1181     * Actual encoding of the instructions on the target CPU *
1182     *************************************************************************/
1183    
1184 gbeauche 1.2 static __inline__ int isaccum(int r)
1185     {
1186     return (r == EAX_INDEX);
1187     }
1188    
1189 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1190     {
1191     return (x>=-128 && x<=127);
1192     }
1193    
1194     static __inline__ int isword(uae_s32 x)
1195     {
1196     return (x>=-32768 && x<=32767);
1197     }
1198    
1199     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1200     {
1201     emit_byte(0x50+r);
1202     }
1203     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1204    
1205     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1206     {
1207     emit_byte(0x58+r);
1208     }
1209     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1210    
1211     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1212     {
1213     emit_byte(0x0f);
1214     emit_byte(0xba);
1215     emit_byte(0xe0+r);
1216     emit_byte(i);
1217     }
1218     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1219    
1220     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1221     {
1222     emit_byte(0x0f);
1223     emit_byte(0xa3);
1224     emit_byte(0xc0+8*b+r);
1225     }
1226     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1227    
1228     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1229     {
1230     emit_byte(0x0f);
1231     emit_byte(0xba);
1232     emit_byte(0xf8+r);
1233     emit_byte(i);
1234     }
1235     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1236    
1237     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1238     {
1239     emit_byte(0x0f);
1240     emit_byte(0xbb);
1241     emit_byte(0xc0+8*b+r);
1242     }
1243     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1244    
1245    
1246     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1247     {
1248     emit_byte(0x0f);
1249     emit_byte(0xba);
1250     emit_byte(0xf0+r);
1251     emit_byte(i);
1252     }
1253     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1254    
1255     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1256     {
1257     emit_byte(0x0f);
1258     emit_byte(0xb3);
1259     emit_byte(0xc0+8*b+r);
1260     }
1261     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1262    
1263     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1264     {
1265     emit_byte(0x0f);
1266     emit_byte(0xba);
1267     emit_byte(0xe8+r);
1268     emit_byte(i);
1269     }
1270     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1271    
1272     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1273     {
1274     emit_byte(0x0f);
1275     emit_byte(0xab);
1276     emit_byte(0xc0+8*b+r);
1277     }
1278     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1279    
1280     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1281     {
1282     emit_byte(0x66);
1283     if (isbyte(i)) {
1284     emit_byte(0x83);
1285     emit_byte(0xe8+d);
1286     emit_byte(i);
1287     }
1288     else {
1289 gbeauche 1.2 if (optimize_accum && isaccum(d))
1290     emit_byte(0x2d);
1291     else {
1292 gbeauche 1.1 emit_byte(0x81);
1293     emit_byte(0xe8+d);
1294 gbeauche 1.2 }
1295 gbeauche 1.1 emit_word(i);
1296     }
1297     }
1298     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1299    
1300    
1301     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1302     {
1303     emit_byte(0x8b);
1304     emit_byte(0x05+8*d);
1305     emit_long(s);
1306     }
1307     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1308    
1309     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1310     {
1311     emit_byte(0xc7);
1312     emit_byte(0x05);
1313     emit_long(d);
1314     emit_long(s);
1315     }
1316     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1317    
1318     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1319     {
1320     emit_byte(0x66);
1321     emit_byte(0xc7);
1322     emit_byte(0x05);
1323     emit_long(d);
1324     emit_word(s);
1325     }
1326     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1327    
1328     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1329     {
1330     emit_byte(0xc6);
1331     emit_byte(0x05);
1332     emit_long(d);
1333     emit_byte(s);
1334     }
1335     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1336    
1337     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1338     {
1339     if (optimize_shift_once && (i == 1)) {
1340     emit_byte(0xd0);
1341     emit_byte(0x05);
1342     emit_long(d);
1343     }
1344     else {
1345     emit_byte(0xc0);
1346     emit_byte(0x05);
1347     emit_long(d);
1348     emit_byte(i);
1349     }
1350     }
1351     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1352    
1353     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1354     {
1355     if (optimize_shift_once && (i == 1)) {
1356     emit_byte(0xd0);
1357     emit_byte(0xc0+r);
1358     }
1359     else {
1360     emit_byte(0xc0);
1361     emit_byte(0xc0+r);
1362     emit_byte(i);
1363     }
1364     }
1365     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1366    
1367     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1368     {
1369     emit_byte(0x66);
1370     emit_byte(0xc1);
1371     emit_byte(0xc0+r);
1372     emit_byte(i);
1373     }
1374     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1375    
1376     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1377     {
1378     if (optimize_shift_once && (i == 1)) {
1379     emit_byte(0xd1);
1380     emit_byte(0xc0+r);
1381     }
1382     else {
1383     emit_byte(0xc1);
1384     emit_byte(0xc0+r);
1385     emit_byte(i);
1386     }
1387     }
1388     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1389    
1390     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1391     {
1392     emit_byte(0xd3);
1393     emit_byte(0xc0+d);
1394     }
1395     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1396    
1397     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1398     {
1399     emit_byte(0x66);
1400     emit_byte(0xd3);
1401     emit_byte(0xc0+d);
1402     }
1403     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1404    
1405     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1406     {
1407     emit_byte(0xd2);
1408     emit_byte(0xc0+d);
1409     }
1410     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1411    
1412     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1413     {
1414     emit_byte(0xd3);
1415     emit_byte(0xe0+d);
1416     }
1417     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1418    
1419     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1420     {
1421     emit_byte(0x66);
1422     emit_byte(0xd3);
1423     emit_byte(0xe0+d);
1424     }
1425     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1426    
1427     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1428     {
1429     emit_byte(0xd2);
1430     emit_byte(0xe0+d);
1431     }
1432     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1433    
1434     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1435     {
1436     if (optimize_shift_once && (i == 1)) {
1437     emit_byte(0xd0);
1438     emit_byte(0xc8+r);
1439     }
1440     else {
1441     emit_byte(0xc0);
1442     emit_byte(0xc8+r);
1443     emit_byte(i);
1444     }
1445     }
1446     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1447    
1448     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1449     {
1450     emit_byte(0x66);
1451     emit_byte(0xc1);
1452     emit_byte(0xc8+r);
1453     emit_byte(i);
1454     }
1455     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1456    
1457     // gb-- used for making an fpcr value in compemu_fpp.cpp
1458     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1459     {
1460     emit_byte(0x0b);
1461     emit_byte(0x05+8*d);
1462     emit_long(s);
1463     }
1464     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1465    
1466     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1467     {
1468     if (optimize_shift_once && (i == 1)) {
1469     emit_byte(0xd1);
1470     emit_byte(0xc8+r);
1471     }
1472     else {
1473     emit_byte(0xc1);
1474     emit_byte(0xc8+r);
1475     emit_byte(i);
1476     }
1477     }
1478     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1479    
1480     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1481     {
1482     emit_byte(0xd3);
1483     emit_byte(0xc8+d);
1484     }
1485     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1486    
1487     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1488     {
1489     emit_byte(0x66);
1490     emit_byte(0xd3);
1491     emit_byte(0xc8+d);
1492     }
1493     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1494    
1495     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1496     {
1497     emit_byte(0xd2);
1498     emit_byte(0xc8+d);
1499     }
1500     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1501    
1502     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1503     {
1504     emit_byte(0xd3);
1505     emit_byte(0xe8+d);
1506     }
1507     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1508    
1509     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1510     {
1511     emit_byte(0x66);
1512     emit_byte(0xd3);
1513     emit_byte(0xe8+d);
1514     }
1515     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1516    
1517     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1518     {
1519     emit_byte(0xd2);
1520     emit_byte(0xe8+d);
1521     }
1522     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1523    
1524     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1525     {
1526     emit_byte(0xd3);
1527     emit_byte(0xf8+d);
1528     }
1529     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1530    
1531     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1532     {
1533     emit_byte(0x66);
1534     emit_byte(0xd3);
1535     emit_byte(0xf8+d);
1536     }
1537     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1538    
1539     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1540     {
1541     emit_byte(0xd2);
1542     emit_byte(0xf8+d);
1543     }
1544     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1545    
1546     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1547     {
1548     if (optimize_shift_once && (i == 1)) {
1549     emit_byte(0xd1);
1550     emit_byte(0xe0+r);
1551     }
1552     else {
1553     emit_byte(0xc1);
1554     emit_byte(0xe0+r);
1555     emit_byte(i);
1556     }
1557     }
1558     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1559    
1560     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1561     {
1562     emit_byte(0x66);
1563     emit_byte(0xc1);
1564     emit_byte(0xe0+r);
1565     emit_byte(i);
1566     }
1567     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1568    
1569     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1570     {
1571     if (optimize_shift_once && (i == 1)) {
1572     emit_byte(0xd0);
1573     emit_byte(0xe0+r);
1574     }
1575     else {
1576     emit_byte(0xc0);
1577     emit_byte(0xe0+r);
1578     emit_byte(i);
1579     }
1580     }
1581     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1582    
1583     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1584     {
1585     if (optimize_shift_once && (i == 1)) {
1586     emit_byte(0xd1);
1587     emit_byte(0xe8+r);
1588     }
1589     else {
1590     emit_byte(0xc1);
1591     emit_byte(0xe8+r);
1592     emit_byte(i);
1593     }
1594     }
1595     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1596    
1597     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1598     {
1599     emit_byte(0x66);
1600     emit_byte(0xc1);
1601     emit_byte(0xe8+r);
1602     emit_byte(i);
1603     }
1604     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1605    
1606     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1607     {
1608     if (optimize_shift_once && (i == 1)) {
1609     emit_byte(0xd0);
1610     emit_byte(0xe8+r);
1611     }
1612     else {
1613     emit_byte(0xc0);
1614     emit_byte(0xe8+r);
1615     emit_byte(i);
1616     }
1617     }
1618     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1619    
1620     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1621     {
1622     if (optimize_shift_once && (i == 1)) {
1623     emit_byte(0xd1);
1624     emit_byte(0xf8+r);
1625     }
1626     else {
1627     emit_byte(0xc1);
1628     emit_byte(0xf8+r);
1629     emit_byte(i);
1630     }
1631     }
1632     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1633    
1634     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1635     {
1636     emit_byte(0x66);
1637     emit_byte(0xc1);
1638     emit_byte(0xf8+r);
1639     emit_byte(i);
1640     }
1641     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1642    
1643     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1644     {
1645     if (optimize_shift_once && (i == 1)) {
1646     emit_byte(0xd0);
1647     emit_byte(0xf8+r);
1648     }
1649     else {
1650     emit_byte(0xc0);
1651     emit_byte(0xf8+r);
1652     emit_byte(i);
1653     }
1654     }
1655     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1656    
1657     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1658     {
1659     emit_byte(0x9e);
1660     }
1661     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1662    
1663     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1664     {
1665     emit_byte(0x0f);
1666     emit_byte(0xa2);
1667     }
1668     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1669    
1670     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1671     {
1672     emit_byte(0x9f);
1673     }
1674     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1675    
1676     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1677     {
1678     emit_byte(0x0f);
1679     emit_byte(0x90+cc);
1680     emit_byte(0xc0+d);
1681     }
1682     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1683    
1684     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1685     {
1686     emit_byte(0x0f);
1687     emit_byte(0x90+cc);
1688     emit_byte(0x05);
1689     emit_long(d);
1690     }
1691     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1692    
1693     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1694     {
1695     if (have_cmov) {
1696     emit_byte(0x0f);
1697     emit_byte(0x40+cc);
1698     emit_byte(0xc0+8*d+s);
1699     }
1700     else { /* replacement using branch and mov */
1701     int uncc=(cc^1);
1702     emit_byte(0x70+uncc);
1703     emit_byte(2); /* skip next 2 bytes if not cc=true */
1704     emit_byte(0x89);
1705     emit_byte(0xc0+8*s+d);
1706     }
1707     }
1708     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1709    
1710     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1711     {
1712     emit_byte(0x0f);
1713     emit_byte(0xbc);
1714     emit_byte(0xc0+8*d+s);
1715     }
1716     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1717    
1718     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1719     {
1720     emit_byte(0x0f);
1721     emit_byte(0xbf);
1722     emit_byte(0xc0+8*d+s);
1723     }
1724     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1725    
1726     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1727     {
1728     emit_byte(0x0f);
1729     emit_byte(0xbe);
1730     emit_byte(0xc0+8*d+s);
1731     }
1732     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1733    
1734     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1735     {
1736     emit_byte(0x0f);
1737     emit_byte(0xb7);
1738     emit_byte(0xc0+8*d+s);
1739     }
1740     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1741    
1742     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1743     {
1744     emit_byte(0x0f);
1745     emit_byte(0xb6);
1746     emit_byte(0xc0+8*d+s);
1747     }
1748     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1749    
1750     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1751     {
1752     emit_byte(0x0f);
1753     emit_byte(0xaf);
1754     emit_byte(0xc0+8*d+s);
1755     }
1756     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1757    
1758     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1759     {
1760     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1761     abort();
1762     emit_byte(0xf7);
1763     emit_byte(0xea);
1764     }
1765     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1766    
1767     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1768     {
1769     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1770     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1771     abort();
1772     }
1773     emit_byte(0xf7);
1774     emit_byte(0xe2);
1775     }
1776     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1777    
1778     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1779     {
1780     abort(); /* %^$&%^$%#^ x86! */
1781     emit_byte(0x0f);
1782     emit_byte(0xaf);
1783     emit_byte(0xc0+8*d+s);
1784     }
1785     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1786    
1787     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1788     {
1789     emit_byte(0x88);
1790     emit_byte(0xc0+8*s+d);
1791     }
1792     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1793    
1794     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1795     {
1796     emit_byte(0x66);
1797     emit_byte(0x89);
1798     emit_byte(0xc0+8*s+d);
1799     }
1800     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1801    
1802     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1803     {
1804     int isebp=(baser==5)?0x40:0;
1805     int fi;
1806    
1807     switch(factor) {
1808     case 1: fi=0; break;
1809     case 2: fi=1; break;
1810     case 4: fi=2; break;
1811     case 8: fi=3; break;
1812     default: abort();
1813     }
1814    
1815    
1816     emit_byte(0x8b);
1817     emit_byte(0x04+8*d+isebp);
1818     emit_byte(baser+8*index+0x40*fi);
1819     if (isebp)
1820     emit_byte(0x00);
1821     }
1822     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1823    
1824     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1825     {
1826     int fi;
1827     int isebp;
1828    
1829     switch(factor) {
1830     case 1: fi=0; break;
1831     case 2: fi=1; break;
1832     case 4: fi=2; break;
1833     case 8: fi=3; break;
1834     default: abort();
1835     }
1836     isebp=(baser==5)?0x40:0;
1837    
1838     emit_byte(0x66);
1839     emit_byte(0x8b);
1840     emit_byte(0x04+8*d+isebp);
1841     emit_byte(baser+8*index+0x40*fi);
1842     if (isebp)
1843     emit_byte(0x00);
1844     }
1845     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1846    
1847     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1848     {
1849     int fi;
1850     int isebp;
1851    
1852     switch(factor) {
1853     case 1: fi=0; break;
1854     case 2: fi=1; break;
1855     case 4: fi=2; break;
1856     case 8: fi=3; break;
1857     default: abort();
1858     }
1859     isebp=(baser==5)?0x40:0;
1860    
1861     emit_byte(0x8a);
1862     emit_byte(0x04+8*d+isebp);
1863     emit_byte(baser+8*index+0x40*fi);
1864     if (isebp)
1865     emit_byte(0x00);
1866     }
1867     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1868    
1869     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1870     {
1871     int fi;
1872     int isebp;
1873    
1874     switch(factor) {
1875     case 1: fi=0; break;
1876     case 2: fi=1; break;
1877     case 4: fi=2; break;
1878     case 8: fi=3; break;
1879     default: abort();
1880     }
1881    
1882    
1883     isebp=(baser==5)?0x40:0;
1884    
1885     emit_byte(0x89);
1886     emit_byte(0x04+8*s+isebp);
1887     emit_byte(baser+8*index+0x40*fi);
1888     if (isebp)
1889     emit_byte(0x00);
1890     }
1891     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1892    
1893     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1894     {
1895     int fi;
1896     int isebp;
1897    
1898     switch(factor) {
1899     case 1: fi=0; break;
1900     case 2: fi=1; break;
1901     case 4: fi=2; break;
1902     case 8: fi=3; break;
1903     default: abort();
1904     }
1905     isebp=(baser==5)?0x40:0;
1906    
1907     emit_byte(0x66);
1908     emit_byte(0x89);
1909     emit_byte(0x04+8*s+isebp);
1910     emit_byte(baser+8*index+0x40*fi);
1911     if (isebp)
1912     emit_byte(0x00);
1913     }
1914     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1915    
1916     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1917     {
1918     int fi;
1919     int isebp;
1920    
1921     switch(factor) {
1922     case 1: fi=0; break;
1923     case 2: fi=1; break;
1924     case 4: fi=2; break;
1925     case 8: fi=3; break;
1926     default: abort();
1927     }
1928     isebp=(baser==5)?0x40:0;
1929    
1930     emit_byte(0x88);
1931     emit_byte(0x04+8*s+isebp);
1932     emit_byte(baser+8*index+0x40*fi);
1933     if (isebp)
1934     emit_byte(0x00);
1935     }
1936     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1937    
1938     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1939     {
1940     int fi;
1941    
1942     switch(factor) {
1943     case 1: fi=0; break;
1944     case 2: fi=1; break;
1945     case 4: fi=2; break;
1946     case 8: fi=3; break;
1947     default: abort();
1948     }
1949    
1950     emit_byte(0x89);
1951     emit_byte(0x84+8*s);
1952     emit_byte(baser+8*index+0x40*fi);
1953     emit_long(base);
1954     }
1955     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1956    
1957     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1958     {
1959     int fi;
1960    
1961     switch(factor) {
1962     case 1: fi=0; break;
1963     case 2: fi=1; break;
1964     case 4: fi=2; break;
1965     case 8: fi=3; break;
1966     default: abort();
1967     }
1968    
1969     emit_byte(0x66);
1970     emit_byte(0x89);
1971     emit_byte(0x84+8*s);
1972     emit_byte(baser+8*index+0x40*fi);
1973     emit_long(base);
1974     }
1975     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1976    
1977     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1978     {
1979     int fi;
1980    
1981     switch(factor) {
1982     case 1: fi=0; break;
1983     case 2: fi=1; break;
1984     case 4: fi=2; break;
1985     case 8: fi=3; break;
1986     default: abort();
1987     }
1988    
1989     emit_byte(0x88);
1990     emit_byte(0x84+8*s);
1991     emit_byte(baser+8*index+0x40*fi);
1992     emit_long(base);
1993     }
1994     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1995    
1996     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1997     {
1998     int fi;
1999    
2000     switch(factor) {
2001     case 1: fi=0; break;
2002     case 2: fi=1; break;
2003     case 4: fi=2; break;
2004     case 8: fi=3; break;
2005     default: abort();
2006     }
2007    
2008     emit_byte(0x8b);
2009     emit_byte(0x84+8*d);
2010     emit_byte(baser+8*index+0x40*fi);
2011     emit_long(base);
2012     }
2013     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2014    
2015     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2016     {
2017     int fi;
2018    
2019     switch(factor) {
2020     case 1: fi=0; break;
2021     case 2: fi=1; break;
2022     case 4: fi=2; break;
2023     case 8: fi=3; break;
2024     default: abort();
2025     }
2026    
2027     emit_byte(0x66);
2028     emit_byte(0x8b);
2029     emit_byte(0x84+8*d);
2030     emit_byte(baser+8*index+0x40*fi);
2031     emit_long(base);
2032     }
2033     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2034    
2035     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2036     {
2037     int fi;
2038    
2039     switch(factor) {
2040     case 1: fi=0; break;
2041     case 2: fi=1; break;
2042     case 4: fi=2; break;
2043     case 8: fi=3; break;
2044     default: abort();
2045     }
2046    
2047     emit_byte(0x8a);
2048     emit_byte(0x84+8*d);
2049     emit_byte(baser+8*index+0x40*fi);
2050     emit_long(base);
2051     }
2052     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2053    
2054     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2055     {
2056     int fi;
2057     switch(factor) {
2058     case 1: fi=0; break;
2059     case 2: fi=1; break;
2060     case 4: fi=2; break;
2061     case 8: fi=3; break;
2062     default:
2063     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2064     abort();
2065     }
2066     emit_byte(0x8b);
2067     emit_byte(0x04+8*d);
2068     emit_byte(0x05+8*index+64*fi);
2069     emit_long(base);
2070     }
2071     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2072    
2073     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2074     {
2075     int fi;
2076     switch(factor) {
2077     case 1: fi=0; break;
2078     case 2: fi=1; break;
2079     case 4: fi=2; break;
2080     case 8: fi=3; break;
2081     default:
2082     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2083     abort();
2084     }
2085     if (have_cmov) {
2086     emit_byte(0x0f);
2087     emit_byte(0x40+cond);
2088     emit_byte(0x04+8*d);
2089     emit_byte(0x05+8*index+64*fi);
2090     emit_long(base);
2091     }
2092     else { /* replacement using branch and mov */
2093     int uncc=(cond^1);
2094     emit_byte(0x70+uncc);
2095     emit_byte(7); /* skip next 7 bytes if not cc=true */
2096     emit_byte(0x8b);
2097     emit_byte(0x04+8*d);
2098     emit_byte(0x05+8*index+64*fi);
2099     emit_long(base);
2100     }
2101     }
2102     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2103    
2104     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2105     {
2106     if (have_cmov) {
2107     emit_byte(0x0f);
2108     emit_byte(0x40+cond);
2109     emit_byte(0x05+8*d);
2110     emit_long(mem);
2111     }
2112     else { /* replacement using branch and mov */
2113     int uncc=(cond^1);
2114     emit_byte(0x70+uncc);
2115     emit_byte(6); /* skip next 6 bytes if not cc=true */
2116     emit_byte(0x8b);
2117     emit_byte(0x05+8*d);
2118     emit_long(mem);
2119     }
2120     }
2121     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2122    
2123     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2124     {
2125 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2126 gbeauche 1.1 emit_byte(0x8b);
2127     emit_byte(0x40+8*d+s);
2128     emit_byte(offset);
2129     }
2130     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2131    
2132     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2133     {
2134 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2135 gbeauche 1.1 emit_byte(0x66);
2136     emit_byte(0x8b);
2137     emit_byte(0x40+8*d+s);
2138     emit_byte(offset);
2139     }
2140     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2141    
2142     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2143     {
2144 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2145 gbeauche 1.1 emit_byte(0x8a);
2146     emit_byte(0x40+8*d+s);
2147     emit_byte(offset);
2148     }
2149     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2150    
2151     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2152     {
2153     emit_byte(0x8b);
2154     emit_byte(0x80+8*d+s);
2155     emit_long(offset);
2156     }
2157     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2158    
2159     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2160     {
2161     emit_byte(0x66);
2162     emit_byte(0x8b);
2163     emit_byte(0x80+8*d+s);
2164     emit_long(offset);
2165     }
2166     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2167    
2168     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2169     {
2170     emit_byte(0x8a);
2171     emit_byte(0x80+8*d+s);
2172     emit_long(offset);
2173     }
2174     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2175    
2176     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2177     {
2178 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2179 gbeauche 1.1 emit_byte(0xc7);
2180     emit_byte(0x40+d);
2181     emit_byte(offset);
2182     emit_long(i);
2183     }
2184     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2185    
2186     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2187     {
2188 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2189 gbeauche 1.1 emit_byte(0x66);
2190     emit_byte(0xc7);
2191     emit_byte(0x40+d);
2192     emit_byte(offset);
2193     emit_word(i);
2194     }
2195     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2196    
2197     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2198     {
2199 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2200 gbeauche 1.1 emit_byte(0xc6);
2201     emit_byte(0x40+d);
2202     emit_byte(offset);
2203     emit_byte(i);
2204     }
2205     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2206    
2207     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2208     {
2209 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2210 gbeauche 1.1 emit_byte(0x89);
2211     emit_byte(0x40+8*s+d);
2212     emit_byte(offset);
2213     }
2214     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2215    
2216     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2217     {
2218 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2219 gbeauche 1.1 emit_byte(0x66);
2220     emit_byte(0x89);
2221     emit_byte(0x40+8*s+d);
2222     emit_byte(offset);
2223     }
2224     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2225    
2226     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2227     {
2228 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2229 gbeauche 1.1 emit_byte(0x88);
2230     emit_byte(0x40+8*s+d);
2231     emit_byte(offset);
2232     }
2233     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2234    
2235     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2236     {
2237     if (optimize_imm8 && isbyte(offset)) {
2238     emit_byte(0x8d);
2239     emit_byte(0x40+8*d+s);
2240     emit_byte(offset);
2241     }
2242     else {
2243     emit_byte(0x8d);
2244     emit_byte(0x80+8*d+s);
2245     emit_long(offset);
2246     }
2247     }
2248     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2249    
2250     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2251     {
2252     int fi;
2253    
2254     switch(factor) {
2255     case 1: fi=0; break;
2256     case 2: fi=1; break;
2257     case 4: fi=2; break;
2258     case 8: fi=3; break;
2259     default: abort();
2260     }
2261    
2262     if (optimize_imm8 && isbyte(offset)) {
2263     emit_byte(0x8d);
2264     emit_byte(0x44+8*d);
2265     emit_byte(0x40*fi+8*index+s);
2266     emit_byte(offset);
2267     }
2268     else {
2269     emit_byte(0x8d);
2270     emit_byte(0x84+8*d);
2271     emit_byte(0x40*fi+8*index+s);
2272     emit_long(offset);
2273     }
2274     }
2275     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2276    
2277     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2278     {
2279     int isebp=(s==5)?0x40:0;
2280     int fi;
2281    
2282     switch(factor) {
2283     case 1: fi=0; break;
2284     case 2: fi=1; break;
2285     case 4: fi=2; break;
2286     case 8: fi=3; break;
2287     default: abort();
2288     }
2289    
2290     emit_byte(0x8d);
2291     emit_byte(0x04+8*d+isebp);
2292     emit_byte(0x40*fi+8*index+s);
2293     if (isebp)
2294     emit_byte(0);
2295     }
2296     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2297    
2298     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2299     {
2300     if (optimize_imm8 && isbyte(offset)) {
2301     emit_byte(0x89);
2302     emit_byte(0x40+8*s+d);
2303     emit_byte(offset);
2304     }
2305     else {
2306     emit_byte(0x89);
2307     emit_byte(0x80+8*s+d);
2308     emit_long(offset);
2309     }
2310     }
2311     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2312    
2313     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2314     {
2315     emit_byte(0x66);
2316     emit_byte(0x89);
2317     emit_byte(0x80+8*s+d);
2318     emit_long(offset);
2319     }
2320     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2321    
2322     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2323     {
2324     if (optimize_imm8 && isbyte(offset)) {
2325     emit_byte(0x88);
2326     emit_byte(0x40+8*s+d);
2327     emit_byte(offset);
2328     }
2329     else {
2330     emit_byte(0x88);
2331     emit_byte(0x80+8*s+d);
2332     emit_long(offset);
2333     }
2334     }
2335     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2336    
2337     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2338     {
2339     emit_byte(0x0f);
2340     emit_byte(0xc8+r);
2341     }
2342     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2343    
2344     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2345     {
2346     emit_byte(0x66);
2347     emit_byte(0xc1);
2348     emit_byte(0xc0+r);
2349     emit_byte(0x08);
2350     }
2351     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2352    
2353     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2354     {
2355     emit_byte(0x89);
2356     emit_byte(0xc0+8*s+d);
2357     }
2358     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2359    
2360     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2361     {
2362     emit_byte(0x89);
2363     emit_byte(0x05+8*s);
2364     emit_long(d);
2365     }
2366     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2367    
2368     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2369     {
2370     emit_byte(0x66);
2371     emit_byte(0x89);
2372     emit_byte(0x05+8*s);
2373     emit_long(d);
2374     }
2375     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2376    
2377     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2378     {
2379     emit_byte(0x66);
2380     emit_byte(0x8b);
2381     emit_byte(0x05+8*d);
2382     emit_long(s);
2383     }
2384     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2385    
2386     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2387     {
2388     emit_byte(0x88);
2389     emit_byte(0x05+8*s);
2390     emit_long(d);
2391     }
2392     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2393    
2394     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2395     {
2396     emit_byte(0x8a);
2397     emit_byte(0x05+8*d);
2398     emit_long(s);
2399     }
2400     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2401    
2402     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2403     {
2404     emit_byte(0xb8+d);
2405     emit_long(s);
2406     }
2407     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2408    
2409     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2410     {
2411     emit_byte(0x66);
2412     emit_byte(0xb8+d);
2413     emit_word(s);
2414     }
2415     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2416    
2417     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2418     {
2419     emit_byte(0xb0+d);
2420     emit_byte(s);
2421     }
2422     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2423    
2424     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2425     {
2426     emit_byte(0x81);
2427     emit_byte(0x15);
2428     emit_long(d);
2429     emit_long(s);
2430     }
2431     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2432    
2433     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2434     {
2435     if (optimize_imm8 && isbyte(s)) {
2436     emit_byte(0x83);
2437     emit_byte(0x05);
2438     emit_long(d);
2439     emit_byte(s);
2440     }
2441     else {
2442     emit_byte(0x81);
2443     emit_byte(0x05);
2444     emit_long(d);
2445     emit_long(s);
2446     }
2447     }
2448     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2449    
2450     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2451     {
2452     emit_byte(0x66);
2453     emit_byte(0x81);
2454     emit_byte(0x05);
2455     emit_long(d);
2456     emit_word(s);
2457     }
2458     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2459    
2460     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2461     {
2462     emit_byte(0x80);
2463     emit_byte(0x05);
2464     emit_long(d);
2465     emit_byte(s);
2466     }
2467     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2468    
2469     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2470     {
2471 gbeauche 1.2 if (optimize_accum && isaccum(d))
2472     emit_byte(0xa9);
2473     else {
2474 gbeauche 1.1 emit_byte(0xf7);
2475     emit_byte(0xc0+d);
2476 gbeauche 1.2 }
2477 gbeauche 1.1 emit_long(i);
2478     }
2479     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2480    
2481     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2482     {
2483     emit_byte(0x85);
2484     emit_byte(0xc0+8*s+d);
2485     }
2486     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2487    
2488     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2489     {
2490     emit_byte(0x66);
2491     emit_byte(0x85);
2492     emit_byte(0xc0+8*s+d);
2493     }
2494     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2495    
2496     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2497     {
2498     emit_byte(0x84);
2499     emit_byte(0xc0+8*s+d);
2500     }
2501     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2502    
2503     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2504     {
2505     if (optimize_imm8 && isbyte(i)) {
2506 gbeauche 1.2 emit_byte(0x83);
2507     emit_byte(0xe0+d);
2508     emit_byte(i);
2509 gbeauche 1.1 }
2510     else {
2511 gbeauche 1.2 if (optimize_accum && isaccum(d))
2512     emit_byte(0x25);
2513     else {
2514     emit_byte(0x81);
2515     emit_byte(0xe0+d);
2516     }
2517     emit_long(i);
2518 gbeauche 1.1 }
2519     }
2520     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2521    
2522     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2523     {
2524 gbeauche 1.2 emit_byte(0x66);
2525     if (optimize_imm8 && isbyte(i)) {
2526     emit_byte(0x83);
2527     emit_byte(0xe0+d);
2528     emit_byte(i);
2529     }
2530     else {
2531     if (optimize_accum && isaccum(d))
2532     emit_byte(0x25);
2533     else {
2534     emit_byte(0x81);
2535     emit_byte(0xe0+d);
2536     }
2537     emit_word(i);
2538     }
2539 gbeauche 1.1 }
2540     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2541    
2542     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2543     {
2544     emit_byte(0x21);
2545     emit_byte(0xc0+8*s+d);
2546     }
2547     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2548    
2549     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2550     {
2551     emit_byte(0x66);
2552     emit_byte(0x21);
2553     emit_byte(0xc0+8*s+d);
2554     }
2555     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2556    
2557     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2558     {
2559     emit_byte(0x20);
2560     emit_byte(0xc0+8*s+d);
2561     }
2562     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2563    
2564     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2565     {
2566     if (optimize_imm8 && isbyte(i)) {
2567     emit_byte(0x83);
2568     emit_byte(0xc8+d);
2569     emit_byte(i);
2570     }
2571     else {
2572 gbeauche 1.2 if (optimize_accum && isaccum(d))
2573     emit_byte(0x0d);
2574     else {
2575 gbeauche 1.1 emit_byte(0x81);
2576     emit_byte(0xc8+d);
2577 gbeauche 1.2 }
2578 gbeauche 1.1 emit_long(i);
2579     }
2580     }
2581     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2582    
2583     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2584     {
2585     emit_byte(0x09);
2586     emit_byte(0xc0+8*s+d);
2587     }
2588     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2589    
2590     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2591     {
2592     emit_byte(0x66);
2593     emit_byte(0x09);
2594     emit_byte(0xc0+8*s+d);
2595     }
2596     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2597    
2598     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2599     {
2600     emit_byte(0x08);
2601     emit_byte(0xc0+8*s+d);
2602     }
2603     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2604    
2605     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2606     {
2607     emit_byte(0x11);
2608     emit_byte(0xc0+8*s+d);
2609     }
2610     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2611    
2612     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2613     {
2614     emit_byte(0x66);
2615     emit_byte(0x11);
2616     emit_byte(0xc0+8*s+d);
2617     }
2618     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2619    
2620     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2621     {
2622     emit_byte(0x10);
2623     emit_byte(0xc0+8*s+d);
2624     }
2625     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2626    
2627     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2628     {
2629     emit_byte(0x01);
2630     emit_byte(0xc0+8*s+d);
2631     }
2632     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2633    
2634     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2635     {
2636     emit_byte(0x66);
2637     emit_byte(0x01);
2638     emit_byte(0xc0+8*s+d);
2639     }
2640     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2641    
2642     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2643     {
2644     emit_byte(0x00);
2645     emit_byte(0xc0+8*s+d);
2646     }
2647     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2648    
2649     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2650     {
2651     if (isbyte(i)) {
2652     emit_byte(0x83);
2653     emit_byte(0xe8+d);
2654     emit_byte(i);
2655     }
2656     else {
2657 gbeauche 1.2 if (optimize_accum && isaccum(d))
2658     emit_byte(0x2d);
2659     else {
2660 gbeauche 1.1 emit_byte(0x81);
2661     emit_byte(0xe8+d);
2662 gbeauche 1.2 }
2663 gbeauche 1.1 emit_long(i);
2664     }
2665     }
2666     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2667    
2668     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2669     {
2670 gbeauche 1.2 if (optimize_accum && isaccum(d))
2671     emit_byte(0x2c);
2672     else {
2673 gbeauche 1.1 emit_byte(0x80);
2674     emit_byte(0xe8+d);
2675 gbeauche 1.2 }
2676 gbeauche 1.1 emit_byte(i);
2677     }
2678     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2679    
2680     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2681     {
2682     if (isbyte(i)) {
2683     emit_byte(0x83);
2684     emit_byte(0xc0+d);
2685     emit_byte(i);
2686     }
2687     else {
2688 gbeauche 1.2 if (optimize_accum && isaccum(d))
2689     emit_byte(0x05);
2690     else {
2691 gbeauche 1.1 emit_byte(0x81);
2692     emit_byte(0xc0+d);
2693 gbeauche 1.2 }
2694 gbeauche 1.1 emit_long(i);
2695     }
2696     }
2697     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2698    
2699     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2700     {
2701 gbeauche 1.2 emit_byte(0x66);
2702 gbeauche 1.1 if (isbyte(i)) {
2703     emit_byte(0x83);
2704     emit_byte(0xc0+d);
2705     emit_byte(i);
2706     }
2707     else {
2708 gbeauche 1.2 if (optimize_accum && isaccum(d))
2709     emit_byte(0x05);
2710     else {
2711 gbeauche 1.1 emit_byte(0x81);
2712     emit_byte(0xc0+d);
2713 gbeauche 1.2 }
2714 gbeauche 1.1 emit_word(i);
2715     }
2716     }
2717     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2718    
2719     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2720     {
2721 gbeauche 1.2 if (optimize_accum && isaccum(d))
2722     emit_byte(0x04);
2723     else {
2724     emit_byte(0x80);
2725     emit_byte(0xc0+d);
2726     }
2727 gbeauche 1.1 emit_byte(i);
2728     }
2729     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2730    
2731     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2732     {
2733     emit_byte(0x19);
2734     emit_byte(0xc0+8*s+d);
2735     }
2736     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2737    
2738     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2739     {
2740     emit_byte(0x66);
2741     emit_byte(0x19);
2742     emit_byte(0xc0+8*s+d);
2743     }
2744     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2745    
2746     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2747     {
2748     emit_byte(0x18);
2749     emit_byte(0xc0+8*s+d);
2750     }
2751     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2752    
2753     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2754     {
2755     emit_byte(0x29);
2756     emit_byte(0xc0+8*s+d);
2757     }
2758     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2759    
2760     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2761     {
2762     emit_byte(0x66);
2763     emit_byte(0x29);
2764     emit_byte(0xc0+8*s+d);
2765     }
2766     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2767    
2768     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2769     {
2770     emit_byte(0x28);
2771     emit_byte(0xc0+8*s+d);
2772     }
2773     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2774    
2775     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2776     {
2777     emit_byte(0x39);
2778     emit_byte(0xc0+8*s+d);
2779     }
2780     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2781    
2782     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2783     {
2784     if (optimize_imm8 && isbyte(i)) {
2785     emit_byte(0x83);
2786     emit_byte(0xf8+r);
2787     emit_byte(i);
2788     }
2789     else {
2790 gbeauche 1.2 if (optimize_accum && isaccum(r))
2791     emit_byte(0x3d);
2792     else {
2793 gbeauche 1.1 emit_byte(0x81);
2794     emit_byte(0xf8+r);
2795 gbeauche 1.2 }
2796 gbeauche 1.1 emit_long(i);
2797     }
2798     }
2799     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2800    
2801     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2802     {
2803     emit_byte(0x66);
2804     emit_byte(0x39);
2805     emit_byte(0xc0+8*s+d);
2806     }
2807     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2808    
2809 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2810     {
2811     emit_byte(0x80);
2812     emit_byte(0x3d);
2813     emit_long(d);
2814     emit_byte(s);
2815     }
2816     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2817    
2818 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2819     {
2820 gbeauche 1.2 if (optimize_accum && isaccum(d))
2821     emit_byte(0x3c);
2822     else {
2823 gbeauche 1.1 emit_byte(0x80);
2824     emit_byte(0xf8+d);
2825 gbeauche 1.2 }
2826 gbeauche 1.1 emit_byte(i);
2827     }
2828     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2829    
2830     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2831     {
2832     emit_byte(0x38);
2833     emit_byte(0xc0+8*s+d);
2834     }
2835     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2836    
2837     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2838     {
2839     int fi;
2840    
2841     switch(factor) {
2842     case 1: fi=0; break;
2843     case 2: fi=1; break;
2844     case 4: fi=2; break;
2845     case 8: fi=3; break;
2846     default: abort();
2847     }
2848     emit_byte(0x39);
2849     emit_byte(0x04+8*d);
2850     emit_byte(5+8*index+0x40*fi);
2851     emit_long(offset);
2852     }
2853     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2854    
2855     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2856     {
2857     emit_byte(0x31);
2858     emit_byte(0xc0+8*s+d);
2859     }
2860     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2861    
2862     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2863     {
2864     emit_byte(0x66);
2865     emit_byte(0x31);
2866     emit_byte(0xc0+8*s+d);
2867     }
2868     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2869    
2870     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2871     {
2872     emit_byte(0x30);
2873     emit_byte(0xc0+8*s+d);
2874     }
2875     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2876    
2877     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2878     {
2879     if (optimize_imm8 && isbyte(s)) {
2880     emit_byte(0x83);
2881     emit_byte(0x2d);
2882     emit_long(d);
2883     emit_byte(s);
2884     }
2885     else {
2886     emit_byte(0x81);
2887     emit_byte(0x2d);
2888     emit_long(d);
2889     emit_long(s);
2890     }
2891     }
2892     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2893    
2894     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2895     {
2896     if (optimize_imm8 && isbyte(s)) {
2897     emit_byte(0x83);
2898     emit_byte(0x3d);
2899     emit_long(d);
2900     emit_byte(s);
2901     }
2902     else {
2903     emit_byte(0x81);
2904     emit_byte(0x3d);
2905     emit_long(d);
2906     emit_long(s);
2907     }
2908     }
2909     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2910    
2911     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2912     {
2913     emit_byte(0x87);
2914     emit_byte(0xc0+8*r1+r2);
2915     }
2916     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2917    
2918     /*************************************************************************
2919     * FIXME: mem access modes probably wrong *
2920     *************************************************************************/
2921    
2922     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2923     {
2924     emit_byte(0x9c);
2925     }
2926     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2927    
2928     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2929     {
2930     emit_byte(0x9d);
2931     }
2932     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2933 gbeauche 1.13
2934     #endif
2935 gbeauche 1.1
2936     /*************************************************************************
2937     * Unoptimizable stuff --- jump *
2938     *************************************************************************/
2939    
2940     static __inline__ void raw_call_r(R4 r)
2941     {
2942 gbeauche 1.20 #if USE_NEW_RTASM
2943     CALLsr(r);
2944     #else
2945 gbeauche 1.1 emit_byte(0xff);
2946     emit_byte(0xd0+r);
2947 gbeauche 1.20 #endif
2948 gbeauche 1.5 }
2949    
2950     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2951     {
2952 gbeauche 1.20 #if USE_NEW_RTASM
2953     CALLsm(base, X86_NOREG, r, m);
2954     #else
2955 gbeauche 1.5 int mu;
2956     switch(m) {
2957     case 1: mu=0; break;
2958     case 2: mu=1; break;
2959     case 4: mu=2; break;
2960     case 8: mu=3; break;
2961     default: abort();
2962     }
2963     emit_byte(0xff);
2964     emit_byte(0x14);
2965     emit_byte(0x05+8*r+0x40*mu);
2966     emit_long(base);
2967 gbeauche 1.20 #endif
2968 gbeauche 1.1 }
2969    
2970     static __inline__ void raw_jmp_r(R4 r)
2971     {
2972 gbeauche 1.20 #if USE_NEW_RTASM
2973     JMPsr(r);
2974     #else
2975 gbeauche 1.1 emit_byte(0xff);
2976     emit_byte(0xe0+r);
2977 gbeauche 1.20 #endif
2978 gbeauche 1.1 }
2979    
2980     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2981     {
2982 gbeauche 1.20 #if USE_NEW_RTASM
2983     JMPsm(base, X86_NOREG, r, m);
2984     #else
2985 gbeauche 1.1 int mu;
2986     switch(m) {
2987     case 1: mu=0; break;
2988     case 2: mu=1; break;
2989     case 4: mu=2; break;
2990     case 8: mu=3; break;
2991     default: abort();
2992     }
2993     emit_byte(0xff);
2994     emit_byte(0x24);
2995     emit_byte(0x05+8*r+0x40*mu);
2996     emit_long(base);
2997 gbeauche 1.20 #endif
2998 gbeauche 1.1 }
2999    
3000     static __inline__ void raw_jmp_m(uae_u32 base)
3001     {
3002     emit_byte(0xff);
3003     emit_byte(0x25);
3004     emit_long(base);
3005     }
3006    
3007    
3008     static __inline__ void raw_call(uae_u32 t)
3009     {
3010 gbeauche 1.20 #if USE_NEW_RTASM
3011     CALLm(t);
3012     #else
3013 gbeauche 1.1 emit_byte(0xe8);
3014     emit_long(t-(uae_u32)target-4);
3015 gbeauche 1.20 #endif
3016 gbeauche 1.1 }
3017    
3018     static __inline__ void raw_jmp(uae_u32 t)
3019     {
3020 gbeauche 1.20 #if USE_NEW_RTASM
3021     JMPm(t);
3022     #else
3023 gbeauche 1.1 emit_byte(0xe9);
3024     emit_long(t-(uae_u32)target-4);
3025 gbeauche 1.20 #endif
3026 gbeauche 1.1 }
3027    
3028     static __inline__ void raw_jl(uae_u32 t)
3029     {
3030     emit_byte(0x0f);
3031     emit_byte(0x8c);
3032 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3033 gbeauche 1.1 }
3034    
3035     static __inline__ void raw_jz(uae_u32 t)
3036     {
3037     emit_byte(0x0f);
3038     emit_byte(0x84);
3039 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3040 gbeauche 1.1 }
3041    
3042     static __inline__ void raw_jnz(uae_u32 t)
3043     {
3044     emit_byte(0x0f);
3045     emit_byte(0x85);
3046 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3047 gbeauche 1.1 }
3048    
3049     static __inline__ void raw_jnz_l_oponly(void)
3050     {
3051     emit_byte(0x0f);
3052     emit_byte(0x85);
3053     }
3054    
3055     static __inline__ void raw_jcc_l_oponly(int cc)
3056     {
3057     emit_byte(0x0f);
3058     emit_byte(0x80+cc);
3059     }
3060    
3061     static __inline__ void raw_jnz_b_oponly(void)
3062     {
3063     emit_byte(0x75);
3064     }
3065    
3066     static __inline__ void raw_jz_b_oponly(void)
3067     {
3068     emit_byte(0x74);
3069     }
3070    
3071     static __inline__ void raw_jcc_b_oponly(int cc)
3072     {
3073     emit_byte(0x70+cc);
3074     }
3075    
3076     static __inline__ void raw_jmp_l_oponly(void)
3077     {
3078     emit_byte(0xe9);
3079     }
3080    
3081     static __inline__ void raw_jmp_b_oponly(void)
3082     {
3083     emit_byte(0xeb);
3084     }
3085    
3086     static __inline__ void raw_ret(void)
3087     {
3088     emit_byte(0xc3);
3089     }
3090    
3091     static __inline__ void raw_nop(void)
3092     {
3093     emit_byte(0x90);
3094     }
3095    
3096 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3097     {
3098     /* Source: GNU Binutils 2.12.90.0.15 */
3099     /* Various efficient no-op patterns for aligning code labels.
3100     Note: Don't try to assemble the instructions in the comments.
3101     0L and 0w are not legal. */
3102     static const uae_u8 f32_1[] =
3103     {0x90}; /* nop */
3104     static const uae_u8 f32_2[] =
3105     {0x89,0xf6}; /* movl %esi,%esi */
3106     static const uae_u8 f32_3[] =
3107     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3108     static const uae_u8 f32_4[] =
3109     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3110     static const uae_u8 f32_5[] =
3111     {0x90, /* nop */
3112     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3113     static const uae_u8 f32_6[] =
3114     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3115     static const uae_u8 f32_7[] =
3116     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3117     static const uae_u8 f32_8[] =
3118     {0x90, /* nop */
3119     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3120     static const uae_u8 f32_9[] =
3121     {0x89,0xf6, /* movl %esi,%esi */
3122     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3123     static const uae_u8 f32_10[] =
3124     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3125     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3126     static const uae_u8 f32_11[] =
3127     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3128     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3129     static const uae_u8 f32_12[] =
3130     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3131     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3132     static const uae_u8 f32_13[] =
3133     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3134     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3135     static const uae_u8 f32_14[] =
3136     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3137     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3138     static const uae_u8 f32_15[] =
3139     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3140     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3141     static const uae_u8 f32_16[] =
3142     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3143     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3144     static const uae_u8 *const f32_patt[] = {
3145     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3146     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3147     };
3148 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3149 gbeauche 1.8
3150 gbeauche 1.21 #if defined(__x86_64__)
3151     /* The recommended way to pad 64bit code is to use NOPs preceded by
3152     maximally four 0x66 prefixes. Balance the size of nops. */
3153     if (nbytes == 0)
3154     return;
3155    
3156     int i;
3157     int nnops = (nbytes + 3) / 4;
3158     int len = nbytes / nnops;
3159     int remains = nbytes - nnops * len;
3160    
3161     for (i = 0; i < remains; i++) {
3162     emit_block(prefixes, len);
3163     raw_nop();
3164     }
3165     for (; i < nnops; i++) {
3166     emit_block(prefixes, len - 1);
3167     raw_nop();
3168     }
3169     #else
3170 gbeauche 1.8 int nloops = nbytes / 16;
3171     while (nloops-- > 0)
3172     emit_block(f32_16, sizeof(f32_16));
3173    
3174     nbytes %= 16;
3175     if (nbytes)
3176     emit_block(f32_patt[nbytes - 1], nbytes);
3177 gbeauche 1.21 #endif
3178 gbeauche 1.8 }
3179    
3180 gbeauche 1.1
3181     /*************************************************************************
3182     * Flag handling, to and fro UAE flag register *
3183     *************************************************************************/
3184    
3185     #ifdef SAHF_SETO_PROFITABLE
3186    
3187     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3188    
3189     static __inline__ void raw_flags_to_reg(int r)
3190     {
3191     raw_lahf(0); /* Most flags in AH */
3192     //raw_setcc(r,0); /* V flag in AL */
3193 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3194 gbeauche 1.1
3195     #if 1 /* Let's avoid those nasty partial register stalls */
3196 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3197     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3198 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3199     live.state[FLAGTMP].status=INMEM;
3200     live.state[FLAGTMP].realreg=-1;
3201     /* We just "evicted" FLAGTMP. */
3202     if (live.nat[r].nholds!=1) {
3203     /* Huh? */
3204     abort();
3205     }
3206     live.nat[r].nholds=0;
3207     #endif
3208     }
3209    
3210     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3211     static __inline__ void raw_reg_to_flags(int r)
3212     {
3213     raw_cmp_b_ri(r,-127); /* set V */
3214     raw_sahf(0);
3215     }
3216    
3217     #else
3218    
3219     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3220     static __inline__ void raw_flags_to_reg(int r)
3221     {
3222     raw_pushfl();
3223     raw_pop_l_r(r);
3224 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3225 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3226     live.state[FLAGTMP].status=INMEM;
3227     live.state[FLAGTMP].realreg=-1;
3228     /* We just "evicted" FLAGTMP. */
3229     if (live.nat[r].nholds!=1) {
3230     /* Huh? */
3231     abort();
3232     }
3233     live.nat[r].nholds=0;
3234     }
3235    
3236     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3237     static __inline__ void raw_reg_to_flags(int r)
3238     {
3239     raw_push_l_r(r);
3240     raw_popfl();
3241     }
3242    
3243     #endif
3244    
3245     /* Apparently, there are enough instructions between flag store and
3246     flag reload to avoid the partial memory stall */
3247     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3248     {
3249     #if 1
3250 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3251 gbeauche 1.1 #else
3252 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3253     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3254 gbeauche 1.1 #endif
3255     }
3256    
3257     /* FLAGX is byte sized, and we *do* write it at that size */
3258     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3259     {
3260     if (live.nat[target].canbyte)
3261 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3262 gbeauche 1.1 else if (live.nat[target].canword)
3263 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3264 gbeauche 1.1 else
3265 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3266 gbeauche 1.1 }
3267    
3268 gbeauche 1.11 #define NATIVE_FLAG_Z 0x40
3269     static __inline__ void raw_flags_set_zero(int f, int r, int t)
3270     {
3271     // FIXME: this is really suboptimal
3272     raw_pushfl();
3273     raw_pop_l_r(f);
3274     raw_and_l_ri(f,~NATIVE_FLAG_Z);
3275     raw_test_l_rr(r,r);
3276     raw_mov_l_ri(r,0);
3277     raw_mov_l_ri(t,NATIVE_FLAG_Z);
3278     raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3279     raw_or_l(f,r);
3280     raw_push_l_r(f);
3281     raw_popfl();
3282     }
3283 gbeauche 1.1
3284     static __inline__ void raw_inc_sp(int off)
3285     {
3286 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3287 gbeauche 1.1 }
3288    
3289     /*************************************************************************
3290     * Handling mistaken direct memory access *
3291     *************************************************************************/
3292    
3293     // gb-- I don't need that part for JIT Basilisk II
3294     #if defined(NATMEM_OFFSET) && 0
3295     #include <asm/sigcontext.h>
3296     #include <signal.h>
3297    
3298     #define SIG_READ 1
3299     #define SIG_WRITE 2
3300    
3301     static int in_handler=0;
3302     static uae_u8 veccode[256];
3303    
3304     static void vec(int x, struct sigcontext sc)
3305     {
3306     uae_u8* i=(uae_u8*)sc.eip;
3307     uae_u32 addr=sc.cr2;
3308     int r=-1;
3309     int size=4;
3310     int dir=-1;
3311     int len=0;
3312     int j;
3313    
3314     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3315     if (!canbang)
3316     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3317     if (in_handler)
3318     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3319    
3320     if (canbang && i>=compiled_code && i<=current_compile_p) {
3321     if (*i==0x66) {
3322     i++;
3323     size=2;
3324     len++;
3325     }
3326    
3327     switch(i[0]) {
3328     case 0x8a:
3329     if ((i[1]&0xc0)==0x80) {
3330     r=(i[1]>>3)&7;
3331     dir=SIG_READ;
3332     size=1;
3333     len+=6;
3334     break;
3335     }
3336     break;
3337     case 0x88:
3338     if ((i[1]&0xc0)==0x80) {
3339     r=(i[1]>>3)&7;
3340     dir=SIG_WRITE;
3341     size=1;
3342     len+=6;
3343     break;
3344     }
3345     break;
3346     case 0x8b:
3347     if ((i[1]&0xc0)==0x80) {
3348     r=(i[1]>>3)&7;
3349     dir=SIG_READ;
3350     len+=6;
3351     break;
3352     }
3353     if ((i[1]&0xc0)==0x40) {
3354     r=(i[1]>>3)&7;
3355     dir=SIG_READ;
3356     len+=3;
3357     break;
3358     }
3359     break;
3360     case 0x89:
3361     if ((i[1]&0xc0)==0x80) {
3362     r=(i[1]>>3)&7;
3363     dir=SIG_WRITE;
3364     len+=6;
3365     break;
3366     }
3367     if ((i[1]&0xc0)==0x40) {
3368     r=(i[1]>>3)&7;
3369     dir=SIG_WRITE;
3370     len+=3;
3371     break;
3372     }
3373     break;
3374     }
3375     }
3376    
3377     if (r!=-1) {
3378     void* pr=NULL;
3379     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3380    
3381     switch(r) {
3382     case 0: pr=&(sc.eax); break;
3383     case 1: pr=&(sc.ecx); break;
3384     case 2: pr=&(sc.edx); break;
3385     case 3: pr=&(sc.ebx); break;
3386     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3387     case 5: pr=(size>1)?
3388     (void*)(&(sc.ebp)):
3389     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3390     case 6: pr=(size>1)?
3391     (void*)(&(sc.esi)):
3392     (void*)(((uae_u8*)&(sc.edx))+1); break;
3393     case 7: pr=(size>1)?
3394     (void*)(&(sc.edi)):
3395     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3396     default: abort();
3397     }
3398     if (pr) {
3399     blockinfo* bi;
3400    
3401     if (currprefs.comp_oldsegv) {
3402     addr-=NATMEM_OFFSET;
3403    
3404     if ((addr>=0x10000000 && addr<0x40000000) ||
3405     (addr>=0x50000000)) {
3406     write_log("Suspicious address in %x SEGV handler.\n",addr);
3407     }
3408     if (dir==SIG_READ) {
3409     switch(size) {
3410     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3411     case 2: *((uae_u16*)pr)=get_word(addr); break;
3412     case 4: *((uae_u32*)pr)=get_long(addr); break;
3413     default: abort();
3414     }
3415     }
3416     else { /* write */
3417     switch(size) {
3418     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3419     case 2: put_word(addr,*((uae_u16*)pr)); break;
3420     case 4: put_long(addr,*((uae_u32*)pr)); break;
3421     default: abort();
3422     }
3423     }
3424     write_log("Handled one access!\n");
3425     fflush(stdout);
3426     segvcount++;
3427     sc.eip+=len;
3428     }
3429     else {
3430     void* tmp=target;
3431     int i;
3432     uae_u8 vecbuf[5];
3433    
3434     addr-=NATMEM_OFFSET;
3435    
3436     if ((addr>=0x10000000 && addr<0x40000000) ||
3437     (addr>=0x50000000)) {
3438     write_log("Suspicious address in %x SEGV handler.\n",addr);
3439     }
3440    
3441     target=(uae_u8*)sc.eip;
3442     for (i=0;i<5;i++)
3443     vecbuf[i]=target[i];
3444     emit_byte(0xe9);
3445 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3446 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3447    
3448     write_log("Handled one access!\n");
3449     fflush(stdout);
3450     segvcount++;
3451    
3452     target=veccode;
3453    
3454     if (dir==SIG_READ) {
3455     switch(size) {
3456     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3457     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3458     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3459     default: abort();
3460     }
3461     }
3462     else { /* write */
3463     switch(size) {
3464     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3465     case 2: put_word(addr,*((uae_u16*)pr)); break;
3466     case 4: put_long(addr,*((uae_u32*)pr)); break;
3467     default: abort();
3468     }
3469     }
3470     for (i=0;i<5;i++)
3471     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3472 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3473 gbeauche 1.1 emit_byte(0xe9);
3474 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3475 gbeauche 1.1 in_handler=1;
3476     target=tmp;
3477     }
3478     bi=active;
3479     while (bi) {
3480     if (bi->handler &&
3481     (uae_u8*)bi->direct_handler<=i &&
3482     (uae_u8*)bi->nexthandler>i) {
3483     write_log("deleted trigger (%p<%p<%p) %p\n",
3484     bi->handler,
3485     i,
3486     bi->nexthandler,
3487     bi->pc_p);
3488     invalidate_block(bi);
3489     raise_in_cl_list(bi);
3490     set_special(0);
3491     return;
3492     }
3493     bi=bi->next;
3494     }
3495     /* Not found in the active list. Might be a rom routine that
3496     is in the dormant list */
3497     bi=dormant;
3498     while (bi) {
3499     if (bi->handler &&
3500     (uae_u8*)bi->direct_handler<=i &&
3501     (uae_u8*)bi->nexthandler>i) {
3502     write_log("deleted trigger (%p<%p<%p) %p\n",
3503     bi->handler,
3504     i,
3505     bi->nexthandler,
3506     bi->pc_p);
3507     invalidate_block(bi);
3508     raise_in_cl_list(bi);
3509     set_special(0);
3510     return;
3511     }
3512     bi=bi->next;
3513     }
3514     write_log("Huh? Could not find trigger!\n");
3515     return;
3516     }
3517     }
3518     write_log("Can't handle access!\n");
3519     for (j=0;j<10;j++) {
3520     write_log("instruction byte %2d is %02x\n",j,i[j]);
3521     }
3522     write_log("Please send the above info (starting at \"fault address\") to\n"
3523     "bmeyer@csse.monash.edu.au\n"
3524     "This shouldn't happen ;-)\n");
3525     fflush(stdout);
3526     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3527     }
3528     #endif
3529    
3530    
3531     /*************************************************************************
3532     * Checking for CPU features *
3533     *************************************************************************/
3534    
3535 gbeauche 1.3 struct cpuinfo_x86 {
3536     uae_u8 x86; // CPU family
3537     uae_u8 x86_vendor; // CPU vendor
3538     uae_u8 x86_processor; // CPU canonical processor type
3539     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3540     uae_u32 x86_hwcap;
3541     uae_u8 x86_model;
3542     uae_u8 x86_mask;
3543     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3544     char x86_vendor_id[16];
3545     };
3546     struct cpuinfo_x86 cpuinfo;
3547    
/* CPU vendors recognized by x86_get_cpu_vendor().  The known vendors are
   numbered consecutively from 0; 0xff marks an unrecognized vendor. */
enum {
    X86_VENDOR_INTEL = 0,
    X86_VENDOR_CYRIX,           /* 1 */
    X86_VENDOR_AMD,             /* 2 */
    X86_VENDOR_UMC,             /* 3 */
    X86_VENDOR_NEXGEN,          /* 4 */
    X86_VENDOR_CENTAUR,         /* 5 */
    X86_VENDOR_RISE,            /* 6 */
    X86_VENDOR_TRANSMETA,       /* 7 */
    X86_VENDOR_NSC,             /* 8 */
    X86_VENDOR_UNKNOWN = 0xff
};
3560    
/* Canonical processor types.  The values index both
   x86_processor_string_table[] and x86_alignments[], so all three must be
   kept in the same order.  X86_PROCESSOR_max is the number of entries and
   doubles as the "not detected" marker. */
enum {
    X86_PROCESSOR_I386 = 0,     /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_K8,
    X86_PROCESSOR_max
};
3572    
3573     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3574     "80386",
3575     "80486",
3576     "Pentium",
3577     "PentiumPro",
3578     "K6",
3579     "Athlon",
3580 gbeauche 1.16 "Pentium4",
3581     "K8"
3582 gbeauche 1.3 };
3583    
3584     static struct ptt {
3585     const int align_loop;
3586     const int align_loop_max_skip;
3587     const int align_jump;
3588     const int align_jump_max_skip;
3589     const int align_func;
3590     }
3591     x86_alignments[X86_PROCESSOR_max] = {
3592     { 4, 3, 4, 3, 4 },
3593     { 16, 15, 16, 15, 16 },
3594     { 16, 7, 16, 7, 16 },
3595     { 16, 15, 16, 7, 16 },
3596     { 32, 7, 32, 7, 32 },
3597 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3598 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3599     { 16, 7, 16, 7, 16 }
3600 gbeauche 1.3 };
3601 gbeauche 1.1
3602 gbeauche 1.3 static void
3603     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3604 gbeauche 1.1 {
3605 gbeauche 1.3 char *v = c->x86_vendor_id;
3606    
3607     if (!strcmp(v, "GenuineIntel"))
3608     c->x86_vendor = X86_VENDOR_INTEL;
3609     else if (!strcmp(v, "AuthenticAMD"))
3610     c->x86_vendor = X86_VENDOR_AMD;
3611     else if (!strcmp(v, "CyrixInstead"))
3612     c->x86_vendor = X86_VENDOR_CYRIX;
3613     else if (!strcmp(v, "Geode by NSC"))
3614     c->x86_vendor = X86_VENDOR_NSC;
3615     else if (!strcmp(v, "UMC UMC UMC "))
3616     c->x86_vendor = X86_VENDOR_UMC;
3617     else if (!strcmp(v, "CentaurHauls"))
3618     c->x86_vendor = X86_VENDOR_CENTAUR;
3619     else if (!strcmp(v, "NexGenDriven"))
3620     c->x86_vendor = X86_VENDOR_NEXGEN;
3621     else if (!strcmp(v, "RiseRiseRise"))
3622     c->x86_vendor = X86_VENDOR_RISE;
3623     else if (!strcmp(v, "GenuineTMx86") ||
3624     !strcmp(v, "TransmetaCPU"))
3625     c->x86_vendor = X86_VENDOR_TRANSMETA;
3626     else
3627     c->x86_vendor = X86_VENDOR_UNKNOWN;
3628     }
3629 gbeauche 1.1
3630 gbeauche 1.3 static void
3631     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3632     {
3633     static uae_u8 cpuid_space[256];
3634 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3635 gbeauche 1.3 uae_u8* tmp=get_target();
3636 gbeauche 1.1
3637 gbeauche 1.20 s_op = op;
3638 gbeauche 1.3 set_target(cpuid_space);
3639     raw_push_l_r(0); /* eax */
3640     raw_push_l_r(1); /* ecx */
3641     raw_push_l_r(2); /* edx */
3642     raw_push_l_r(3); /* ebx */
3643 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3644 gbeauche 1.3 raw_cpuid(0);
3645 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3646     raw_mov_l_mr((uintptr)&s_ebx,3);
3647     raw_mov_l_mr((uintptr)&s_ecx,1);
3648     raw_mov_l_mr((uintptr)&s_edx,2);
3649 gbeauche 1.3 raw_pop_l_r(3);
3650     raw_pop_l_r(2);
3651     raw_pop_l_r(1);
3652     raw_pop_l_r(0);
3653     raw_ret();
3654     set_target(tmp);
3655 gbeauche 1.1
3656 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3657 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3658     if (ebx != NULL) *ebx = s_ebx;
3659     if (ecx != NULL) *ecx = s_ecx;
3660     if (edx != NULL) *edx = s_edx;
3661 gbeauche 1.1 }
3662    
3663 gbeauche 1.3 static void
3664     raw_init_cpu(void)
3665 gbeauche 1.1 {
3666 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3667    
3668     /* Defaults */
3669 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3670 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3671     c->cpuid_level = -1; /* CPUID not detected */
3672     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3673     c->x86_vendor_id[0] = '\0'; /* Unset */
3674     c->x86_hwcap = 0;
3675    
3676     /* Get vendor name */
3677     c->x86_vendor_id[12] = '\0';
3678     cpuid(0x00000000,
3679     (uae_u32 *)&c->cpuid_level,
3680     (uae_u32 *)&c->x86_vendor_id[0],
3681     (uae_u32 *)&c->x86_vendor_id[8],
3682     (uae_u32 *)&c->x86_vendor_id[4]);
3683     x86_get_cpu_vendor(c);
3684    
3685     /* Intel-defined flags: level 0x00000001 */
3686     c->x86_brand_id = 0;
3687     if ( c->cpuid_level >= 0x00000001 ) {
3688     uae_u32 tfms, brand_id;
3689     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3690     c->x86 = (tfms >> 8) & 15;
3691     c->x86_model = (tfms >> 4) & 15;
3692     c->x86_brand_id = brand_id & 0xff;
3693     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3694     (c->x86 == 0xf)) {
3695     /* AMD Extended Family and Model Values */
3696     c->x86 += (tfms >> 20) & 0xff;
3697     c->x86_model += (tfms >> 12) & 0xf0;
3698     }
3699     c->x86_mask = tfms & 15;
3700     } else {
3701     /* Have CPUID level 0 only - unheard of */
3702     c->x86 = 4;
3703     }
3704    
3705 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3706     uae_u32 xlvl;
3707     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3708     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3709     if ( xlvl >= 0x80000001 ) {
3710     uae_u32 features;
3711     cpuid(0x80000001, NULL, NULL, NULL, &features);
3712     if (features & (1 << 29)) {
3713     /* Assume x86-64 if long mode is supported */
3714     c->x86_processor = X86_PROCESSOR_K8;
3715     }
3716     }
3717     }
3718    
3719 gbeauche 1.3 /* Canonicalize processor ID */
3720     switch (c->x86) {
3721     case 3:
3722     c->x86_processor = X86_PROCESSOR_I386;
3723     break;
3724     case 4:
3725     c->x86_processor = X86_PROCESSOR_I486;
3726     break;
3727     case 5:
3728     if (c->x86_vendor == X86_VENDOR_AMD)
3729     c->x86_processor = X86_PROCESSOR_K6;
3730     else
3731     c->x86_processor = X86_PROCESSOR_PENTIUM;
3732     break;
3733     case 6:
3734     if (c->x86_vendor == X86_VENDOR_AMD)
3735     c->x86_processor = X86_PROCESSOR_ATHLON;
3736     else
3737     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3738     break;
3739     case 15:
3740     if (c->x86_vendor == X86_VENDOR_INTEL) {
3741 gbeauche 1.16 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3742 gbeauche 1.3 if (c->x86_brand_id >= 8)
3743     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3744     }
3745 gbeauche 1.16 if (c->x86_vendor == X86_VENDOR_AMD) {
3746     /* Assume an Athlon processor if family == 15 and it was not
3747     detected as an x86-64 so far */
3748     if (c->x86_processor == X86_PROCESSOR_max)
3749     c->x86_processor = X86_PROCESSOR_ATHLON;
3750     }
3751 gbeauche 1.3 break;
3752     }
3753     if (c->x86_processor == X86_PROCESSOR_max) {
3754     fprintf(stderr, "Error: unknown processor type\n");
3755     fprintf(stderr, " Family : %d\n", c->x86);
3756     fprintf(stderr, " Model : %d\n", c->x86_model);
3757     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3758 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3759 gbeauche 1.3 if (c->x86_brand_id)
3760     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3761     abort();
3762     }
3763    
3764     /* Have CMOV support? */
3765 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3766 gbeauche 1.3
3767     /* Can the host CPU suffer from partial register stalls? */
3768     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3769     #if 1
3770     /* It appears that partial register writes are a bad idea even on
3771 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3772     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3773 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3774     have_rat_stall = true;
3775 gbeauche 1.1 #endif
3776 gbeauche 1.3
3777     /* Alignments */
3778     if (tune_alignment) {
3779     align_loops = x86_alignments[c->x86_processor].align_loop;
3780     align_jumps = x86_alignments[c->x86_processor].align_jump;
3781     }
3782    
3783     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3784     c->cpuid_level, c->x86_vendor_id,
3785     x86_processor_string_table[c->x86_processor]);
3786 gbeauche 1.1 }
3787    
3788 gbeauche 1.10 static bool target_check_bsf(void)
3789     {
3790     bool mismatch = false;
3791     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3792     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3793     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3794     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3795     for (int value = -1; value <= 1; value++) {
3796     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3797     int tmp = value;
3798     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3799 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3800 gbeauche 1.10 int OF = (flags >> 11) & 1;
3801     int SF = (flags >> 7) & 1;
3802     int ZF = (flags >> 6) & 1;
3803     int CF = flags & 1;
3804     tmp = (value == 0);
3805     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3806     mismatch = true;
3807     }
3808     }}}}
3809     if (mismatch)
3810     write_log("Target CPU defines all flags on BSF instruction\n");
3811     return !mismatch;
3812     }
3813    
3814 gbeauche 1.1
3815     /*************************************************************************
3816     * FPU stuff *
3817     *************************************************************************/
3818    
3819    
3820     static __inline__ void raw_fp_init(void)
3821     {
3822     int i;
3823    
3824     for (i=0;i<N_FREGS;i++)
3825     live.spos[i]=-2;
3826     live.tos=-1; /* Stack is empty */
3827     }
3828    
3829     static __inline__ void raw_fp_cleanup_drop(void)
3830     {
3831     #if 0
3832     /* using FINIT instead of popping all the entries.
3833     Seems to have side effects --- there is display corruption in
3834     Quake when this is used */
3835     if (live.tos>1) {
3836     emit_byte(0x9b);
3837     emit_byte(0xdb);
3838     emit_byte(0xe3);
3839     live.tos=-1;
3840     }
3841     #endif
3842     while (live.tos>=1) {
3843     emit_byte(0xde);
3844     emit_byte(0xd9);
3845     live.tos-=2;
3846     }
3847     while (live.tos>=0) {
3848     emit_byte(0xdd);
3849     emit_byte(0xd8);
3850     live.tos--;
3851     }
3852     raw_fp_init();
3853     }
3854    
3855     static __inline__ void make_tos(int r)
3856     {
3857     int p,q;
3858    
3859     if (live.spos[r]<0) { /* Register not yet on stack */
3860     emit_byte(0xd9);
3861     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3862     live.tos++;
3863     live.spos[r]=live.tos;
3864     live.onstack[live.tos]=r;
3865     return;
3866     }
3867     /* Register is on stack */
3868     if (live.tos==live.spos[r])
3869     return;
3870     p=live.spos[r];
3871     q=live.onstack[live.tos];
3872    
3873     emit_byte(0xd9);
3874     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3875     live.onstack[live.tos]=r;
3876     live.spos[r]=live.tos;
3877     live.onstack[p]=q;
3878     live.spos[q]=p;
3879     }
3880    
3881     static __inline__ void make_tos2(int r, int r2)
3882     {
3883     int q;
3884    
3885     make_tos(r2); /* Put the reg that's supposed to end up in position2
3886     on top */
3887    
3888     if (live.spos[r]<0) { /* Register not yet on stack */
3889     make_tos(r); /* This will extend the stack */
3890     return;
3891     }
3892     /* Register is on stack */
3893     emit_byte(0xd9);
3894     emit_byte(0xc9); /* Move r2 into position 2 */
3895    
3896     q=live.onstack[live.tos-1];
3897     live.onstack[live.tos]=q;
3898     live.spos[q]=live.tos;
3899     live.onstack[live.tos-1]=r2;
3900     live.spos[r2]=live.tos-1;
3901    
3902     make_tos(r); /* And r into 1 */
3903     }
3904    
3905     static __inline__ int stackpos(int r)
3906     {
3907     if (live.spos[r]<0)
3908     abort();
3909     if (live.tos<live.spos[r]) {
3910     printf("Looking for spos for fnreg %d\n",r);
3911     abort();
3912     }
3913     return live.tos-live.spos[r];
3914     }
3915    
3916     static __inline__ void usereg(int r)
3917     {
3918     if (live.spos[r]<0)
3919     make_tos(r);
3920     }
3921    
3922     /* This is called with one FP value in a reg *above* tos, which it will
3923     pop off the stack if necessary */
3924     static __inline__ void tos_make(int r)
3925     {
3926     if (live.spos[r]<0) {
3927     live.tos++;
3928     live.spos[r]=live.tos;
3929     live.onstack[live.tos]=r;
3930     return;
3931     }
3932     emit_byte(0xdd);
3933     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3934     and pop it*/
3935     }
3936 gbeauche 1.23
3937     /* FP helper functions */
3938     #if USE_NEW_RTASM
3939     #define DEFINE_OP(NAME, GEN) \
3940     static inline void raw_##NAME(uint32 m) \
3941     { \
3942     GEN(m, X86_NOREG, X86_NOREG, 1); \
3943     }
3944     DEFINE_OP(fstl, FSTLm);
3945     DEFINE_OP(fstpl, FSTPLm);
3946     DEFINE_OP(fldl, FLDLm);
3947     DEFINE_OP(fildl, FILDLm);
3948     DEFINE_OP(fistl, FISTLm);
3949     DEFINE_OP(flds, FLDSm);
3950     DEFINE_OP(fsts, FSTSm);
3951     DEFINE_OP(fstpt, FSTPTm);
3952     DEFINE_OP(fldt, FLDTm);
3953     #else
3954     #define DEFINE_OP(NAME, OP1, OP2) \
3955     static inline void raw_##NAME(uint32 m) \
3956     { \
3957     emit_byte(OP1); \
3958     emit_byte(OP2); \
3959     emit_long(m); \
3960     }
3961     DEFINE_OP(fstl, 0xdd, 0x15);
3962     DEFINE_OP(fstpl, 0xdd, 0x1d);
3963     DEFINE_OP(fldl, 0xdd, 0x05);
3964     DEFINE_OP(fildl, 0xdb, 0x05);
3965     DEFINE_OP(fistl, 0xdb, 0x15);
3966     DEFINE_OP(flds, 0xd9, 0x05);
3967     DEFINE_OP(fsts, 0xd9, 0x15);
3968     DEFINE_OP(fstpt, 0xdb, 0x3d);
3969     DEFINE_OP(fldt, 0xdb, 0x2d);
3970     #endif
3971     #undef DEFINE_OP
3972    
3973 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3974     {
3975     make_tos(r);
3976 gbeauche 1.23 raw_fstl(m);
3977 gbeauche 1.1 }
3978     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3979    
3980     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
3981     {
3982     make_tos(r);
3983 gbeauche 1.23 raw_fstpl(m);
3984 gbeauche 1.1 live.onstack[live.tos]=-1;
3985     live.tos--;
3986     live.spos[r]=-2;
3987     }
3988     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3989    
3990     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3991     {
3992 gbeauche 1.23 raw_fldl(m);
3993 gbeauche 1.1 tos_make(r);
3994     }
3995     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3996    
3997     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3998     {
3999 gbeauche 1.23 raw_fildl(m);
4000 gbeauche 1.1 tos_make(r);
4001     }
4002     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4003    
4004     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4005     {
4006     make_tos(r);
4007 gbeauche 1.23 raw_fistl(m);
4008 gbeauche 1.1 }
4009     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4010    
4011     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4012     {
4013 gbeauche 1.23 raw_flds(m);
4014 gbeauche 1.1 tos_make(r);
4015     }
4016     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4017    
4018     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4019     {
4020     make_tos(r);
4021 gbeauche 1.23 raw_fsts(m);
4022 gbeauche 1.1 }
4023     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4024    
4025     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4026     {
4027     int rs;
4028    
4029     /* Stupid x87 can't write a long double to mem without popping the
4030     stack! */
4031     usereg(r);
4032     rs=stackpos(r);
4033     emit_byte(0xd9); /* Get a copy to the top of stack */
4034     emit_byte(0xc0+rs);
4035    
4036 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4037 gbeauche 1.1 }
4038     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4039    
4040     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4041     {
4042     int rs;
4043    
4044     make_tos(r);
4045 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4046 gbeauche 1.1 live.onstack[live.tos]=-1;
4047     live.tos--;
4048     live.spos[r]=-2;
4049     }
4050     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4051    
4052     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4053     {
4054 gbeauche 1.23 raw_fldt(m);
4055 gbeauche 1.1 tos_make(r);
4056     }
4057     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4058    
4059     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4060     {
4061     emit_byte(0xd9);
4062     emit_byte(0xeb);
4063     tos_make(r);
4064     }
4065     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4066    
4067     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4068     {
4069     emit_byte(0xd9);
4070     emit_byte(0xec);
4071     tos_make(r);
4072     }
4073     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4074    
4075     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4076     {
4077     emit_byte(0xd9);
4078     emit_byte(0xea);
4079     tos_make(r);
4080     }
4081     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4082    
4083     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4084     {
4085     emit_byte(0xd9);
4086     emit_byte(0xed);
4087     tos_make(r);
4088     }
4089     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4090    
4091     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4092     {
4093     emit_byte(0xd9);
4094     emit_byte(0xe8);
4095     tos_make(r);
4096     }
4097     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4098    
4099     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4100     {
4101     emit_byte(0xd9);
4102     emit_byte(0xee);
4103     tos_make(r);
4104     }
4105     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4106    
4107     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4108     {
4109     int ds;
4110    
4111     usereg(s);
4112     ds=stackpos(s);
4113     if (ds==0 && live.spos[d]>=0) {
4114     /* source is on top of stack, and we already have the dest */
4115     int dd=stackpos(d);
4116     emit_byte(0xdd);
4117     emit_byte(0xd0+dd);
4118     }
4119     else {
4120     emit_byte(0xd9);
4121     emit_byte(0xc0+ds); /* duplicate source on tos */
4122     tos_make(d); /* store to destination, pop if necessary */
4123     }
4124     }
4125     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4126    
4127     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4128     {
4129     emit_byte(0xd9);
4130     emit_byte(0xa8+index);
4131     emit_long(base);
4132     }
4133     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4134    
4135    
4136     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4137     {
4138     int ds;
4139    
4140     if (d!=s) {
4141     usereg(s);
4142     ds=stackpos(s);
4143     emit_byte(0xd9);
4144     emit_byte(0xc0+ds); /* duplicate source */
4145     emit_byte(0xd9);
4146     emit_byte(0xfa); /* take square root */
4147     tos_make(d); /* store to destination */
4148     }
4149     else {
4150     make_tos(d);
4151     emit_byte(0xd9);
4152     emit_byte(0xfa); /* take square root */
4153     }
4154     }
4155     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4156    
4157     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4158     {
4159     int ds;
4160    
4161     if (d!=s) {
4162     usereg(s);
4163     ds=stackpos(s);
4164     emit_byte(0xd9);
4165     emit_byte(0xc0+ds); /* duplicate source */
4166     emit_byte(0xd9);
4167     emit_byte(0xe1); /* take fabs */
4168     tos_make(d); /* store to destination */
4169     }
4170     else {
4171     make_tos(d);
4172     emit_byte(0xd9);
4173     emit_byte(0xe1); /* take fabs */
4174     }
4175     }
4176     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4177    
4178     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4179     {
4180     int ds;
4181    
4182     if (d!=s) {
4183     usereg(s);
4184     ds=stackpos(s);
4185     emit_byte(0xd9);
4186     emit_byte(0xc0+ds); /* duplicate source */
4187     emit_byte(0xd9);
4188     emit_byte(0xfc); /* take frndint */
4189     tos_make(d); /* store to destination */
4190     }
4191     else {
4192     make_tos(d);
4193     emit_byte(0xd9);
4194     emit_byte(0xfc); /* take frndint */
4195     }
4196     }
4197     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4198    
4199     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4200     {
4201     int ds;
4202    
4203     if (d!=s) {
4204     usereg(s);
4205     ds=stackpos(s);
4206     emit_byte(0xd9);
4207     emit_byte(0xc0+ds); /* duplicate source */
4208     emit_byte(0xd9);
4209     emit_byte(0xff); /* take cos */
4210     tos_make(d); /* store to destination */
4211     }
4212     else {
4213     make_tos(d);
4214     emit_byte(0xd9);
4215     emit_byte(0xff); /* take cos */
4216     }
4217     }
4218     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4219    
4220     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4221     {
4222     int ds;
4223    
4224     if (d!=s) {
4225     usereg(s);
4226     ds=stackpos(s);
4227     emit_byte(0xd9);
4228     emit_byte(0xc0+ds); /* duplicate source */
4229     emit_byte(0xd9);
4230     emit_byte(0xfe); /* take sin */
4231     tos_make(d); /* store to destination */
4232     }
4233     else {
4234     make_tos(d);
4235     emit_byte(0xd9);
4236     emit_byte(0xfe); /* take sin */
4237     }
4238     }
4239     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4240    
/* The constant 1.0 as a 64-bit double.  Its address is embedded as the
   memory operand of the "fadd m64" emitted by raw_ftwotox_rr() and
   raw_fetox_rr(), so it must stay at a fixed location for as long as the
   generated code is used. */
double one = 1;
4242     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4243     {
4244     int ds;
4245    
4246     usereg(s);
4247     ds=stackpos(s);
4248     emit_byte(0xd9);
4249     emit_byte(0xc0+ds); /* duplicate source */
4250    
4251     emit_byte(0xd9);
4252     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4253     emit_byte(0xd9);
4254     emit_byte(0xfc); /* rndint */
4255     emit_byte(0xd9);
4256     emit_byte(0xc9); /* swap top two elements */
4257     emit_byte(0xd8);
4258     emit_byte(0xe1); /* subtract rounded from original */
4259     emit_byte(0xd9);
4260     emit_byte(0xf0); /* f2xm1 */
4261     emit_byte(0xdc);
4262     emit_byte(0x05);
4263 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4264 gbeauche 1.1 emit_byte(0xd9);
4265     emit_byte(0xfd); /* and scale it */
4266     emit_byte(0xdd);
4267     emit_byte(0xd9); /* take he rounded value off */
4268     tos_make(d); /* store to destination */
4269     }
4270     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4271    
4272     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4273     {
4274     int ds;
4275    
4276     usereg(s);
4277     ds=stackpos(s);
4278     emit_byte(0xd9);
4279     emit_byte(0xc0+ds); /* duplicate source */
4280     emit_byte(0xd9);
4281     emit_byte(0xea); /* fldl2e */
4282     emit_byte(0xde);
4283     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4284    
4285     emit_byte(0xd9);
4286     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4287     emit_byte(0xd9);
4288     emit_byte(0xfc); /* rndint */
4289     emit_byte(0xd9);
4290     emit_byte(0xc9); /* swap top two elements */
4291     emit_byte(0xd8);
4292     emit_byte(0xe1); /* subtract rounded from original */
4293     emit_byte(0xd9);
4294     emit_byte(0xf0); /* f2xm1 */
4295     emit_byte(0xdc);
4296     emit_byte(0x05);
4297 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4298 gbeauche 1.1 emit_byte(0xd9);
4299     emit_byte(0xfd); /* and scale it */
4300     emit_byte(0xdd);
4301     emit_byte(0xd9); /* take he rounded value off */
4302     tos_make(d); /* store to destination */
4303     }
4304     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4305    
4306     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4307     {
4308     int ds;
4309    
4310     usereg(s);
4311     ds=stackpos(s);
4312     emit_byte(0xd9);
4313     emit_byte(0xc0+ds); /* duplicate source */
4314     emit_byte(0xd9);
4315     emit_byte(0xe8); /* push '1' */
4316     emit_byte(0xd9);
4317     emit_byte(0xc9); /* swap top two */
4318     emit_byte(0xd9);
4319     emit_byte(0xf1); /* take 1*log2(x) */
4320     tos_make(d); /* store to destination */
4321     }
4322     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4323    
4324    
4325     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4326     {
4327     int ds;
4328    
4329     if (d!=s) {
4330     usereg(s);
4331     ds=stackpos(s);
4332     emit_byte(0xd9);
4333     emit_byte(0xc0+ds); /* duplicate source */
4334     emit_byte(0xd9);
4335     emit_byte(0xe0); /* take fchs */
4336     tos_make(d); /* store to destination */
4337     }
4338     else {
4339     make_tos(d);
4340     emit_byte(0xd9);
4341     emit_byte(0xe0); /* take fchs */
4342     }
4343     }
4344     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4345    
4346     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4347     {
4348     int ds;
4349    
4350     usereg(s);
4351     usereg(d);
4352    
4353     if (live.spos[s]==live.tos) {
4354     /* Source is on top of stack */
4355     ds=stackpos(d);
4356     emit_byte(0xdc);
4357     emit_byte(0xc0+ds); /* add source to dest*/
4358     }
4359     else {
4360     make_tos(d);
4361     ds=stackpos(s);
4362    
4363     emit_byte(0xd8);
4364     emit_byte(0xc0+ds); /* add source to dest*/
4365     }
4366     }
4367     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4368    
4369     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4370     {
4371     int ds;
4372    
4373     usereg(s);
4374     usereg(d);
4375    
4376     if (live.spos[s]==live.tos) {
4377     /* Source is on top of stack */
4378     ds=stackpos(d);
4379     emit_byte(0xdc);
4380     emit_byte(0xe8+ds); /* sub source from dest*/
4381     }
4382     else {
4383     make_tos(d);
4384     ds=stackpos(s);
4385    
4386     emit_byte(0xd8);
4387     emit_byte(0xe0+ds); /* sub src from dest */
4388     }
4389     }
4390     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4391    
4392     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4393     {
4394     int ds;
4395    
4396     usereg(s);
4397     usereg(d);
4398    
4399     make_tos(d);
4400     ds=stackpos(s);
4401    
4402     emit_byte(0xdd);
4403     emit_byte(0xe0+ds); /* cmp dest with source*/
4404     }
4405     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4406    
4407     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4408     {
4409     int ds;
4410    
4411     usereg(s);
4412     usereg(d);
4413    
4414     if (live.spos[s]==live.tos) {
4415     /* Source is on top of stack */
4416     ds=stackpos(d);
4417     emit_byte(0xdc);
4418     emit_byte(0xc8+ds); /* mul dest by source*/
4419     }
4420     else {
4421     make_tos(d);
4422     ds=stackpos(s);
4423    
4424     emit_byte(0xd8);
4425     emit_byte(0xc8+ds); /* mul dest by source*/
4426     }
4427     }
4428     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4429    
4430     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4431     {
4432     int ds;
4433    
4434     usereg(s);
4435     usereg(d);
4436    
4437     if (live.spos[s]==live.tos) {
4438     /* Source is on top of stack */
4439     ds=stackpos(d);
4440     emit_byte(0xdc);
4441     emit_byte(0xf8+ds); /* div dest by source */
4442     }
4443     else {
4444     make_tos(d);
4445     ds=stackpos(s);
4446    
4447     emit_byte(0xd8);
4448     emit_byte(0xf0+ds); /* div dest by source*/
4449     }
4450     }
4451     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4452    
4453     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4454     {
4455     int ds;
4456    
4457     usereg(s);
4458     usereg(d);
4459    
4460     make_tos2(d,s);
4461     ds=stackpos(s);
4462    
4463     if (ds!=1) {
4464     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4465     abort();
4466     }
4467     emit_byte(0xd9);
4468     emit_byte(0xf8); /* take rem from dest by source */
4469     }
4470     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4471    
4472     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4473     {
4474     int ds;
4475    
4476     usereg(s);
4477     usereg(d);
4478    
4479     make_tos2(d,s);
4480     ds=stackpos(s);
4481    
4482     if (ds!=1) {
4483     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4484     abort();
4485     }
4486     emit_byte(0xd9);
4487     emit_byte(0xf5); /* take rem1 from dest by source */
4488     }
4489     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4490    
4491    
4492     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4493     {
4494     make_tos(r);
4495     emit_byte(0xd9); /* ftst */
4496     emit_byte(0xe4);
4497     }
4498     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4499    
/* raw_fflags_into_flags() clobbers %eax when the target processor has no
   fucomi: the fallback path goes through "fstsw ax".  have_cmov is the
   feature flag used to select between the two paths. */
#define FFLAG_NREG_CLOBBER_CONDITION	(!have_cmov)
#define FFLAG_NREG			EAX_INDEX
4503    
4504     static __inline__ void raw_fflags_into_flags(int r)
4505     {
4506     int p;
4507    
4508     usereg(r);
4509     p=stackpos(r);
4510    
4511     emit_byte(0xd9);
4512     emit_byte(0xee); /* Push 0 */
4513     emit_byte(0xd9);
4514     emit_byte(0xc9+p); /* swap top two around */
4515     if (have_cmov) {
4516     // gb-- fucomi is for P6 cores only, not K6-2 then...
4517     emit_byte(0xdb);
4518     emit_byte(0xe9+p); /* fucomi them */
4519     }
4520     else {
4521     emit_byte(0xdd);
4522     emit_byte(0xe1+p); /* fucom them */
4523     emit_byte(0x9b);
4524     emit_byte(0xdf);
4525     emit_byte(0xe0); /* fstsw ax */
4526     raw_sahf(0); /* sahf */
4527     }
4528     emit_byte(0xdd);
4529     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4530     }