ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.28
Committed: 2005-04-21T09:08:57Z (19 years, 5 months ago) by gbeauche
Branch: MAIN
Changes since 1.27: +7 -5 lines
Log Message:
Recognize lahf_lm from Dual Core Opterons. This enables use of LAHF/SETO
instructions in long mode (64-bit). However, there seems to be another bug
in the JIT preventing it from being fully supported. m68k.h & codegen_x86.h
are easily fixed, but another patch is still needed.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
56     /* The register in which subroutines return an integer return value */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__)
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
/* Lists of register indices; every list is terminated by -1.
   always_used contains only 4, i.e. ESP_INDEX.
   On both build variants can_word holds every defined index except 4
   (ESP_INDEX). can_byte matches can_word on x86-64, but on 32-bit
   builds it is restricted to indices 0-3 (EAX, ECX, EDX, EBX).
   NOTE(review): presumably can_byte / can_word are the registers usable
   as 8-bit / 16-bit operands -- confirm against the register allocator. */
84     uae_s8 always_used[]={4,-1};
85 gbeauche 1.20 #if defined(__x86_64__)
86     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88     #else
89 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
90     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 gbeauche 1.20 #endif
92 gbeauche 1.1
93 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
94     /* Make sure interpretive core does not use cpuopti */
95     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 gbeauche 1.20 #error FIXME: code not ready
97 gbeauche 1.17 #else
98 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
99     by the caller */
100 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 gbeauche 1.17 #endif
102 gbeauche 1.1
103     /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
105     so we need to preserve some, but don't want to rely on everyone else
106     also saving those registers
107     - Special registers (such as the stack pointer) should not be "preserved"
108     by pushing, even though they are "saved" across function calls
109     */
110 gbeauche 1.21 #if defined(__x86_64__)
111     /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
113     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
114 gbeauche 1.21 #else
115     static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
116     #endif
117 gbeauche 1.1
118     /* Whether classes of instructions do or don't clobber the native flags */
119     #define CLOBBER_MOV
120     #define CLOBBER_LEA
121     #define CLOBBER_CMOV
122     #define CLOBBER_POP
123     #define CLOBBER_PUSH
124     #define CLOBBER_SUB clobber_flags()
125     #define CLOBBER_SBB clobber_flags()
126     #define CLOBBER_CMP clobber_flags()
127     #define CLOBBER_ADD clobber_flags()
128     #define CLOBBER_ADC clobber_flags()
129     #define CLOBBER_AND clobber_flags()
130     #define CLOBBER_OR clobber_flags()
131     #define CLOBBER_XOR clobber_flags()
132    
133     #define CLOBBER_ROL clobber_flags()
134     #define CLOBBER_ROR clobber_flags()
135     #define CLOBBER_SHLL clobber_flags()
136     #define CLOBBER_SHRL clobber_flags()
137     #define CLOBBER_SHRA clobber_flags()
138     #define CLOBBER_TEST clobber_flags()
139     #define CLOBBER_CL16
140     #define CLOBBER_CL8
141 gbeauche 1.20 #define CLOBBER_SE32
142 gbeauche 1.1 #define CLOBBER_SE16
143     #define CLOBBER_SE8
144 gbeauche 1.20 #define CLOBBER_ZE32
145 gbeauche 1.1 #define CLOBBER_ZE16
146     #define CLOBBER_ZE8
147     #define CLOBBER_SW16 clobber_flags()
148     #define CLOBBER_SW32
149     #define CLOBBER_SETCC
150     #define CLOBBER_MUL clobber_flags()
151     #define CLOBBER_BT clobber_flags()
152     #define CLOBBER_BSF clobber_flags()
153    
154 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
155 gbeauche 1.20 #if defined(__x86_64__)
156     #define USE_NEW_RTASM 1
157     #endif
158    
159     #if USE_NEW_RTASM
160 gbeauche 1.13
161     #if defined(__x86_64__)
162     #define X86_TARGET_64BIT 1
163     #endif
164     #define X86_FLAT_REGISTERS 0
165 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
166     #define X86_OPTIMIZE_ROTSHI 1
167 gbeauche 1.13 #include "codegen_x86.h"
168    
/* Map the x86_* hook names onto this compiler's own primitives:
   emit_byte / emit_word / emit_long / emit_quad for output, get_target
   for the current output position, and jit_fail for error reporting.
   x86_emit_failure passes the call site (__FILE__, __LINE__,
   __FUNCTION__) along with the message.
   NOTE(review): presumably these are the hooks the macros from
   codegen_x86.h (included just above) expand to -- confirm there. */
169     #define x86_emit_byte(B) emit_byte(B)
170     #define x86_emit_word(W) emit_word(W)
171     #define x86_emit_long(L) emit_long(L)
172 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
173 gbeauche 1.13 #define x86_get_target() get_target()
174     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
175    
/* Report a fatal code-generation error and terminate the process.
     msg      - description of the failure
     file     - source file of the failing call site
     line     - line number of the failing call site
     function - name of the function containing the call site
   Writes one line to stderr and then calls abort(), so it never returns.
   The x86_emit_failure macro invokes it with __FILE__, __LINE__ and
   __FUNCTION__. */
176     static void jit_fail(const char *msg, const char *file, int line, const char *function)
177     {
178     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
179     function, file, line, msg);
180     abort();
181     }
182    
/* Emit a push of register r onto the native stack.
   Despite the _l suffix, x86-64 builds emit the quadword form (PUSHQr);
   all other builds emit the 32-bit form (PUSHLr). */
183     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
184     {
185 gbeauche 1.20 #if defined(__x86_64__)
186     PUSHQr(r);
187     #else
188 gbeauche 1.13 PUSHLr(r);
189 gbeauche 1.20 #endif
190 gbeauche 1.13 }
191     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
192    
/* Emit a pop from the native stack into register r.
   Counterpart of raw_push_l_r: x86-64 builds emit the quadword form
   (POPQr), all other builds the 32-bit form (POPLr). */
193     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
194     {
195 gbeauche 1.20 #if defined(__x86_64__)
196     POPQr(r);
197     #else
198 gbeauche 1.13 POPLr(r);
199 gbeauche 1.20 #endif
200 gbeauche 1.13 }
201     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
202    
/* Emit a pop from the native stack straight into memory at address d.
   The memory operand is passed as (d, X86_NOREG, X86_NOREG, 1): d with
   no base register and no index register.
   x86-64 builds emit POPQm, all other builds POPLm. */
203 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
204     {
205     #if defined(__x86_64__)
206     POPQm(d, X86_NOREG, X86_NOREG, 1);
207     #else
208     POPLm(d, X86_NOREG, X86_NOREG, 1);
209     #endif
210     }
211     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
212    
213 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
214     {
215     BTLir(i, r);
216     }
217     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
218    
219     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
220     {
221     BTLrr(b, r);
222     }
223     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
224    
225     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
226     {
227     BTCLir(i, r);
228     }
229     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
230    
231     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
232     {
233     BTCLrr(b, r);
234     }
235     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
236    
237     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
238     {
239     BTRLir(i, r);
240     }
241     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
242    
243     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
244     {
245     BTRLrr(b, r);
246     }
247     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
248    
249     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
250     {
251     BTSLir(i, r);
252     }
253     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
254    
255     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
256     {
257     BTSLrr(b, r);
258     }
259     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
260    
261     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
262     {
263     SUBWir(i, d);
264     }
265     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
266    
267     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
268     {
269     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
270     }
271     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
272    
273     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
274     {
275     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
276     }
277     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
278    
279     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
280     {
281     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
282     }
283     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
284    
285     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
286     {
287     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
288     }
289     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
290    
291     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
292     {
293     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
294     }
295     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
296    
297     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
298     {
299     ROLBir(i, r);
300     }
301     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
302    
303     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
304     {
305     ROLWir(i, r);
306     }
307     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
308    
309     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
310     {
311     ROLLir(i, r);
312     }
313     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
314    
315     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
316     {
317     ROLLrr(r, d);
318     }
319     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320    
321     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
322     {
323     ROLWrr(r, d);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
328     {
329     ROLBrr(r, d);
330     }
331     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
332    
333     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
334     {
335     SHLLrr(r, d);
336     }
337     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
338    
339     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
340     {
341     SHLWrr(r, d);
342     }
343     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344    
345     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
346     {
347     SHLBrr(r, d);
348     }
349     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350    
351     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
352     {
353     RORBir(i, r);
354     }
355     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
356    
357     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
358     {
359     RORWir(i, r);
360     }
361     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
362    
363     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
364     {
365     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
366     }
367     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
368    
369     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
370     {
371     RORLir(i, r);
372     }
373     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
374    
375     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
376     {
377     RORLrr(r, d);
378     }
379     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
380    
381     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
382     {
383     RORWrr(r, d);
384     }
385     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
386    
387     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
388     {
389     RORBrr(r, d);
390     }
391     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
392    
393     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
394     {
395     SHRLrr(r, d);
396     }
397     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
398    
399     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
400     {
401     SHRWrr(r, d);
402     }
403     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
404    
405     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
406     {
407     SHRBrr(r, d);
408     }
409     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
410    
411     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
412     {
413 gbeauche 1.14 SARLrr(r, d);
414 gbeauche 1.13 }
415     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
416    
417     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
418     {
419 gbeauche 1.14 SARWrr(r, d);
420 gbeauche 1.13 }
421     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
422    
423     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
424     {
425 gbeauche 1.14 SARBrr(r, d);
426 gbeauche 1.13 }
427     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
428    
429     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
430     {
431     SHLLir(i, r);
432     }
433     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
434    
435     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
436     {
437     SHLWir(i, r);
438     }
439     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
440    
441     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
442     {
443     SHLBir(i, r);
444     }
445     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
446    
447     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
448     {
449     SHRLir(i, r);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
454     {
455     SHRWir(i, r);
456     }
457     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
458    
459     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
460     {
461     SHRBir(i, r);
462     }
463     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
464    
465     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
466     {
467 gbeauche 1.14 SARLir(i, r);
468 gbeauche 1.13 }
469     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
470    
471     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
472     {
473 gbeauche 1.14 SARWir(i, r);
474 gbeauche 1.13 }
475     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
476    
477     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
478     {
479 gbeauche 1.14 SARBir(i, r);
480 gbeauche 1.13 }
481     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
482    
/* Emit SAHF. The dummy_ah parameter is never referenced in the body.
   NOTE(review): presumably it only exists so the register allocator
   sees AH as an input of this operation -- confirm with callers. */
483     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
484     {
485     SAHF();
486     }
487     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
488    
/* Emit CPUID. The dummy_eax parameter is never referenced in the body.
   NOTE(review): presumably it only tells the register allocator that
   EAX is involved -- confirm with callers. */
489     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
490     {
491     CPUID();
492     }
493     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
494    
/* Emit LAHF. The dummy_ah parameter is never referenced in the body.
   NOTE(review): presumably it only exists so the register allocator
   sees AH as the output of this operation -- confirm with callers. */
495     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
496     {
497     LAHF();
498     }
499     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
500    
501     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
502     {
503     SETCCir(cc, d);
504     }
505     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
506    
507     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
508     {
509     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
510     }
511     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
512    
/* Conditionally move 32-bit register s into register d under condition
   code cc.
   When have_cmov is set, a single CMOVLrr is emitted. Otherwise the
   fallback emits a short conditional jump on cc^1 with displacement 2,
   followed by an unconditional MOVLrr.
   NOTE(review): cc^1 is presumably the inverse condition, and 2 is
   presumably the encoded length of the register-to-register MOV being
   jumped over -- the constant must stay in sync with what MOVLrr emits.
   On x86-64 builds the fallback path logs a message and aborts before
   emitting anything. */
513     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
514     {
515 gbeauche 1.15 if (have_cmov)
516     CMOVLrr(cc, s, d);
517     else { /* replacement using branch and mov */
518     #if defined(__x86_64__)
519     write_log("x86-64 implementations are bound to have CMOV!\n");
520     abort();
521     #endif
522     JCCSii(cc^1, 2);
523     MOVLrr(s, d);
524     }
525 gbeauche 1.13 }
526     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
527    
528     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
529     {
530     BSFLrr(s, d);
531     }
532     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
533    
534 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
535     {
536     MOVSLQrr(s, d);
537     }
538     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
539    
540 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
541     {
542     MOVSWLrr(s, d);
543     }
544     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
545    
546     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
547     {
548     MOVSBLrr(s, d);
549     }
550     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
551    
552     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
553     {
554     MOVZWLrr(s, d);
555     }
556     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
557    
558     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
559     {
560     MOVZBLrr(s, d);
561     }
562     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
563    
564     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
565     {
566 gbeauche 1.14 IMULLrr(s, d);
567 gbeauche 1.13 }
568     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
569    
/* Emit the one-operand IMULLr with s as its operand.
   The register assignment is fixed: d must be MUL_NREG1 (EAX_INDEX) and
   s must be MUL_NREG2 (EDX_INDEX). Any other pairing is logged through
   write_log and the process aborts.
   Per the MUL_NREG1 / MUL_NREG2 definitions, %eax holds the low 32 bits
   and %edx the high 32 bits after the multiply. */
570     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
571     {
572 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
573     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
574 gbeauche 1.13 abort();
575 gbeauche 1.14 }
576     IMULLr(s);
577 gbeauche 1.13 }
578     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
579    
/* Emit the one-operand MULLr with s as its operand.
   The register assignment is fixed: d must be MUL_NREG1 (EAX_INDEX) and
   s must be MUL_NREG2 (EDX_INDEX). Any other pairing is logged through
   write_log and the process aborts.
   Per the MUL_NREG1 / MUL_NREG2 definitions, %eax holds the low 32 bits
   and %edx the high 32 bits after the multiply. */
580     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
581     {
582 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
583     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
584 gbeauche 1.13 abort();
585 gbeauche 1.14 }
586     MULLr(s);
587 gbeauche 1.13 }
588     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
589    
/* Not implemented for this target: the body emits no code and calls
   abort() unconditionally, so reaching it terminates the process.
   Both parameters are unused. */
590     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
591     {
592 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
593 gbeauche 1.13 }
594     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
595    
596     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
597     {
598     MOVBrr(s, d);
599     }
600     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
601    
602     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
603     {
604     MOVWrr(s, d);
605     }
606     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
607    
608     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
609     {
610     MOVLmr(0, baser, index, factor, d);
611     }
612     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
613    
614     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
615     {
616     MOVWmr(0, baser, index, factor, d);
617     }
618     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
619    
620     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
621     {
622     MOVBmr(0, baser, index, factor, d);
623     }
624     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
625    
626     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
627     {
628     MOVLrm(s, 0, baser, index, factor);
629     }
630     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
631    
632     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
633     {
634     MOVWrm(s, 0, baser, index, factor);
635     }
636     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
637    
638     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
639     {
640     MOVBrm(s, 0, baser, index, factor);
641     }
642     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
643    
644     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
645     {
646     MOVLrm(s, base, baser, index, factor);
647     }
648     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
649    
650     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
651     {
652     MOVWrm(s, base, baser, index, factor);
653     }
654     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
655    
656     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
657     {
658     MOVBrm(s, base, baser, index, factor);
659     }
660     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
661    
662     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
663     {
664     MOVLmr(base, baser, index, factor, d);
665     }
666     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
667    
668     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
669     {
670     MOVWmr(base, baser, index, factor, d);
671     }
672     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
673    
674     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
675     {
676     MOVBmr(base, baser, index, factor, d);
677     }
678     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
679    
680     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
681     {
682     MOVLmr(base, X86_NOREG, index, factor, d);
683     }
684     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
685    
/* Conditionally load 32 bits into register d under condition code cond.
   The memory operand is passed as (base, X86_NOREG, index, factor):
   base plus a scaled index register, with no base register.
   When have_cmov is set, a single CMOVLmr is emitted. Otherwise the
   fallback emits a short conditional jump on cond^1 with displacement 7,
   followed by an unconditional MOVLmr of the same operand.
   NOTE(review): cond^1 is presumably the inverse condition, and 7 is
   presumably the encoded length of the MOV being jumped over -- the
   constant must stay in sync with what MOVLmr emits for this operand.
   On x86-64 builds the fallback path logs a message and aborts before
   emitting anything. */
686     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
687     {
688 gbeauche 1.15 if (have_cmov)
689     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
690     else { /* replacement using branch and mov */
691     #if defined(__x86_64__)
692     write_log("x86-64 implementations are bound to have CMOV!\n");
693     abort();
694     #endif
695     JCCSii(cond^1, 7);
696     MOVLmr(base, X86_NOREG, index, factor, d);
697     }
698 gbeauche 1.13 }
699     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
700    
/* Conditionally load 32 bits from address mem into register d under
   condition code cond.
   The memory operand is passed as (mem, X86_NOREG, X86_NOREG, 1):
   mem with no base register and no index register.
   When have_cmov is set, a single CMOVLmr is emitted. Otherwise the
   fallback emits a short conditional jump on cond^1 with displacement 6,
   followed by an unconditional MOVLmr of the same operand.
   NOTE(review): cond^1 is presumably the inverse condition, and 6 is
   presumably the encoded length of the MOV being jumped over -- the
   constant must stay in sync with what MOVLmr emits for this operand.
   On x86-64 builds the fallback path logs a message and aborts before
   emitting anything. */
701     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
702     {
703 gbeauche 1.15 if (have_cmov)
704     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
705     else { /* replacement using branch and mov */
706     #if defined(__x86_64__)
707     write_log("x86-64 implementations are bound to have CMOV!\n");
708     abort();
709     #endif
710     JCCSii(cond^1, 6);
711     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
712     }
713 gbeauche 1.13 }
714     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
715    
716     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
717     {
718     MOVLmr(offset, s, X86_NOREG, 1, d);
719     }
720     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
721    
722     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
723     {
724     MOVWmr(offset, s, X86_NOREG, 1, d);
725     }
726     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
727    
728     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
729     {
730     MOVBmr(offset, s, X86_NOREG, 1, d);
731     }
732     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
733    
734     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
735     {
736     MOVLmr(offset, s, X86_NOREG, 1, d);
737     }
738     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
739    
740     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
741     {
742     MOVWmr(offset, s, X86_NOREG, 1, d);
743     }
744     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
745    
746     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
747     {
748     MOVBmr(offset, s, X86_NOREG, 1, d);
749     }
750     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
751    
752     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
753     {
754     MOVLim(i, offset, d, X86_NOREG, 1);
755     }
756     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
757    
758     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
759     {
760     MOVWim(i, offset, d, X86_NOREG, 1);
761     }
762     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
763    
764     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
765     {
766     MOVBim(i, offset, d, X86_NOREG, 1);
767     }
768     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
769    
770     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
771     {
772     MOVLrm(s, offset, d, X86_NOREG, 1);
773     }
774     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
775    
776     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
777     {
778     MOVWrm(s, offset, d, X86_NOREG, 1);
779     }
780     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
781    
782     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
783     {
784     MOVBrm(s, offset, d, X86_NOREG, 1);
785     }
786     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
787    
788     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
789     {
790     LEALmr(offset, s, X86_NOREG, 1, d);
791     }
792     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
793    
794     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
795     {
796     LEALmr(offset, s, index, factor, d);
797     }
798     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
799    
800     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
801     {
802     LEALmr(0, s, index, factor, d);
803     }
804     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
805    
806     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
807     {
808     MOVLrm(s, offset, d, X86_NOREG, 1);
809     }
810     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
811    
812     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
813     {
814     MOVWrm(s, offset, d, X86_NOREG, 1);
815     }
816     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
817    
818     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
819     {
820     MOVBrm(s, offset, d, X86_NOREG, 1);
821     }
822     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
823    
824     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
825     {
826     BSWAPLr(r);
827     }
828     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
829    
/* Swap the two bytes of 16-bit register r.
   Implemented as a 16-bit rotate-left by 8 (ROLWir(8, r)) rather than a
   dedicated byte-swap instruction; rotating a 16-bit value by 8 bits
   exchanges its high and low bytes. */
830     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
831     {
832     ROLWir(8, r);
833     }
834     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
835    
836     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
837     {
838     MOVLrr(s, d);
839     }
840     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
841    
842     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
843     {
844     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
845     }
846     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
847    
848     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
849     {
850     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
851     }
852     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
853    
854     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
855     {
856     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
857     }
858     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
859    
860     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
861     {
862     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
863     }
864     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
865    
866     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
867     {
868     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
869     }
870     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
871    
872     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
873     {
874     MOVLir(s, d);
875     }
876     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
877    
878     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
879     {
880     MOVWir(s, d);
881     }
882     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
883    
884     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
885     {
886     MOVBir(s, d);
887     }
888     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
889    
890     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
891     {
892     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
893     }
894     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
895    
896     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
897     {
898     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
899     }
900     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
901    
902     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
903     {
904     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
905     }
906     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
907    
908     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
909     {
910     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
911     }
912     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
913    
914     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
915     {
916     TESTLir(i, d);
917     }
918     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
919    
920     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
921     {
922     TESTLrr(s, d);
923     }
924     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
925    
926     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
927     {
928     TESTWrr(s, d);
929     }
930     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
931    
932     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
933     {
934     TESTBrr(s, d);
935     }
936     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
937    
938 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
939     {
940     XORLir(i, d);
941     }
942     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
943    
944 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
945     {
946     ANDLir(i, d);
947     }
948     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
949    
950     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
951     {
952     ANDWir(i, d);
953     }
954     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
955    
956     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
957     {
958     ANDLrr(s, d);
959     }
960     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
961    
962     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
963     {
964     ANDWrr(s, d);
965     }
966     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
967    
968     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
969     {
970     ANDBrr(s, d);
971     }
972     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
973    
974     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
975     {
976     ORLir(i, d);
977     }
978     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
979    
980     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
981     {
982     ORLrr(s, d);
983     }
984     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
985    
986     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
987     {
988     ORWrr(s, d);
989     }
990     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
991    
992     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
993     {
994     ORBrr(s, d);
995     }
996     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
997    
998     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
999     {
1000     ADCLrr(s, d);
1001     }
1002     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1003    
1004     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1005     {
1006     ADCWrr(s, d);
1007     }
1008     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1009    
1010     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1011     {
1012     ADCBrr(s, d);
1013     }
1014     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1015    
1016     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1017     {
1018     ADDLrr(s, d);
1019     }
1020     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1021    
1022     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1023     {
1024     ADDWrr(s, d);
1025     }
1026     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1027    
1028     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1029     {
1030     ADDBrr(s, d);
1031     }
1032     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1033    
1034     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1035     {
1036     SUBLir(i, d);
1037     }
1038     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1039    
1040     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1041     {
1042     SUBBir(i, d);
1043     }
1044     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1045    
1046     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1047     {
1048     ADDLir(i, d);
1049     }
1050     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1051    
1052     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1053     {
1054     ADDWir(i, d);
1055     }
1056     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1057    
1058     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1059     {
1060     ADDBir(i, d);
1061     }
1062     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1063    
1064     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1065     {
1066     SBBLrr(s, d);
1067     }
1068     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1069    
1070     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1071     {
1072     SBBWrr(s, d);
1073     }
1074     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1075    
1076     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1077     {
1078     SBBBrr(s, d);
1079     }
1080     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1081    
1082     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1083     {
1084     SUBLrr(s, d);
1085     }
1086     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1087    
1088     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1089     {
1090     SUBWrr(s, d);
1091     }
1092     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1093    
1094     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1095     {
1096     SUBBrr(s, d);
1097     }
1098     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1099    
1100     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1101     {
1102     CMPLrr(s, d);
1103     }
1104     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1105    
1106     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1107     {
1108     CMPLir(i, r);
1109     }
1110     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1111    
1112     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1113     {
1114     CMPWrr(s, d);
1115     }
1116     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1117    
1118     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1119     {
1120     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1121     }
1122     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1123    
1124     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1125     {
1126     CMPBir(i, d);
1127     }
1128     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1129    
1130     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1131     {
1132     CMPBrr(s, d);
1133     }
1134     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1135    
1136     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1137     {
1138     CMPLmr(offset, X86_NOREG, index, factor, d);
1139     }
1140     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1141    
1142     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1143     {
1144     XORLrr(s, d);
1145     }
1146     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1147    
1148     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1149     {
1150     XORWrr(s, d);
1151     }
1152     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1153    
1154     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1155     {
1156     XORBrr(s, d);
1157     }
1158     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1159    
1160     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1161     {
1162     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1163     }
1164     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1165    
1166     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1167     {
1168     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1169     }
1170     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1171    
1172     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1173     {
1174     XCHGLrr(r2, r1);
1175     }
1176     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1177    
1178     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1179     {
1180 gbeauche 1.18 PUSHF();
1181 gbeauche 1.13 }
1182     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1183    
1184     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1185     {
1186 gbeauche 1.18 POPF();
1187 gbeauche 1.13 }
1188     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1189    
1190     #else
1191    
/* Encoding-size switches consulted by the hand-written emitters below. */

/* Prefer the short accumulator-only opcode forms when the operand is the
   accumulator (tested with isaccum()). */
const bool optimize_accum = true;
/* NOTE(review): presumably selects sign-extended 8-bit immediate forms;
   not referenced in this part of the file -- confirm against its users. */
const bool optimize_imm8 = true;
/* Use the shorter shift/rotate-by-one opcodes when the count is exactly 1. */
const bool optimize_shift_once = true;
1195    
1196     /*************************************************************************
1197     * Actual encoding of the instructions on the target CPU *
1198     *************************************************************************/
1199    
1200 gbeauche 1.2 static __inline__ int isaccum(int r)
1201     {
1202     return (r == EAX_INDEX);
1203     }
1204    
1205 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1206     {
1207     return (x>=-128 && x<=127);
1208     }
1209    
1210     static __inline__ int isword(uae_s32 x)
1211     {
1212     return (x>=-32768 && x<=32767);
1213     }
1214    
1215     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1216     {
1217     emit_byte(0x50+r);
1218     }
1219     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1220    
1221     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1222     {
1223     emit_byte(0x58+r);
1224     }
1225     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1226    
1227 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1228     {
1229     emit_byte(0x8f);
1230     emit_byte(0x05);
1231     emit_long(d);
1232     }
1233     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1234    
1235 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1236     {
1237     emit_byte(0x0f);
1238     emit_byte(0xba);
1239     emit_byte(0xe0+r);
1240     emit_byte(i);
1241     }
1242     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1243    
1244     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1245     {
1246     emit_byte(0x0f);
1247     emit_byte(0xa3);
1248     emit_byte(0xc0+8*b+r);
1249     }
1250     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1251    
1252     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1253     {
1254     emit_byte(0x0f);
1255     emit_byte(0xba);
1256     emit_byte(0xf8+r);
1257     emit_byte(i);
1258     }
1259     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1260    
1261     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1262     {
1263     emit_byte(0x0f);
1264     emit_byte(0xbb);
1265     emit_byte(0xc0+8*b+r);
1266     }
1267     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1268    
1269    
1270     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1271     {
1272     emit_byte(0x0f);
1273     emit_byte(0xba);
1274     emit_byte(0xf0+r);
1275     emit_byte(i);
1276     }
1277     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1278    
1279     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1280     {
1281     emit_byte(0x0f);
1282     emit_byte(0xb3);
1283     emit_byte(0xc0+8*b+r);
1284     }
1285     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1286    
1287     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1288     {
1289     emit_byte(0x0f);
1290     emit_byte(0xba);
1291     emit_byte(0xe8+r);
1292     emit_byte(i);
1293     }
1294     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1295    
1296     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1297     {
1298     emit_byte(0x0f);
1299     emit_byte(0xab);
1300     emit_byte(0xc0+8*b+r);
1301     }
1302     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1303    
1304     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1305     {
1306     emit_byte(0x66);
1307     if (isbyte(i)) {
1308     emit_byte(0x83);
1309     emit_byte(0xe8+d);
1310     emit_byte(i);
1311     }
1312     else {
1313 gbeauche 1.2 if (optimize_accum && isaccum(d))
1314     emit_byte(0x2d);
1315     else {
1316 gbeauche 1.1 emit_byte(0x81);
1317     emit_byte(0xe8+d);
1318 gbeauche 1.2 }
1319 gbeauche 1.1 emit_word(i);
1320     }
1321     }
1322     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1323    
1324    
1325     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1326     {
1327     emit_byte(0x8b);
1328     emit_byte(0x05+8*d);
1329     emit_long(s);
1330     }
1331     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1332    
1333     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1334     {
1335     emit_byte(0xc7);
1336     emit_byte(0x05);
1337     emit_long(d);
1338     emit_long(s);
1339     }
1340     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1341    
1342     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1343     {
1344     emit_byte(0x66);
1345     emit_byte(0xc7);
1346     emit_byte(0x05);
1347     emit_long(d);
1348     emit_word(s);
1349     }
1350     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1351    
1352     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1353     {
1354     emit_byte(0xc6);
1355     emit_byte(0x05);
1356     emit_long(d);
1357     emit_byte(s);
1358     }
1359     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1360    
1361     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1362     {
1363     if (optimize_shift_once && (i == 1)) {
1364     emit_byte(0xd0);
1365     emit_byte(0x05);
1366     emit_long(d);
1367     }
1368     else {
1369     emit_byte(0xc0);
1370     emit_byte(0x05);
1371     emit_long(d);
1372     emit_byte(i);
1373     }
1374     }
1375     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1376    
1377     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1378     {
1379     if (optimize_shift_once && (i == 1)) {
1380     emit_byte(0xd0);
1381     emit_byte(0xc0+r);
1382     }
1383     else {
1384     emit_byte(0xc0);
1385     emit_byte(0xc0+r);
1386     emit_byte(i);
1387     }
1388     }
1389     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1390    
1391     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1392     {
1393     emit_byte(0x66);
1394     emit_byte(0xc1);
1395     emit_byte(0xc0+r);
1396     emit_byte(i);
1397     }
1398     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1399    
1400     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1401     {
1402     if (optimize_shift_once && (i == 1)) {
1403     emit_byte(0xd1);
1404     emit_byte(0xc0+r);
1405     }
1406     else {
1407     emit_byte(0xc1);
1408     emit_byte(0xc0+r);
1409     emit_byte(i);
1410     }
1411     }
1412     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1413    
1414     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1415     {
1416     emit_byte(0xd3);
1417     emit_byte(0xc0+d);
1418     }
1419     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1420    
1421     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1422     {
1423     emit_byte(0x66);
1424     emit_byte(0xd3);
1425     emit_byte(0xc0+d);
1426     }
1427     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1428    
1429     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1430     {
1431     emit_byte(0xd2);
1432     emit_byte(0xc0+d);
1433     }
1434     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1435    
1436     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1437     {
1438     emit_byte(0xd3);
1439     emit_byte(0xe0+d);
1440     }
1441     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1442    
1443     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1444     {
1445     emit_byte(0x66);
1446     emit_byte(0xd3);
1447     emit_byte(0xe0+d);
1448     }
1449     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1450    
1451     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1452     {
1453     emit_byte(0xd2);
1454     emit_byte(0xe0+d);
1455     }
1456     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1457    
1458     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1459     {
1460     if (optimize_shift_once && (i == 1)) {
1461     emit_byte(0xd0);
1462     emit_byte(0xc8+r);
1463     }
1464     else {
1465     emit_byte(0xc0);
1466     emit_byte(0xc8+r);
1467     emit_byte(i);
1468     }
1469     }
1470     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1471    
1472     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1473     {
1474     emit_byte(0x66);
1475     emit_byte(0xc1);
1476     emit_byte(0xc8+r);
1477     emit_byte(i);
1478     }
1479     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1480    
1481     // gb-- used for making an fpcr value in compemu_fpp.cpp
1482     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1483     {
1484     emit_byte(0x0b);
1485     emit_byte(0x05+8*d);
1486     emit_long(s);
1487     }
1488     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1489    
1490     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1491     {
1492     if (optimize_shift_once && (i == 1)) {
1493     emit_byte(0xd1);
1494     emit_byte(0xc8+r);
1495     }
1496     else {
1497     emit_byte(0xc1);
1498     emit_byte(0xc8+r);
1499     emit_byte(i);
1500     }
1501     }
1502     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1503    
1504     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1505     {
1506     emit_byte(0xd3);
1507     emit_byte(0xc8+d);
1508     }
1509     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1510    
1511     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1512     {
1513     emit_byte(0x66);
1514     emit_byte(0xd3);
1515     emit_byte(0xc8+d);
1516     }
1517     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1518    
1519     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1520     {
1521     emit_byte(0xd2);
1522     emit_byte(0xc8+d);
1523     }
1524     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1525    
1526     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1527     {
1528     emit_byte(0xd3);
1529     emit_byte(0xe8+d);
1530     }
1531     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1532    
1533     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1534     {
1535     emit_byte(0x66);
1536     emit_byte(0xd3);
1537     emit_byte(0xe8+d);
1538     }
1539     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1540    
1541     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1542     {
1543     emit_byte(0xd2);
1544     emit_byte(0xe8+d);
1545     }
1546     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1547    
1548     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1549     {
1550     emit_byte(0xd3);
1551     emit_byte(0xf8+d);
1552     }
1553     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1554    
1555     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1556     {
1557     emit_byte(0x66);
1558     emit_byte(0xd3);
1559     emit_byte(0xf8+d);
1560     }
1561     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1562    
1563     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1564     {
1565     emit_byte(0xd2);
1566     emit_byte(0xf8+d);
1567     }
1568     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1569    
1570     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1571     {
1572     if (optimize_shift_once && (i == 1)) {
1573     emit_byte(0xd1);
1574     emit_byte(0xe0+r);
1575     }
1576     else {
1577     emit_byte(0xc1);
1578     emit_byte(0xe0+r);
1579     emit_byte(i);
1580     }
1581     }
1582     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1583    
1584     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1585     {
1586     emit_byte(0x66);
1587     emit_byte(0xc1);
1588     emit_byte(0xe0+r);
1589     emit_byte(i);
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1594     {
1595     if (optimize_shift_once && (i == 1)) {
1596     emit_byte(0xd0);
1597     emit_byte(0xe0+r);
1598     }
1599     else {
1600     emit_byte(0xc0);
1601     emit_byte(0xe0+r);
1602     emit_byte(i);
1603     }
1604     }
1605     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1606    
1607     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1608     {
1609     if (optimize_shift_once && (i == 1)) {
1610     emit_byte(0xd1);
1611     emit_byte(0xe8+r);
1612     }
1613     else {
1614     emit_byte(0xc1);
1615     emit_byte(0xe8+r);
1616     emit_byte(i);
1617     }
1618     }
1619     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1620    
1621     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1622     {
1623     emit_byte(0x66);
1624     emit_byte(0xc1);
1625     emit_byte(0xe8+r);
1626     emit_byte(i);
1627     }
1628     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1629    
1630     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1631     {
1632     if (optimize_shift_once && (i == 1)) {
1633     emit_byte(0xd0);
1634     emit_byte(0xe8+r);
1635     }
1636     else {
1637     emit_byte(0xc0);
1638     emit_byte(0xe8+r);
1639     emit_byte(i);
1640     }
1641     }
1642     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1643    
1644     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1645     {
1646     if (optimize_shift_once && (i == 1)) {
1647     emit_byte(0xd1);
1648     emit_byte(0xf8+r);
1649     }
1650     else {
1651     emit_byte(0xc1);
1652     emit_byte(0xf8+r);
1653     emit_byte(i);
1654     }
1655     }
1656     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1657    
1658     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1659     {
1660     emit_byte(0x66);
1661     emit_byte(0xc1);
1662     emit_byte(0xf8+r);
1663     emit_byte(i);
1664     }
1665     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1666    
1667     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1668     {
1669     if (optimize_shift_once && (i == 1)) {
1670     emit_byte(0xd0);
1671     emit_byte(0xf8+r);
1672     }
1673     else {
1674     emit_byte(0xc0);
1675     emit_byte(0xf8+r);
1676     emit_byte(i);
1677     }
1678     }
1679     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1680    
1681     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1682     {
1683     emit_byte(0x9e);
1684     }
1685     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1686    
1687     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1688     {
1689     emit_byte(0x0f);
1690     emit_byte(0xa2);
1691     }
1692     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1693    
1694     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1695     {
1696     emit_byte(0x9f);
1697     }
1698     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1699    
1700     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1701     {
1702     emit_byte(0x0f);
1703     emit_byte(0x90+cc);
1704     emit_byte(0xc0+d);
1705     }
1706     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1707    
1708     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1709     {
1710     emit_byte(0x0f);
1711     emit_byte(0x90+cc);
1712     emit_byte(0x05);
1713     emit_long(d);
1714     }
1715     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1716    
1717     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1718     {
1719     if (have_cmov) {
1720     emit_byte(0x0f);
1721     emit_byte(0x40+cc);
1722     emit_byte(0xc0+8*d+s);
1723     }
1724     else { /* replacement using branch and mov */
1725     int uncc=(cc^1);
1726     emit_byte(0x70+uncc);
1727     emit_byte(2); /* skip next 2 bytes if not cc=true */
1728     emit_byte(0x89);
1729     emit_byte(0xc0+8*s+d);
1730     }
1731     }
1732     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1733    
1734     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1735     {
1736     emit_byte(0x0f);
1737     emit_byte(0xbc);
1738     emit_byte(0xc0+8*d+s);
1739     }
1740     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1741    
1742     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1743     {
1744     emit_byte(0x0f);
1745     emit_byte(0xbf);
1746     emit_byte(0xc0+8*d+s);
1747     }
1748     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1749    
1750     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1751     {
1752     emit_byte(0x0f);
1753     emit_byte(0xbe);
1754     emit_byte(0xc0+8*d+s);
1755     }
1756     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1757    
1758     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1759     {
1760     emit_byte(0x0f);
1761     emit_byte(0xb7);
1762     emit_byte(0xc0+8*d+s);
1763     }
1764     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1765    
1766     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1767     {
1768     emit_byte(0x0f);
1769     emit_byte(0xb6);
1770     emit_byte(0xc0+8*d+s);
1771     }
1772     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1773    
1774     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1775     {
1776     emit_byte(0x0f);
1777     emit_byte(0xaf);
1778     emit_byte(0xc0+8*d+s);
1779     }
1780     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1781    
1782     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1783     {
1784     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1785     abort();
1786     emit_byte(0xf7);
1787     emit_byte(0xea);
1788     }
1789     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1790    
1791     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1792     {
1793     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1794     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1795     abort();
1796     }
1797     emit_byte(0xf7);
1798     emit_byte(0xe2);
1799     }
1800     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1801    
1802     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1803     {
1804     abort(); /* %^$&%^$%#^ x86! */
1805     emit_byte(0x0f);
1806     emit_byte(0xaf);
1807     emit_byte(0xc0+8*d+s);
1808     }
1809     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1810    
1811     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1812     {
1813     emit_byte(0x88);
1814     emit_byte(0xc0+8*s+d);
1815     }
1816     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1817    
1818     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1819     {
1820     emit_byte(0x66);
1821     emit_byte(0x89);
1822     emit_byte(0xc0+8*s+d);
1823     }
1824     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1825    
1826     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1827     {
1828     int isebp=(baser==5)?0x40:0;
1829     int fi;
1830    
1831     switch(factor) {
1832     case 1: fi=0; break;
1833     case 2: fi=1; break;
1834     case 4: fi=2; break;
1835     case 8: fi=3; break;
1836     default: abort();
1837     }
1838    
1839    
1840     emit_byte(0x8b);
1841     emit_byte(0x04+8*d+isebp);
1842     emit_byte(baser+8*index+0x40*fi);
1843     if (isebp)
1844     emit_byte(0x00);
1845     }
1846     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1847    
1848     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1849     {
1850     int fi;
1851     int isebp;
1852    
1853     switch(factor) {
1854     case 1: fi=0; break;
1855     case 2: fi=1; break;
1856     case 4: fi=2; break;
1857     case 8: fi=3; break;
1858     default: abort();
1859     }
1860     isebp=(baser==5)?0x40:0;
1861    
1862     emit_byte(0x66);
1863     emit_byte(0x8b);
1864     emit_byte(0x04+8*d+isebp);
1865     emit_byte(baser+8*index+0x40*fi);
1866     if (isebp)
1867     emit_byte(0x00);
1868     }
1869     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1870    
1871     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1872     {
1873     int fi;
1874     int isebp;
1875    
1876     switch(factor) {
1877     case 1: fi=0; break;
1878     case 2: fi=1; break;
1879     case 4: fi=2; break;
1880     case 8: fi=3; break;
1881     default: abort();
1882     }
1883     isebp=(baser==5)?0x40:0;
1884    
1885     emit_byte(0x8a);
1886     emit_byte(0x04+8*d+isebp);
1887     emit_byte(baser+8*index+0x40*fi);
1888     if (isebp)
1889     emit_byte(0x00);
1890     }
1891     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1892    
1893     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1894     {
1895     int fi;
1896     int isebp;
1897    
1898     switch(factor) {
1899     case 1: fi=0; break;
1900     case 2: fi=1; break;
1901     case 4: fi=2; break;
1902     case 8: fi=3; break;
1903     default: abort();
1904     }
1905    
1906    
1907     isebp=(baser==5)?0x40:0;
1908    
1909     emit_byte(0x89);
1910     emit_byte(0x04+8*s+isebp);
1911     emit_byte(baser+8*index+0x40*fi);
1912     if (isebp)
1913     emit_byte(0x00);
1914     }
1915     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1916    
1917     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1918     {
1919     int fi;
1920     int isebp;
1921    
1922     switch(factor) {
1923     case 1: fi=0; break;
1924     case 2: fi=1; break;
1925     case 4: fi=2; break;
1926     case 8: fi=3; break;
1927     default: abort();
1928     }
1929     isebp=(baser==5)?0x40:0;
1930    
1931     emit_byte(0x66);
1932     emit_byte(0x89);
1933     emit_byte(0x04+8*s+isebp);
1934     emit_byte(baser+8*index+0x40*fi);
1935     if (isebp)
1936     emit_byte(0x00);
1937     }
1938     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1939    
1940     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1941     {
1942     int fi;
1943     int isebp;
1944    
1945     switch(factor) {
1946     case 1: fi=0; break;
1947     case 2: fi=1; break;
1948     case 4: fi=2; break;
1949     case 8: fi=3; break;
1950     default: abort();
1951     }
1952     isebp=(baser==5)?0x40:0;
1953    
1954     emit_byte(0x88);
1955     emit_byte(0x04+8*s+isebp);
1956     emit_byte(baser+8*index+0x40*fi);
1957     if (isebp)
1958     emit_byte(0x00);
1959     }
1960     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1961    
1962     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1963     {
1964     int fi;
1965    
1966     switch(factor) {
1967     case 1: fi=0; break;
1968     case 2: fi=1; break;
1969     case 4: fi=2; break;
1970     case 8: fi=3; break;
1971     default: abort();
1972     }
1973    
1974     emit_byte(0x89);
1975     emit_byte(0x84+8*s);
1976     emit_byte(baser+8*index+0x40*fi);
1977     emit_long(base);
1978     }
1979     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1980    
1981     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1982     {
1983     int fi;
1984    
1985     switch(factor) {
1986     case 1: fi=0; break;
1987     case 2: fi=1; break;
1988     case 4: fi=2; break;
1989     case 8: fi=3; break;
1990     default: abort();
1991     }
1992    
1993     emit_byte(0x66);
1994     emit_byte(0x89);
1995     emit_byte(0x84+8*s);
1996     emit_byte(baser+8*index+0x40*fi);
1997     emit_long(base);
1998     }
1999     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2000    
2001     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2002     {
2003     int fi;
2004    
2005     switch(factor) {
2006     case 1: fi=0; break;
2007     case 2: fi=1; break;
2008     case 4: fi=2; break;
2009     case 8: fi=3; break;
2010     default: abort();
2011     }
2012    
2013     emit_byte(0x88);
2014     emit_byte(0x84+8*s);
2015     emit_byte(baser+8*index+0x40*fi);
2016     emit_long(base);
2017     }
2018     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2019    
2020     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2021     {
2022     int fi;
2023    
2024     switch(factor) {
2025     case 1: fi=0; break;
2026     case 2: fi=1; break;
2027     case 4: fi=2; break;
2028     case 8: fi=3; break;
2029     default: abort();
2030     }
2031    
2032     emit_byte(0x8b);
2033     emit_byte(0x84+8*d);
2034     emit_byte(baser+8*index+0x40*fi);
2035     emit_long(base);
2036     }
2037     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2038    
2039     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2040     {
2041     int fi;
2042    
2043     switch(factor) {
2044     case 1: fi=0; break;
2045     case 2: fi=1; break;
2046     case 4: fi=2; break;
2047     case 8: fi=3; break;
2048     default: abort();
2049     }
2050    
2051     emit_byte(0x66);
2052     emit_byte(0x8b);
2053     emit_byte(0x84+8*d);
2054     emit_byte(baser+8*index+0x40*fi);
2055     emit_long(base);
2056     }
2057     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2058    
2059     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2060     {
2061     int fi;
2062    
2063     switch(factor) {
2064     case 1: fi=0; break;
2065     case 2: fi=1; break;
2066     case 4: fi=2; break;
2067     case 8: fi=3; break;
2068     default: abort();
2069     }
2070    
2071     emit_byte(0x8a);
2072     emit_byte(0x84+8*d);
2073     emit_byte(baser+8*index+0x40*fi);
2074     emit_long(base);
2075     }
2076     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2077    
2078     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2079     {
2080     int fi;
2081     switch(factor) {
2082     case 1: fi=0; break;
2083     case 2: fi=1; break;
2084     case 4: fi=2; break;
2085     case 8: fi=3; break;
2086     default:
2087     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2088     abort();
2089     }
2090     emit_byte(0x8b);
2091     emit_byte(0x04+8*d);
2092     emit_byte(0x05+8*index+64*fi);
2093     emit_long(base);
2094     }
2095     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2096    
2097     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2098     {
2099     int fi;
2100     switch(factor) {
2101     case 1: fi=0; break;
2102     case 2: fi=1; break;
2103     case 4: fi=2; break;
2104     case 8: fi=3; break;
2105     default:
2106     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2107     abort();
2108     }
2109     if (have_cmov) {
2110     emit_byte(0x0f);
2111     emit_byte(0x40+cond);
2112     emit_byte(0x04+8*d);
2113     emit_byte(0x05+8*index+64*fi);
2114     emit_long(base);
2115     }
2116     else { /* replacement using branch and mov */
2117     int uncc=(cond^1);
2118     emit_byte(0x70+uncc);
2119     emit_byte(7); /* skip next 7 bytes if not cc=true */
2120     emit_byte(0x8b);
2121     emit_byte(0x04+8*d);
2122     emit_byte(0x05+8*index+64*fi);
2123     emit_long(base);
2124     }
2125     }
2126     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2127    
2128     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2129     {
2130     if (have_cmov) {
2131     emit_byte(0x0f);
2132     emit_byte(0x40+cond);
2133     emit_byte(0x05+8*d);
2134     emit_long(mem);
2135     }
2136     else { /* replacement using branch and mov */
2137     int uncc=(cond^1);
2138     emit_byte(0x70+uncc);
2139     emit_byte(6); /* skip next 6 bytes if not cc=true */
2140     emit_byte(0x8b);
2141     emit_byte(0x05+8*d);
2142     emit_long(mem);
2143     }
2144     }
2145     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2146    
2147     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2148     {
2149 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2150 gbeauche 1.1 emit_byte(0x8b);
2151     emit_byte(0x40+8*d+s);
2152     emit_byte(offset);
2153     }
2154     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2155    
2156     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2157     {
2158 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2159 gbeauche 1.1 emit_byte(0x66);
2160     emit_byte(0x8b);
2161     emit_byte(0x40+8*d+s);
2162     emit_byte(offset);
2163     }
2164     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2165    
2166     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2167     {
2168 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2169 gbeauche 1.1 emit_byte(0x8a);
2170     emit_byte(0x40+8*d+s);
2171     emit_byte(offset);
2172     }
2173     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2174    
2175     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2176     {
2177     emit_byte(0x8b);
2178     emit_byte(0x80+8*d+s);
2179     emit_long(offset);
2180     }
2181     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2182    
2183     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2184     {
2185     emit_byte(0x66);
2186     emit_byte(0x8b);
2187     emit_byte(0x80+8*d+s);
2188     emit_long(offset);
2189     }
2190     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2191    
2192     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2193     {
2194     emit_byte(0x8a);
2195     emit_byte(0x80+8*d+s);
2196     emit_long(offset);
2197     }
2198     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2199    
2200     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2201     {
2202 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2203 gbeauche 1.1 emit_byte(0xc7);
2204     emit_byte(0x40+d);
2205     emit_byte(offset);
2206     emit_long(i);
2207     }
2208     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2209    
2210     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2211     {
2212 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2213 gbeauche 1.1 emit_byte(0x66);
2214     emit_byte(0xc7);
2215     emit_byte(0x40+d);
2216     emit_byte(offset);
2217     emit_word(i);
2218     }
2219     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2220    
2221     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2222     {
2223 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2224 gbeauche 1.1 emit_byte(0xc6);
2225     emit_byte(0x40+d);
2226     emit_byte(offset);
2227     emit_byte(i);
2228     }
2229     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2230    
2231     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2232     {
2233 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2234 gbeauche 1.1 emit_byte(0x89);
2235     emit_byte(0x40+8*s+d);
2236     emit_byte(offset);
2237     }
2238     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2239    
2240     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2241     {
2242 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2243 gbeauche 1.1 emit_byte(0x66);
2244     emit_byte(0x89);
2245     emit_byte(0x40+8*s+d);
2246     emit_byte(offset);
2247     }
2248     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2249    
2250     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2251     {
2252 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2253 gbeauche 1.1 emit_byte(0x88);
2254     emit_byte(0x40+8*s+d);
2255     emit_byte(offset);
2256     }
2257     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2258    
2259     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2260     {
2261     if (optimize_imm8 && isbyte(offset)) {
2262     emit_byte(0x8d);
2263     emit_byte(0x40+8*d+s);
2264     emit_byte(offset);
2265     }
2266     else {
2267     emit_byte(0x8d);
2268     emit_byte(0x80+8*d+s);
2269     emit_long(offset);
2270     }
2271     }
2272     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2273    
2274     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2275     {
2276     int fi;
2277    
2278     switch(factor) {
2279     case 1: fi=0; break;
2280     case 2: fi=1; break;
2281     case 4: fi=2; break;
2282     case 8: fi=3; break;
2283     default: abort();
2284     }
2285    
2286     if (optimize_imm8 && isbyte(offset)) {
2287     emit_byte(0x8d);
2288     emit_byte(0x44+8*d);
2289     emit_byte(0x40*fi+8*index+s);
2290     emit_byte(offset);
2291     }
2292     else {
2293     emit_byte(0x8d);
2294     emit_byte(0x84+8*d);
2295     emit_byte(0x40*fi+8*index+s);
2296     emit_long(offset);
2297     }
2298     }
2299     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2300    
2301     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2302     {
2303     int isebp=(s==5)?0x40:0;
2304     int fi;
2305    
2306     switch(factor) {
2307     case 1: fi=0; break;
2308     case 2: fi=1; break;
2309     case 4: fi=2; break;
2310     case 8: fi=3; break;
2311     default: abort();
2312     }
2313    
2314     emit_byte(0x8d);
2315     emit_byte(0x04+8*d+isebp);
2316     emit_byte(0x40*fi+8*index+s);
2317     if (isebp)
2318     emit_byte(0);
2319     }
2320     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2321    
2322     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2323     {
2324     if (optimize_imm8 && isbyte(offset)) {
2325     emit_byte(0x89);
2326     emit_byte(0x40+8*s+d);
2327     emit_byte(offset);
2328     }
2329     else {
2330     emit_byte(0x89);
2331     emit_byte(0x80+8*s+d);
2332     emit_long(offset);
2333     }
2334     }
2335     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2336    
2337     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2338     {
2339     emit_byte(0x66);
2340     emit_byte(0x89);
2341     emit_byte(0x80+8*s+d);
2342     emit_long(offset);
2343     }
2344     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2345    
2346     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2347     {
2348     if (optimize_imm8 && isbyte(offset)) {
2349     emit_byte(0x88);
2350     emit_byte(0x40+8*s+d);
2351     emit_byte(offset);
2352     }
2353     else {
2354     emit_byte(0x88);
2355     emit_byte(0x80+8*s+d);
2356     emit_long(offset);
2357     }
2358     }
2359     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2360    
2361     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2362     {
2363     emit_byte(0x0f);
2364     emit_byte(0xc8+r);
2365     }
2366     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2367    
2368     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2369     {
2370     emit_byte(0x66);
2371     emit_byte(0xc1);
2372     emit_byte(0xc0+r);
2373     emit_byte(0x08);
2374     }
2375     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2376    
2377     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2378     {
2379     emit_byte(0x89);
2380     emit_byte(0xc0+8*s+d);
2381     }
2382     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2383    
2384     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2385     {
2386     emit_byte(0x89);
2387     emit_byte(0x05+8*s);
2388     emit_long(d);
2389     }
2390     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2391    
2392     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2393     {
2394     emit_byte(0x66);
2395     emit_byte(0x89);
2396     emit_byte(0x05+8*s);
2397     emit_long(d);
2398     }
2399     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2400    
2401     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2402     {
2403     emit_byte(0x66);
2404     emit_byte(0x8b);
2405     emit_byte(0x05+8*d);
2406     emit_long(s);
2407     }
2408     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2409    
2410     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2411     {
2412     emit_byte(0x88);
2413     emit_byte(0x05+8*s);
2414     emit_long(d);
2415     }
2416     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2417    
2418     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2419     {
2420     emit_byte(0x8a);
2421     emit_byte(0x05+8*d);
2422     emit_long(s);
2423     }
2424     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2425    
2426     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2427     {
2428     emit_byte(0xb8+d);
2429     emit_long(s);
2430     }
2431     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2432    
2433     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2434     {
2435     emit_byte(0x66);
2436     emit_byte(0xb8+d);
2437     emit_word(s);
2438     }
2439     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2440    
2441     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2442     {
2443     emit_byte(0xb0+d);
2444     emit_byte(s);
2445     }
2446     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2447    
2448     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2449     {
2450     emit_byte(0x81);
2451     emit_byte(0x15);
2452     emit_long(d);
2453     emit_long(s);
2454     }
2455     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2456    
2457     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2458     {
2459     if (optimize_imm8 && isbyte(s)) {
2460     emit_byte(0x83);
2461     emit_byte(0x05);
2462     emit_long(d);
2463     emit_byte(s);
2464     }
2465     else {
2466     emit_byte(0x81);
2467     emit_byte(0x05);
2468     emit_long(d);
2469     emit_long(s);
2470     }
2471     }
2472     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2473    
2474     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2475     {
2476     emit_byte(0x66);
2477     emit_byte(0x81);
2478     emit_byte(0x05);
2479     emit_long(d);
2480     emit_word(s);
2481     }
2482     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2483    
2484     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2485     {
2486     emit_byte(0x80);
2487     emit_byte(0x05);
2488     emit_long(d);
2489     emit_byte(s);
2490     }
2491     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2492    
2493     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2494     {
2495 gbeauche 1.2 if (optimize_accum && isaccum(d))
2496     emit_byte(0xa9);
2497     else {
2498 gbeauche 1.1 emit_byte(0xf7);
2499     emit_byte(0xc0+d);
2500 gbeauche 1.2 }
2501 gbeauche 1.1 emit_long(i);
2502     }
2503     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2504    
2505     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2506     {
2507     emit_byte(0x85);
2508     emit_byte(0xc0+8*s+d);
2509     }
2510     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2511    
2512     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2513     {
2514     emit_byte(0x66);
2515     emit_byte(0x85);
2516     emit_byte(0xc0+8*s+d);
2517     }
2518     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2519    
2520     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2521     {
2522     emit_byte(0x84);
2523     emit_byte(0xc0+8*s+d);
2524     }
2525     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2526    
2527 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2528     {
2529     emit_byte(0x81);
2530     emit_byte(0xf0+d);
2531     emit_long(i);
2532     }
2533     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2534    
2535 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2536     {
2537     if (optimize_imm8 && isbyte(i)) {
2538 gbeauche 1.2 emit_byte(0x83);
2539     emit_byte(0xe0+d);
2540     emit_byte(i);
2541 gbeauche 1.1 }
2542     else {
2543 gbeauche 1.2 if (optimize_accum && isaccum(d))
2544     emit_byte(0x25);
2545     else {
2546     emit_byte(0x81);
2547     emit_byte(0xe0+d);
2548     }
2549     emit_long(i);
2550 gbeauche 1.1 }
2551     }
2552     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2553    
2554     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2555     {
2556 gbeauche 1.2 emit_byte(0x66);
2557     if (optimize_imm8 && isbyte(i)) {
2558     emit_byte(0x83);
2559     emit_byte(0xe0+d);
2560     emit_byte(i);
2561     }
2562     else {
2563     if (optimize_accum && isaccum(d))
2564     emit_byte(0x25);
2565     else {
2566     emit_byte(0x81);
2567     emit_byte(0xe0+d);
2568     }
2569     emit_word(i);
2570     }
2571 gbeauche 1.1 }
2572     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2573    
2574     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2575     {
2576     emit_byte(0x21);
2577     emit_byte(0xc0+8*s+d);
2578     }
2579     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2580    
2581     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2582     {
2583     emit_byte(0x66);
2584     emit_byte(0x21);
2585     emit_byte(0xc0+8*s+d);
2586     }
2587     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2588    
2589     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2590     {
2591     emit_byte(0x20);
2592     emit_byte(0xc0+8*s+d);
2593     }
2594     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2595    
2596     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2597     {
2598     if (optimize_imm8 && isbyte(i)) {
2599     emit_byte(0x83);
2600     emit_byte(0xc8+d);
2601     emit_byte(i);
2602     }
2603     else {
2604 gbeauche 1.2 if (optimize_accum && isaccum(d))
2605     emit_byte(0x0d);
2606     else {
2607 gbeauche 1.1 emit_byte(0x81);
2608     emit_byte(0xc8+d);
2609 gbeauche 1.2 }
2610 gbeauche 1.1 emit_long(i);
2611     }
2612     }
2613     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2614    
2615     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2616     {
2617     emit_byte(0x09);
2618     emit_byte(0xc0+8*s+d);
2619     }
2620     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2621    
2622     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2623     {
2624     emit_byte(0x66);
2625     emit_byte(0x09);
2626     emit_byte(0xc0+8*s+d);
2627     }
2628     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2629    
2630     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2631     {
2632     emit_byte(0x08);
2633     emit_byte(0xc0+8*s+d);
2634     }
2635     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2636    
2637     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2638     {
2639     emit_byte(0x11);
2640     emit_byte(0xc0+8*s+d);
2641     }
2642     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2643    
2644     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2645     {
2646     emit_byte(0x66);
2647     emit_byte(0x11);
2648     emit_byte(0xc0+8*s+d);
2649     }
2650     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2651    
2652     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2653     {
2654     emit_byte(0x10);
2655     emit_byte(0xc0+8*s+d);
2656     }
2657     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2658    
2659     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2660     {
2661     emit_byte(0x01);
2662     emit_byte(0xc0+8*s+d);
2663     }
2664     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2665    
2666     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2667     {
2668     emit_byte(0x66);
2669     emit_byte(0x01);
2670     emit_byte(0xc0+8*s+d);
2671     }
2672     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2673    
2674     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2675     {
2676     emit_byte(0x00);
2677     emit_byte(0xc0+8*s+d);
2678     }
2679     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2680    
2681     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2682     {
2683     if (isbyte(i)) {
2684     emit_byte(0x83);
2685     emit_byte(0xe8+d);
2686     emit_byte(i);
2687     }
2688     else {
2689 gbeauche 1.2 if (optimize_accum && isaccum(d))
2690     emit_byte(0x2d);
2691     else {
2692 gbeauche 1.1 emit_byte(0x81);
2693     emit_byte(0xe8+d);
2694 gbeauche 1.2 }
2695 gbeauche 1.1 emit_long(i);
2696     }
2697     }
2698     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2699    
2700     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2701     {
2702 gbeauche 1.2 if (optimize_accum && isaccum(d))
2703     emit_byte(0x2c);
2704     else {
2705 gbeauche 1.1 emit_byte(0x80);
2706     emit_byte(0xe8+d);
2707 gbeauche 1.2 }
2708 gbeauche 1.1 emit_byte(i);
2709     }
2710     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2711    
2712     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2713     {
2714     if (isbyte(i)) {
2715     emit_byte(0x83);
2716     emit_byte(0xc0+d);
2717     emit_byte(i);
2718     }
2719     else {
2720 gbeauche 1.2 if (optimize_accum && isaccum(d))
2721     emit_byte(0x05);
2722     else {
2723 gbeauche 1.1 emit_byte(0x81);
2724     emit_byte(0xc0+d);
2725 gbeauche 1.2 }
2726 gbeauche 1.1 emit_long(i);
2727     }
2728     }
2729     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2730    
2731     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2732     {
2733 gbeauche 1.2 emit_byte(0x66);
2734 gbeauche 1.1 if (isbyte(i)) {
2735     emit_byte(0x83);
2736     emit_byte(0xc0+d);
2737     emit_byte(i);
2738     }
2739     else {
2740 gbeauche 1.2 if (optimize_accum && isaccum(d))
2741     emit_byte(0x05);
2742     else {
2743 gbeauche 1.1 emit_byte(0x81);
2744     emit_byte(0xc0+d);
2745 gbeauche 1.2 }
2746 gbeauche 1.1 emit_word(i);
2747     }
2748     }
2749     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2750    
2751     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2752     {
2753 gbeauche 1.2 if (optimize_accum && isaccum(d))
2754     emit_byte(0x04);
2755     else {
2756     emit_byte(0x80);
2757     emit_byte(0xc0+d);
2758     }
2759 gbeauche 1.1 emit_byte(i);
2760     }
2761     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2762    
2763     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2764     {
2765     emit_byte(0x19);
2766     emit_byte(0xc0+8*s+d);
2767     }
2768     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2769    
2770     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2771     {
2772     emit_byte(0x66);
2773     emit_byte(0x19);
2774     emit_byte(0xc0+8*s+d);
2775     }
2776     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2777    
2778     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2779     {
2780     emit_byte(0x18);
2781     emit_byte(0xc0+8*s+d);
2782     }
2783     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2784    
2785     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2786     {
2787     emit_byte(0x29);
2788     emit_byte(0xc0+8*s+d);
2789     }
2790     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2791    
2792     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2793     {
2794     emit_byte(0x66);
2795     emit_byte(0x29);
2796     emit_byte(0xc0+8*s+d);
2797     }
2798     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2799    
2800     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2801     {
2802     emit_byte(0x28);
2803     emit_byte(0xc0+8*s+d);
2804     }
2805     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2806    
2807     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2808     {
2809     emit_byte(0x39);
2810     emit_byte(0xc0+8*s+d);
2811     }
2812     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2813    
2814     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2815     {
2816     if (optimize_imm8 && isbyte(i)) {
2817     emit_byte(0x83);
2818     emit_byte(0xf8+r);
2819     emit_byte(i);
2820     }
2821     else {
2822 gbeauche 1.2 if (optimize_accum && isaccum(r))
2823     emit_byte(0x3d);
2824     else {
2825 gbeauche 1.1 emit_byte(0x81);
2826     emit_byte(0xf8+r);
2827 gbeauche 1.2 }
2828 gbeauche 1.1 emit_long(i);
2829     }
2830     }
2831     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2832    
2833     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2834     {
2835     emit_byte(0x66);
2836     emit_byte(0x39);
2837     emit_byte(0xc0+8*s+d);
2838     }
2839     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2840    
2841 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2842     {
2843     emit_byte(0x80);
2844     emit_byte(0x3d);
2845     emit_long(d);
2846     emit_byte(s);
2847     }
2848     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2849    
2850 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2851     {
2852 gbeauche 1.2 if (optimize_accum && isaccum(d))
2853     emit_byte(0x3c);
2854     else {
2855 gbeauche 1.1 emit_byte(0x80);
2856     emit_byte(0xf8+d);
2857 gbeauche 1.2 }
2858 gbeauche 1.1 emit_byte(i);
2859     }
2860     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2861    
2862     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2863     {
2864     emit_byte(0x38);
2865     emit_byte(0xc0+8*s+d);
2866     }
2867     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2868    
2869     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2870     {
2871     int fi;
2872    
2873     switch(factor) {
2874     case 1: fi=0; break;
2875     case 2: fi=1; break;
2876     case 4: fi=2; break;
2877     case 8: fi=3; break;
2878     default: abort();
2879     }
2880     emit_byte(0x39);
2881     emit_byte(0x04+8*d);
2882     emit_byte(5+8*index+0x40*fi);
2883     emit_long(offset);
2884     }
2885     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2886    
2887     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2888     {
2889     emit_byte(0x31);
2890     emit_byte(0xc0+8*s+d);
2891     }
2892     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2893    
2894     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2895     {
2896     emit_byte(0x66);
2897     emit_byte(0x31);
2898     emit_byte(0xc0+8*s+d);
2899     }
2900     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2901    
2902     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2903     {
2904     emit_byte(0x30);
2905     emit_byte(0xc0+8*s+d);
2906     }
2907     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2908    
2909     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2910     {
2911     if (optimize_imm8 && isbyte(s)) {
2912     emit_byte(0x83);
2913     emit_byte(0x2d);
2914     emit_long(d);
2915     emit_byte(s);
2916     }
2917     else {
2918     emit_byte(0x81);
2919     emit_byte(0x2d);
2920     emit_long(d);
2921     emit_long(s);
2922     }
2923     }
2924     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2925    
2926     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2927     {
2928     if (optimize_imm8 && isbyte(s)) {
2929     emit_byte(0x83);
2930     emit_byte(0x3d);
2931     emit_long(d);
2932     emit_byte(s);
2933     }
2934     else {
2935     emit_byte(0x81);
2936     emit_byte(0x3d);
2937     emit_long(d);
2938     emit_long(s);
2939     }
2940     }
2941     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2942    
2943     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2944     {
2945     emit_byte(0x87);
2946     emit_byte(0xc0+8*r1+r2);
2947     }
2948     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2949    
2950     /*************************************************************************
2951     * FIXME: mem access modes probably wrong *
2952     *************************************************************************/
2953    
2954     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2955     {
2956     emit_byte(0x9c);
2957     }
2958     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2959    
2960     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2961     {
2962     emit_byte(0x9d);
2963     }
2964     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2965 gbeauche 1.13
2966     #endif
2967 gbeauche 1.1
2968     /*************************************************************************
2969     * Unoptimizable stuff --- jump *
2970     *************************************************************************/
2971    
2972     static __inline__ void raw_call_r(R4 r)
2973     {
2974 gbeauche 1.20 #if USE_NEW_RTASM
2975     CALLsr(r);
2976     #else
2977 gbeauche 1.1 emit_byte(0xff);
2978     emit_byte(0xd0+r);
2979 gbeauche 1.20 #endif
2980 gbeauche 1.5 }
2981    
2982     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2983     {
2984 gbeauche 1.20 #if USE_NEW_RTASM
2985     CALLsm(base, X86_NOREG, r, m);
2986     #else
2987 gbeauche 1.5 int mu;
2988     switch(m) {
2989     case 1: mu=0; break;
2990     case 2: mu=1; break;
2991     case 4: mu=2; break;
2992     case 8: mu=3; break;
2993     default: abort();
2994     }
2995     emit_byte(0xff);
2996     emit_byte(0x14);
2997     emit_byte(0x05+8*r+0x40*mu);
2998     emit_long(base);
2999 gbeauche 1.20 #endif
3000 gbeauche 1.1 }
3001    
3002     static __inline__ void raw_jmp_r(R4 r)
3003     {
3004 gbeauche 1.20 #if USE_NEW_RTASM
3005     JMPsr(r);
3006     #else
3007 gbeauche 1.1 emit_byte(0xff);
3008     emit_byte(0xe0+r);
3009 gbeauche 1.20 #endif
3010 gbeauche 1.1 }
3011    
3012     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3013     {
3014 gbeauche 1.20 #if USE_NEW_RTASM
3015     JMPsm(base, X86_NOREG, r, m);
3016     #else
3017 gbeauche 1.1 int mu;
3018     switch(m) {
3019     case 1: mu=0; break;
3020     case 2: mu=1; break;
3021     case 4: mu=2; break;
3022     case 8: mu=3; break;
3023     default: abort();
3024     }
3025     emit_byte(0xff);
3026     emit_byte(0x24);
3027     emit_byte(0x05+8*r+0x40*mu);
3028     emit_long(base);
3029 gbeauche 1.20 #endif
3030 gbeauche 1.1 }
3031    
3032     static __inline__ void raw_jmp_m(uae_u32 base)
3033     {
3034     emit_byte(0xff);
3035     emit_byte(0x25);
3036     emit_long(base);
3037     }
3038    
3039    
3040     static __inline__ void raw_call(uae_u32 t)
3041     {
3042 gbeauche 1.20 #if USE_NEW_RTASM
3043     CALLm(t);
3044     #else
3045 gbeauche 1.1 emit_byte(0xe8);
3046     emit_long(t-(uae_u32)target-4);
3047 gbeauche 1.20 #endif
3048 gbeauche 1.1 }
3049    
3050     static __inline__ void raw_jmp(uae_u32 t)
3051     {
3052 gbeauche 1.20 #if USE_NEW_RTASM
3053     JMPm(t);
3054     #else
3055 gbeauche 1.1 emit_byte(0xe9);
3056     emit_long(t-(uae_u32)target-4);
3057 gbeauche 1.20 #endif
3058 gbeauche 1.1 }
3059    
3060     static __inline__ void raw_jl(uae_u32 t)
3061     {
3062     emit_byte(0x0f);
3063     emit_byte(0x8c);
3064 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3065 gbeauche 1.1 }
3066    
3067     static __inline__ void raw_jz(uae_u32 t)
3068     {
3069     emit_byte(0x0f);
3070     emit_byte(0x84);
3071 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3072 gbeauche 1.1 }
3073    
3074     static __inline__ void raw_jnz(uae_u32 t)
3075     {
3076     emit_byte(0x0f);
3077     emit_byte(0x85);
3078 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3079 gbeauche 1.1 }
3080    
3081     static __inline__ void raw_jnz_l_oponly(void)
3082     {
3083     emit_byte(0x0f);
3084     emit_byte(0x85);
3085     }
3086    
3087     static __inline__ void raw_jcc_l_oponly(int cc)
3088     {
3089     emit_byte(0x0f);
3090     emit_byte(0x80+cc);
3091     }
3092    
3093     static __inline__ void raw_jnz_b_oponly(void)
3094     {
3095     emit_byte(0x75);
3096     }
3097    
3098     static __inline__ void raw_jz_b_oponly(void)
3099     {
3100     emit_byte(0x74);
3101     }
3102    
3103     static __inline__ void raw_jcc_b_oponly(int cc)
3104     {
3105     emit_byte(0x70+cc);
3106     }
3107    
3108     static __inline__ void raw_jmp_l_oponly(void)
3109     {
3110     emit_byte(0xe9);
3111     }
3112    
3113     static __inline__ void raw_jmp_b_oponly(void)
3114     {
3115     emit_byte(0xeb);
3116     }
3117    
3118     static __inline__ void raw_ret(void)
3119     {
3120     emit_byte(0xc3);
3121     }
3122    
3123     static __inline__ void raw_nop(void)
3124     {
3125     emit_byte(0x90);
3126     }
3127    
3128 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3129     {
3130     /* Source: GNU Binutils 2.12.90.0.15 */
3131     /* Various efficient no-op patterns for aligning code labels.
3132     Note: Don't try to assemble the instructions in the comments.
3133     0L and 0w are not legal. */
3134     static const uae_u8 f32_1[] =
3135     {0x90}; /* nop */
3136     static const uae_u8 f32_2[] =
3137     {0x89,0xf6}; /* movl %esi,%esi */
3138     static const uae_u8 f32_3[] =
3139     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3140     static const uae_u8 f32_4[] =
3141     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3142     static const uae_u8 f32_5[] =
3143     {0x90, /* nop */
3144     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3145     static const uae_u8 f32_6[] =
3146     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3147     static const uae_u8 f32_7[] =
3148     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3149     static const uae_u8 f32_8[] =
3150     {0x90, /* nop */
3151     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3152     static const uae_u8 f32_9[] =
3153     {0x89,0xf6, /* movl %esi,%esi */
3154     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3155     static const uae_u8 f32_10[] =
3156     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3157     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3158     static const uae_u8 f32_11[] =
3159     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3160     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3161     static const uae_u8 f32_12[] =
3162     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3163     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3164     static const uae_u8 f32_13[] =
3165     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3166     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3167     static const uae_u8 f32_14[] =
3168     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3169     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170     static const uae_u8 f32_15[] =
3171     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3172     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3173     static const uae_u8 f32_16[] =
3174     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3175     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3176     static const uae_u8 *const f32_patt[] = {
3177     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3178     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3179     };
3180 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3181 gbeauche 1.8
3182 gbeauche 1.21 #if defined(__x86_64__)
3183     /* The recommended way to pad 64bit code is to use NOPs preceded by
3184     maximally four 0x66 prefixes. Balance the size of nops. */
3185     if (nbytes == 0)
3186     return;
3187    
3188     int i;
3189     int nnops = (nbytes + 3) / 4;
3190     int len = nbytes / nnops;
3191     int remains = nbytes - nnops * len;
3192    
3193     for (i = 0; i < remains; i++) {
3194     emit_block(prefixes, len);
3195     raw_nop();
3196     }
3197     for (; i < nnops; i++) {
3198     emit_block(prefixes, len - 1);
3199     raw_nop();
3200     }
3201     #else
3202 gbeauche 1.8 int nloops = nbytes / 16;
3203     while (nloops-- > 0)
3204     emit_block(f32_16, sizeof(f32_16));
3205    
3206     nbytes %= 16;
3207     if (nbytes)
3208     emit_block(f32_patt[nbytes - 1], nbytes);
3209 gbeauche 1.21 #endif
3210 gbeauche 1.8 }
3211    
3212 gbeauche 1.1
3213     /*************************************************************************
3214     * Flag handling, to and fro UAE flag register *
3215     *************************************************************************/
3216    
3217     #ifdef SAHF_SETO_PROFITABLE
3218    
3219     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3220    
3221     static __inline__ void raw_flags_to_reg(int r)
3222     {
3223     raw_lahf(0); /* Most flags in AH */
3224     //raw_setcc(r,0); /* V flag in AL */
3225 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3226 gbeauche 1.1
3227     #if 1 /* Let's avoid those nasty partial register stalls */
3228 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3229     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3230 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3231     live.state[FLAGTMP].status=INMEM;
3232     live.state[FLAGTMP].realreg=-1;
3233     /* We just "evicted" FLAGTMP. */
3234     if (live.nat[r].nholds!=1) {
3235     /* Huh? */
3236     abort();
3237     }
3238     live.nat[r].nholds=0;
3239     #endif
3240     }
3241    
3242     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3243     static __inline__ void raw_reg_to_flags(int r)
3244     {
3245     raw_cmp_b_ri(r,-127); /* set V */
3246     raw_sahf(0);
3247     }
3248    
3249 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3250     static __inline__ void raw_flags_set_zero(int s, int tmp)
3251     {
3252     raw_mov_l_rr(tmp,s);
3253     raw_lahf(s); /* flags into ah */
3254     raw_and_l_ri(s,0xffffbfff);
3255     raw_and_l_ri(tmp,0x00004000);
3256     raw_xor_l_ri(tmp,0x00004000);
3257     raw_or_l(s,tmp);
3258     raw_sahf(s);
3259     }
3260    
3261 gbeauche 1.1 #else
3262    
3263     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3264     static __inline__ void raw_flags_to_reg(int r)
3265     {
3266     raw_pushfl();
3267     raw_pop_l_r(r);
3268 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3269 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3270     live.state[FLAGTMP].status=INMEM;
3271     live.state[FLAGTMP].realreg=-1;
3272     /* We just "evicted" FLAGTMP. */
3273     if (live.nat[r].nholds!=1) {
3274     /* Huh? */
3275     abort();
3276     }
3277     live.nat[r].nholds=0;
3278     }
3279    
3280     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3281     static __inline__ void raw_reg_to_flags(int r)
3282     {
3283     raw_push_l_r(r);
3284     raw_popfl();
3285     }
3286    
3287 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3288     static __inline__ void raw_flags_set_zero(int s, int tmp)
3289     {
3290     raw_mov_l_rr(tmp,s);
3291     raw_pushfl();
3292     raw_pop_l_r(s);
3293     raw_and_l_ri(s,0xffffffbf);
3294     raw_and_l_ri(tmp,0x00000040);
3295     raw_xor_l_ri(tmp,0x00000040);
3296     raw_or_l(s,tmp);
3297     raw_push_l_r(s);
3298     raw_popfl();
3299     }
3300 gbeauche 1.1 #endif
3301    
3302     /* Apparently, there are enough instructions between flag store and
3303     flag reload to avoid the partial memory stall */
3304     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3305     {
3306     #if 1
3307 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3308 gbeauche 1.1 #else
3309 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3310     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3311 gbeauche 1.1 #endif
3312     }
3313    
3314     /* FLAGX is byte sized, and we *do* write it at that size */
3315     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3316     {
3317     if (live.nat[target].canbyte)
3318 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3319 gbeauche 1.1 else if (live.nat[target].canword)
3320 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3321 gbeauche 1.1 else
3322 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3323 gbeauche 1.1 }
3324    
3325     static __inline__ void raw_inc_sp(int off)
3326     {
3327 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3328 gbeauche 1.1 }
3329    
3330     /*************************************************************************
3331     * Handling mistaken direct memory access *
3332     *************************************************************************/
3333    
// gb-- I don't need that part for JIT Basilisk II
// (the "&& 0" below keeps this whole region out of the build)
#if defined(NATMEM_OFFSET) && 0
#include <asm/sigcontext.h>
#include <signal.h>

#define SIG_READ 1
#define SIG_WRITE 2

static int in_handler=0;        /* set while a veccode stub is pending */
static uae_u8 veccode[256];     /* buffer receiving the generated fix-up stub */

/*
 * SIGSEGV handler for faults raised by compiled code that accessed
 * emulated memory directly.
 *
 *  x   unused
 *  sc  register state at the fault.  It is passed by value and written
 *      to (sc.eip); NOTE(review): this presumably relies on the signal
 *      frame layout so that the write changes the resume address --
 *      confirm before re-enabling.
 *
 * Steps:
 *  1. If the faulting instruction lies inside the compiled code, decode
 *     it.  Recognised forms are opcodes 0x8a/0x88 (byte read/write) and
 *     0x8b/0x89 (read/write, long or word with a 0x66 prefix), with a
 *     ModRM byte whose top bits are 10 (6-byte instruction) or, for
 *     0x8b/0x89 only, 01 (3-byte instruction).
 *  2. Perform the access through get_xxx()/put_xxx():
 *      - currprefs.comp_oldsegv: directly on the saved register, then
 *        skip the instruction;
 *      - otherwise: overwrite the faulting instruction with a jump to a
 *        stub in veccode[] that loads the value (reads), restores the
 *        five overwritten bytes, clears in_handler and jumps back behind
 *        the faulting instruction.
 *  3. Invalidate the translated block containing the faulting address,
 *     searching the active list first and the dormant list second.
 *
 * If the instruction cannot be decoded, diagnostics are logged and the
 * default SIGSEGV action is restored so that returning faults for real.
 */
static void vec(int x, struct sigcontext sc)
{
    uae_u8* i=(uae_u8*)sc.eip;  /* faulting instruction */
    uae_u32 addr=sc.cr2;        /* faulting address */
    int r=-1;                   /* register field of the ModRM byte, -1 = not decoded */
    int size=4;                 /* access size in bytes */
    int dir=-1;                 /* SIG_READ or SIG_WRITE */
    int len=0;                  /* length of the faulting instruction */
    int j;

    write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
    if (!canbang)
        write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
    if (in_handler)
        write_log("Argh --- Am already in a handler. Shouldn't happen!\n");

    if (canbang && i>=compiled_code && i<=current_compile_p) {
        if (*i==0x66) {         /* operand size prefix: word access */
            i++;
            size=2;
            len++;
        }

        switch(i[0]) {
        case 0x8a:
            if ((i[1]&0xc0)==0x80) {
                r=(i[1]>>3)&7;
                dir=SIG_READ;
                size=1;
                len+=6;
                break;
            }
            break;
        case 0x88:
            if ((i[1]&0xc0)==0x80) {
                r=(i[1]>>3)&7;
                dir=SIG_WRITE;
                size=1;
                len+=6;
                break;
            }
            break;
        case 0x8b:
            if ((i[1]&0xc0)==0x80) {
                r=(i[1]>>3)&7;
                dir=SIG_READ;
                len+=6;
                break;
            }
            if ((i[1]&0xc0)==0x40) {
                r=(i[1]>>3)&7;
                dir=SIG_READ;
                len+=3;
                break;
            }
            break;
        case 0x89:
            if ((i[1]&0xc0)==0x80) {
                r=(i[1]>>3)&7;
                dir=SIG_WRITE;
                len+=6;
                break;
            }
            if ((i[1]&0xc0)==0x40) {
                r=(i[1]>>3)&7;
                dir=SIG_WRITE;
                len+=3;
                break;
            }
            break;
        }
    }

    if (r!=-1) {
        void* pr=NULL;          /* location of the saved register in sc */
        write_log("register was %d, direction was %d, size was %d\n",r,dir,size);

        /* For byte accesses, register numbers 4..7 select the second byte
           of eax/ecx/edx/ebx; for wider accesses 5..7 are ebp/esi/edi and
           4 is not handled. */
        switch(r) {
        case 0: pr=&(sc.eax); break;
        case 1: pr=&(sc.ecx); break;
        case 2: pr=&(sc.edx); break;
        case 3: pr=&(sc.ebx); break;
        case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
        case 5: pr=(size>1)?
                    (void*)(&(sc.ebp)):
                    (void*)(((uae_u8*)&(sc.ecx))+1); break;
        case 6: pr=(size>1)?
                    (void*)(&(sc.esi)):
                    (void*)(((uae_u8*)&(sc.edx))+1); break;
        case 7: pr=(size>1)?
                    (void*)(&(sc.edi)):
                    (void*)(((uae_u8*)&(sc.ebx))+1); break;
        default: abort();
        }
        if (pr) {
            blockinfo* bi;

            if (currprefs.comp_oldsegv) {
                addr-=NATMEM_OFFSET;

                if ((addr>=0x10000000 && addr<0x40000000) ||
                    (addr>=0x50000000)) {
                    write_log("Suspicious address in %x SEGV handler.\n",addr);
                }
                if (dir==SIG_READ) {
                    switch(size) {
                    case 1: *((uae_u8*)pr)=get_byte(addr); break;
                    case 2: *((uae_u16*)pr)=get_word(addr); break;
                    case 4: *((uae_u32*)pr)=get_long(addr); break;
                    default: abort();
                    }
                }
                else { /* write */
                    switch(size) {
                    case 1: put_byte(addr,*((uae_u8*)pr)); break;
                    case 2: put_word(addr,*((uae_u16*)pr)); break;
                    case 4: put_long(addr,*((uae_u32*)pr)); break;
                    default: abort();
                    }
                }
                write_log("Handled one access!\n");
                fflush(stdout);
                segvcount++;
                sc.eip+=len;    /* skip the emulated instruction */
            }
            else {
                void* tmp=target;       /* code emission pointer to restore */
                int i;                  /* shadows the outer instruction pointer */
                uae_u8 vecbuf[5];       /* bytes overwritten by the jump */

                addr-=NATMEM_OFFSET;

                if ((addr>=0x10000000 && addr<0x40000000) ||
                    (addr>=0x50000000)) {
                    write_log("Suspicious address in %x SEGV handler.\n",addr);
                }

                /* Replace the faulting instruction by a jump to veccode */
                target=(uae_u8*)sc.eip;
                for (i=0;i<5;i++)
                    vecbuf[i]=target[i];
                emit_byte(0xe9);
                emit_long((uintptr)veccode-(uintptr)target-4);
                write_log("Create jump to %p\n",veccode);

                write_log("Handled one access!\n");
                fflush(stdout);
                segvcount++;

                target=veccode;

                if (dir==SIG_READ) {
                    /* Load the value with the accessor matching the access
                       size, as the comp_oldsegv path above does; the
                       word/long cases previously read a single byte.
                       NOTE(review): confirm whether the compiled code
                       expects the value byte-swapped at this point. */
                    switch(size) {
                    case 1: raw_mov_b_ri(r,get_byte(addr)); break;
                    case 2: raw_mov_w_ri(r,get_word(addr)); break;
                    case 4: raw_mov_l_ri(r,get_long(addr)); break;
                    default: abort();
                    }
                }
                else { /* write */
                    switch(size) {
                    case 1: put_byte(addr,*((uae_u8*)pr)); break;
                    case 2: put_word(addr,*((uae_u16*)pr)); break;
                    case 4: put_long(addr,*((uae_u32*)pr)); break;
                    default: abort();
                    }
                }
                /* The stub restores the overwritten bytes, clears
                   in_handler and jumps behind the faulting instruction */
                for (i=0;i<5;i++)
                    raw_mov_b_mi(sc.eip+i,vecbuf[i]);
                raw_mov_l_mi((uintptr)&in_handler,0);
                emit_byte(0xe9);
                emit_long(sc.eip+len-(uintptr)target-4);
                in_handler=1;
                target=tmp;
            }
            bi=active;
            while (bi) {
                if (bi->handler &&
                    (uae_u8*)bi->direct_handler<=i &&
                    (uae_u8*)bi->nexthandler>i) {
                    write_log("deleted trigger (%p<%p<%p) %p\n",
                              bi->handler,
                              i,
                              bi->nexthandler,
                              bi->pc_p);
                    invalidate_block(bi);
                    raise_in_cl_list(bi);
                    set_special(0);
                    return;
                }
                bi=bi->next;
            }
            /* Not found in the active list. Might be a rom routine that
               is in the dormant list */
            bi=dormant;
            while (bi) {
                if (bi->handler &&
                    (uae_u8*)bi->direct_handler<=i &&
                    (uae_u8*)bi->nexthandler>i) {
                    write_log("deleted trigger (%p<%p<%p) %p\n",
                              bi->handler,
                              i,
                              bi->nexthandler,
                              bi->pc_p);
                    invalidate_block(bi);
                    raise_in_cl_list(bi);
                    set_special(0);
                    return;
                }
                bi=bi->next;
            }
            write_log("Huh? Could not find trigger!\n");
            return;
        }
    }
    write_log("Can't handle access!\n");
    for (j=0;j<10;j++) {
        write_log("instruction byte %2d is %02x\n",j,i[j]);
    }
    write_log("Please send the above info (starting at \"fault address\") to\n"
              "bmeyer@csse.monash.edu.au\n"
              "This shouldn't happen ;-)\n");
    fflush(stdout);
    signal(SIGSEGV,SIG_DFL);  /* returning here will cause a "real" SEGV */
}
#endif
3570    
3571    
3572     /*************************************************************************
3573     * Checking for CPU features *
3574     *************************************************************************/
3575    
3576 gbeauche 1.3 struct cpuinfo_x86 {
3577     uae_u8 x86; // CPU family
3578     uae_u8 x86_vendor; // CPU vendor
3579     uae_u8 x86_processor; // CPU canonical processor type
3580     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3581     uae_u32 x86_hwcap;
3582     uae_u8 x86_model;
3583     uae_u8 x86_mask;
3584     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3585     char x86_vendor_id[16];
3586     };
3587     struct cpuinfo_x86 cpuinfo;
3588    
/* CPU vendors, as stored in cpuinfo_x86::x86_vendor */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
3601    
/* Canonical processor types.  The values index the per-processor tables
   (names, alignments), so the order here and in those tables must agree. */
enum {
    X86_PROCESSOR_I386,         /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_X86_64,
    X86_PROCESSOR_max           /* number of processor types; also "not determined" */
};
3613    
3614     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3615     "80386",
3616     "80486",
3617     "Pentium",
3618     "PentiumPro",
3619     "K6",
3620     "Athlon",
3621 gbeauche 1.16 "Pentium4",
3622 gbeauche 1.28 "x86-64"
3623 gbeauche 1.3 };
3624    
3625     static struct ptt {
3626     const int align_loop;
3627     const int align_loop_max_skip;
3628     const int align_jump;
3629     const int align_jump_max_skip;
3630     const int align_func;
3631     }
3632     x86_alignments[X86_PROCESSOR_max] = {
3633     { 4, 3, 4, 3, 4 },
3634     { 16, 15, 16, 15, 16 },
3635     { 16, 7, 16, 7, 16 },
3636     { 16, 15, 16, 7, 16 },
3637     { 32, 7, 32, 7, 32 },
3638 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3639 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3640     { 16, 7, 16, 7, 16 }
3641 gbeauche 1.3 };
3642 gbeauche 1.1
3643 gbeauche 1.3 static void
3644     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3645 gbeauche 1.1 {
3646 gbeauche 1.3 char *v = c->x86_vendor_id;
3647    
3648     if (!strcmp(v, "GenuineIntel"))
3649     c->x86_vendor = X86_VENDOR_INTEL;
3650     else if (!strcmp(v, "AuthenticAMD"))
3651     c->x86_vendor = X86_VENDOR_AMD;
3652     else if (!strcmp(v, "CyrixInstead"))
3653     c->x86_vendor = X86_VENDOR_CYRIX;
3654     else if (!strcmp(v, "Geode by NSC"))
3655     c->x86_vendor = X86_VENDOR_NSC;
3656     else if (!strcmp(v, "UMC UMC UMC "))
3657     c->x86_vendor = X86_VENDOR_UMC;
3658     else if (!strcmp(v, "CentaurHauls"))
3659     c->x86_vendor = X86_VENDOR_CENTAUR;
3660     else if (!strcmp(v, "NexGenDriven"))
3661     c->x86_vendor = X86_VENDOR_NEXGEN;
3662     else if (!strcmp(v, "RiseRiseRise"))
3663     c->x86_vendor = X86_VENDOR_RISE;
3664     else if (!strcmp(v, "GenuineTMx86") ||
3665     !strcmp(v, "TransmetaCPU"))
3666     c->x86_vendor = X86_VENDOR_TRANSMETA;
3667     else
3668     c->x86_vendor = X86_VENDOR_UNKNOWN;
3669     }
3670 gbeauche 1.1
3671 gbeauche 1.3 static void
3672     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3673     {
3674 gbeauche 1.27 const int CPUID_SPACE = 4096;
3675     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3676     if (cpuid_space == VM_MAP_FAILED)
3677     abort();
3678     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3679    
3680 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3681 gbeauche 1.3 uae_u8* tmp=get_target();
3682 gbeauche 1.1
3683 gbeauche 1.20 s_op = op;
3684 gbeauche 1.3 set_target(cpuid_space);
3685     raw_push_l_r(0); /* eax */
3686     raw_push_l_r(1); /* ecx */
3687     raw_push_l_r(2); /* edx */
3688     raw_push_l_r(3); /* ebx */
3689 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3690 gbeauche 1.3 raw_cpuid(0);
3691 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3692     raw_mov_l_mr((uintptr)&s_ebx,3);
3693     raw_mov_l_mr((uintptr)&s_ecx,1);
3694     raw_mov_l_mr((uintptr)&s_edx,2);
3695 gbeauche 1.3 raw_pop_l_r(3);
3696     raw_pop_l_r(2);
3697     raw_pop_l_r(1);
3698     raw_pop_l_r(0);
3699     raw_ret();
3700     set_target(tmp);
3701 gbeauche 1.1
3702 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3703 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3704     if (ebx != NULL) *ebx = s_ebx;
3705     if (ecx != NULL) *ecx = s_ecx;
3706     if (edx != NULL) *edx = s_edx;
3707 gbeauche 1.27
3708     vm_release(cpuid_space, CPUID_SPACE);
3709 gbeauche 1.1 }
3710    
3711 gbeauche 1.3 static void
3712     raw_init_cpu(void)
3713 gbeauche 1.1 {
3714 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3715    
3716     /* Defaults */
3717 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3718 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3719     c->cpuid_level = -1; /* CPUID not detected */
3720     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3721     c->x86_vendor_id[0] = '\0'; /* Unset */
3722     c->x86_hwcap = 0;
3723    
3724     /* Get vendor name */
3725     c->x86_vendor_id[12] = '\0';
3726     cpuid(0x00000000,
3727     (uae_u32 *)&c->cpuid_level,
3728     (uae_u32 *)&c->x86_vendor_id[0],
3729     (uae_u32 *)&c->x86_vendor_id[8],
3730     (uae_u32 *)&c->x86_vendor_id[4]);
3731     x86_get_cpu_vendor(c);
3732    
3733     /* Intel-defined flags: level 0x00000001 */
3734     c->x86_brand_id = 0;
3735     if ( c->cpuid_level >= 0x00000001 ) {
3736     uae_u32 tfms, brand_id;
3737     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3738     c->x86 = (tfms >> 8) & 15;
3739     c->x86_model = (tfms >> 4) & 15;
3740     c->x86_brand_id = brand_id & 0xff;
3741     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3742     (c->x86 == 0xf)) {
3743     /* AMD Extended Family and Model Values */
3744     c->x86 += (tfms >> 20) & 0xff;
3745     c->x86_model += (tfms >> 12) & 0xf0;
3746     }
3747     c->x86_mask = tfms & 15;
3748     } else {
3749     /* Have CPUID level 0 only - unheard of */
3750     c->x86 = 4;
3751     }
3752    
3753 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3754     uae_u32 xlvl;
3755     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3756     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3757     if ( xlvl >= 0x80000001 ) {
3758 gbeauche 1.28 uae_u32 features, extra_features;
3759     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3760 gbeauche 1.16 if (features & (1 << 29)) {
3761     /* Assume x86-64 if long mode is supported */
3762 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3763 gbeauche 1.16 }
3764 gbeauche 1.28 if (extra_features & (1 << 0))
3765     have_lahf_lm = true;
3766 gbeauche 1.16 }
3767     }
3768    
3769 gbeauche 1.3 /* Canonicalize processor ID */
3770     switch (c->x86) {
3771     case 3:
3772     c->x86_processor = X86_PROCESSOR_I386;
3773     break;
3774     case 4:
3775     c->x86_processor = X86_PROCESSOR_I486;
3776     break;
3777     case 5:
3778     if (c->x86_vendor == X86_VENDOR_AMD)
3779     c->x86_processor = X86_PROCESSOR_K6;
3780     else
3781     c->x86_processor = X86_PROCESSOR_PENTIUM;
3782     break;
3783     case 6:
3784     if (c->x86_vendor == X86_VENDOR_AMD)
3785     c->x86_processor = X86_PROCESSOR_ATHLON;
3786     else
3787     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3788     break;
3789     case 15:
3790     if (c->x86_vendor == X86_VENDOR_INTEL) {
3791 gbeauche 1.16 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3792 gbeauche 1.3 if (c->x86_brand_id >= 8)
3793     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3794     }
3795 gbeauche 1.16 if (c->x86_vendor == X86_VENDOR_AMD) {
3796     /* Assume an Athlon processor if family == 15 and it was not
3797     detected as an x86-64 so far */
3798     if (c->x86_processor == X86_PROCESSOR_max)
3799     c->x86_processor = X86_PROCESSOR_ATHLON;
3800     }
3801 gbeauche 1.3 break;
3802     }
3803     if (c->x86_processor == X86_PROCESSOR_max) {
3804     fprintf(stderr, "Error: unknown processor type\n");
3805     fprintf(stderr, " Family : %d\n", c->x86);
3806     fprintf(stderr, " Model : %d\n", c->x86_model);
3807     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3808 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3809 gbeauche 1.3 if (c->x86_brand_id)
3810     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3811     abort();
3812     }
3813    
3814     /* Have CMOV support? */
3815 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3816 gbeauche 1.3
3817     /* Can the host CPU suffer from partial register stalls? */
3818     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3819     #if 1
3820     /* It appears that partial register writes are a bad idea even on
3821 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3822     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3823 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3824     have_rat_stall = true;
3825 gbeauche 1.1 #endif
3826 gbeauche 1.3
3827     /* Alignments */
3828     if (tune_alignment) {
3829     align_loops = x86_alignments[c->x86_processor].align_loop;
3830     align_jumps = x86_alignments[c->x86_processor].align_jump;
3831     }
3832    
3833     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3834     c->cpuid_level, c->x86_vendor_id,
3835     x86_processor_string_table[c->x86_processor]);
3836 gbeauche 1.1 }
3837    
3838 gbeauche 1.10 static bool target_check_bsf(void)
3839     {
3840     bool mismatch = false;
3841     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3842     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3843     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3844     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3845     for (int value = -1; value <= 1; value++) {
3846 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3847     unsigned long tmp = value;
3848 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3849 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3850 gbeauche 1.10 int OF = (flags >> 11) & 1;
3851     int SF = (flags >> 7) & 1;
3852     int ZF = (flags >> 6) & 1;
3853     int CF = flags & 1;
3854     tmp = (value == 0);
3855     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3856     mismatch = true;
3857     }
3858     }}}}
3859     if (mismatch)
3860     write_log("Target CPU defines all flags on BSF instruction\n");
3861     return !mismatch;
3862     }
3863    
3864 gbeauche 1.1
3865     /*************************************************************************
3866     * FPU stuff *
3867     *************************************************************************/
3868    
3869    
3870     static __inline__ void raw_fp_init(void)
3871     {
3872     int i;
3873    
3874     for (i=0;i<N_FREGS;i++)
3875     live.spos[i]=-2;
3876     live.tos=-1; /* Stack is empty */
3877     }
3878    
3879     static __inline__ void raw_fp_cleanup_drop(void)
3880     {
3881     #if 0
3882     /* using FINIT instead of popping all the entries.
3883     Seems to have side effects --- there is display corruption in
3884     Quake when this is used */
3885     if (live.tos>1) {
3886     emit_byte(0x9b);
3887     emit_byte(0xdb);
3888     emit_byte(0xe3);
3889     live.tos=-1;
3890     }
3891     #endif
3892     while (live.tos>=1) {
3893     emit_byte(0xde);
3894     emit_byte(0xd9);
3895     live.tos-=2;
3896     }
3897     while (live.tos>=0) {
3898     emit_byte(0xdd);
3899     emit_byte(0xd8);
3900     live.tos--;
3901     }
3902     raw_fp_init();
3903     }
3904    
3905     static __inline__ void make_tos(int r)
3906     {
3907     int p,q;
3908    
3909     if (live.spos[r]<0) { /* Register not yet on stack */
3910     emit_byte(0xd9);
3911     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3912     live.tos++;
3913     live.spos[r]=live.tos;
3914     live.onstack[live.tos]=r;
3915     return;
3916     }
3917     /* Register is on stack */
3918     if (live.tos==live.spos[r])
3919     return;
3920     p=live.spos[r];
3921     q=live.onstack[live.tos];
3922    
3923     emit_byte(0xd9);
3924     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3925     live.onstack[live.tos]=r;
3926     live.spos[r]=live.tos;
3927     live.onstack[p]=q;
3928     live.spos[q]=p;
3929     }
3930    
3931     static __inline__ void make_tos2(int r, int r2)
3932     {
3933     int q;
3934    
3935     make_tos(r2); /* Put the reg that's supposed to end up in position2
3936     on top */
3937    
3938     if (live.spos[r]<0) { /* Register not yet on stack */
3939     make_tos(r); /* This will extend the stack */
3940     return;
3941     }
3942     /* Register is on stack */
3943     emit_byte(0xd9);
3944     emit_byte(0xc9); /* Move r2 into position 2 */
3945    
3946     q=live.onstack[live.tos-1];
3947     live.onstack[live.tos]=q;
3948     live.spos[q]=live.tos;
3949     live.onstack[live.tos-1]=r2;
3950     live.spos[r2]=live.tos-1;
3951    
3952     make_tos(r); /* And r into 1 */
3953     }
3954    
3955     static __inline__ int stackpos(int r)
3956     {
3957     if (live.spos[r]<0)
3958     abort();
3959     if (live.tos<live.spos[r]) {
3960     printf("Looking for spos for fnreg %d\n",r);
3961     abort();
3962     }
3963     return live.tos-live.spos[r];
3964     }
3965    
3966     static __inline__ void usereg(int r)
3967     {
3968     if (live.spos[r]<0)
3969     make_tos(r);
3970     }
3971    
3972     /* This is called with one FP value in a reg *above* tos, which it will
3973     pop off the stack if necessary */
3974     static __inline__ void tos_make(int r)
3975     {
3976     if (live.spos[r]<0) {
3977     live.tos++;
3978     live.spos[r]=live.tos;
3979     live.onstack[live.tos]=r;
3980     return;
3981     }
3982     emit_byte(0xdd);
3983     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3984     and pop it*/
3985     }
3986 gbeauche 1.23
3987     /* FP helper functions */
3988     #if USE_NEW_RTASM
3989     #define DEFINE_OP(NAME, GEN) \
3990     static inline void raw_##NAME(uint32 m) \
3991     { \
3992     GEN(m, X86_NOREG, X86_NOREG, 1); \
3993     }
3994     DEFINE_OP(fstl, FSTLm);
3995     DEFINE_OP(fstpl, FSTPLm);
3996     DEFINE_OP(fldl, FLDLm);
3997     DEFINE_OP(fildl, FILDLm);
3998     DEFINE_OP(fistl, FISTLm);
3999     DEFINE_OP(flds, FLDSm);
4000     DEFINE_OP(fsts, FSTSm);
4001     DEFINE_OP(fstpt, FSTPTm);
4002     DEFINE_OP(fldt, FLDTm);
4003     #else
4004     #define DEFINE_OP(NAME, OP1, OP2) \
4005     static inline void raw_##NAME(uint32 m) \
4006     { \
4007     emit_byte(OP1); \
4008     emit_byte(OP2); \
4009     emit_long(m); \
4010     }
4011     DEFINE_OP(fstl, 0xdd, 0x15);
4012     DEFINE_OP(fstpl, 0xdd, 0x1d);
4013     DEFINE_OP(fldl, 0xdd, 0x05);
4014     DEFINE_OP(fildl, 0xdb, 0x05);
4015     DEFINE_OP(fistl, 0xdb, 0x15);
4016     DEFINE_OP(flds, 0xd9, 0x05);
4017     DEFINE_OP(fsts, 0xd9, 0x15);
4018     DEFINE_OP(fstpt, 0xdb, 0x3d);
4019     DEFINE_OP(fldt, 0xdb, 0x2d);
4020     #endif
4021     #undef DEFINE_OP
4022    
4023 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4024     {
4025     make_tos(r);
4026 gbeauche 1.23 raw_fstl(m);
4027 gbeauche 1.1 }
4028     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4029    
4030     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4031     {
4032     make_tos(r);
4033 gbeauche 1.23 raw_fstpl(m);
4034 gbeauche 1.1 live.onstack[live.tos]=-1;
4035     live.tos--;
4036     live.spos[r]=-2;
4037     }
4038     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4039    
4040     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4041     {
4042 gbeauche 1.23 raw_fldl(m);
4043 gbeauche 1.1 tos_make(r);
4044     }
4045     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4046    
4047     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4048     {
4049 gbeauche 1.23 raw_fildl(m);
4050 gbeauche 1.1 tos_make(r);
4051     }
4052     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4053    
4054     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4055     {
4056     make_tos(r);
4057 gbeauche 1.23 raw_fistl(m);
4058 gbeauche 1.1 }
4059     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4060    
4061     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4062     {
4063 gbeauche 1.23 raw_flds(m);
4064 gbeauche 1.1 tos_make(r);
4065     }
4066     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4067    
4068     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4069     {
4070     make_tos(r);
4071 gbeauche 1.23 raw_fsts(m);
4072 gbeauche 1.1 }
4073     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4074    
4075     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4076     {
4077     int rs;
4078    
4079     /* Stupid x87 can't write a long double to mem without popping the
4080     stack! */
4081     usereg(r);
4082     rs=stackpos(r);
4083     emit_byte(0xd9); /* Get a copy to the top of stack */
4084     emit_byte(0xc0+rs);
4085    
4086 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4087 gbeauche 1.1 }
4088     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4089    
4090     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4091     {
4092     int rs;
4093    
4094     make_tos(r);
4095 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4096 gbeauche 1.1 live.onstack[live.tos]=-1;
4097     live.tos--;
4098     live.spos[r]=-2;
4099     }
4100     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4101    
4102     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4103     {
4104 gbeauche 1.23 raw_fldt(m);
4105 gbeauche 1.1 tos_make(r);
4106     }
4107     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4108    
4109     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4110     {
4111     emit_byte(0xd9);
4112     emit_byte(0xeb);
4113     tos_make(r);
4114     }
4115     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4116    
4117     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4118     {
4119     emit_byte(0xd9);
4120     emit_byte(0xec);
4121     tos_make(r);
4122     }
4123     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4124    
4125     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4126     {
4127     emit_byte(0xd9);
4128     emit_byte(0xea);
4129     tos_make(r);
4130     }
4131     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4132    
4133     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4134     {
4135     emit_byte(0xd9);
4136     emit_byte(0xed);
4137     tos_make(r);
4138     }
4139     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4140    
4141     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4142     {
4143     emit_byte(0xd9);
4144     emit_byte(0xe8);
4145     tos_make(r);
4146     }
4147     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4148    
4149     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4150     {
4151     emit_byte(0xd9);
4152     emit_byte(0xee);
4153     tos_make(r);
4154     }
4155     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4156    
4157     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4158     {
4159     int ds;
4160    
4161     usereg(s);
4162     ds=stackpos(s);
4163     if (ds==0 && live.spos[d]>=0) {
4164     /* source is on top of stack, and we already have the dest */
4165     int dd=stackpos(d);
4166     emit_byte(0xdd);
4167     emit_byte(0xd0+dd);
4168     }
4169     else {
4170     emit_byte(0xd9);
4171     emit_byte(0xc0+ds); /* duplicate source on tos */
4172     tos_make(d); /* store to destination, pop if necessary */
4173     }
4174     }
4175     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4176    
4177     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4178     {
4179     emit_byte(0xd9);
4180     emit_byte(0xa8+index);
4181     emit_long(base);
4182     }
4183     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4184    
4185    
4186     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4187     {
4188     int ds;
4189    
4190     if (d!=s) {
4191     usereg(s);
4192     ds=stackpos(s);
4193     emit_byte(0xd9);
4194     emit_byte(0xc0+ds); /* duplicate source */
4195     emit_byte(0xd9);
4196     emit_byte(0xfa); /* take square root */
4197     tos_make(d); /* store to destination */
4198     }
4199     else {
4200     make_tos(d);
4201     emit_byte(0xd9);
4202     emit_byte(0xfa); /* take square root */
4203     }
4204     }
4205     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4206    
4207     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4208     {
4209     int ds;
4210    
4211     if (d!=s) {
4212     usereg(s);
4213     ds=stackpos(s);
4214     emit_byte(0xd9);
4215     emit_byte(0xc0+ds); /* duplicate source */
4216     emit_byte(0xd9);
4217     emit_byte(0xe1); /* take fabs */
4218     tos_make(d); /* store to destination */
4219     }
4220     else {
4221     make_tos(d);
4222     emit_byte(0xd9);
4223     emit_byte(0xe1); /* take fabs */
4224     }
4225     }
4226     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4227    
4228     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4229     {
4230     int ds;
4231    
4232     if (d!=s) {
4233     usereg(s);
4234     ds=stackpos(s);
4235     emit_byte(0xd9);
4236     emit_byte(0xc0+ds); /* duplicate source */
4237     emit_byte(0xd9);
4238     emit_byte(0xfc); /* take frndint */
4239     tos_make(d); /* store to destination */
4240     }
4241     else {
4242     make_tos(d);
4243     emit_byte(0xd9);
4244     emit_byte(0xfc); /* take frndint */
4245     }
4246     }
4247     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4248    
4249     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4250     {
4251     int ds;
4252    
4253     if (d!=s) {
4254     usereg(s);
4255     ds=stackpos(s);
4256     emit_byte(0xd9);
4257     emit_byte(0xc0+ds); /* duplicate source */
4258     emit_byte(0xd9);
4259     emit_byte(0xff); /* take cos */
4260     tos_make(d); /* store to destination */
4261     }
4262     else {
4263     make_tos(d);
4264     emit_byte(0xd9);
4265     emit_byte(0xff); /* take cos */
4266     }
4267     }
4268     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4269    
4270     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4271     {
4272     int ds;
4273    
4274     if (d!=s) {
4275     usereg(s);
4276     ds=stackpos(s);
4277     emit_byte(0xd9);
4278     emit_byte(0xc0+ds); /* duplicate source */
4279     emit_byte(0xd9);
4280     emit_byte(0xfe); /* take sin */
4281     tos_make(d); /* store to destination */
4282     }
4283     else {
4284     make_tos(d);
4285     emit_byte(0xd9);
4286     emit_byte(0xfe); /* take sin */
4287     }
4288     }
4289     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4290    
4291     double one=1;
4292     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4293     {
4294     int ds;
4295    
4296     usereg(s);
4297     ds=stackpos(s);
4298     emit_byte(0xd9);
4299     emit_byte(0xc0+ds); /* duplicate source */
4300    
4301     emit_byte(0xd9);
4302     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4303     emit_byte(0xd9);
4304     emit_byte(0xfc); /* rndint */
4305     emit_byte(0xd9);
4306     emit_byte(0xc9); /* swap top two elements */
4307     emit_byte(0xd8);
4308     emit_byte(0xe1); /* subtract rounded from original */
4309     emit_byte(0xd9);
4310     emit_byte(0xf0); /* f2xm1 */
4311     emit_byte(0xdc);
4312     emit_byte(0x05);
4313 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4314 gbeauche 1.1 emit_byte(0xd9);
4315     emit_byte(0xfd); /* and scale it */
4316     emit_byte(0xdd);
4317     emit_byte(0xd9); /* take he rounded value off */
4318     tos_make(d); /* store to destination */
4319     }
4320     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4321    
4322     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4323     {
4324     int ds;
4325    
4326     usereg(s);
4327     ds=stackpos(s);
4328     emit_byte(0xd9);
4329     emit_byte(0xc0+ds); /* duplicate source */
4330     emit_byte(0xd9);
4331     emit_byte(0xea); /* fldl2e */
4332     emit_byte(0xde);
4333     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4334    
4335     emit_byte(0xd9);
4336     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4337     emit_byte(0xd9);
4338     emit_byte(0xfc); /* rndint */
4339     emit_byte(0xd9);
4340     emit_byte(0xc9); /* swap top two elements */
4341     emit_byte(0xd8);
4342     emit_byte(0xe1); /* subtract rounded from original */
4343     emit_byte(0xd9);
4344     emit_byte(0xf0); /* f2xm1 */
4345     emit_byte(0xdc);
4346     emit_byte(0x05);
4347 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4348 gbeauche 1.1 emit_byte(0xd9);
4349     emit_byte(0xfd); /* and scale it */
4350     emit_byte(0xdd);
4351     emit_byte(0xd9); /* take he rounded value off */
4352     tos_make(d); /* store to destination */
4353     }
4354     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4355    
4356     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4357     {
4358     int ds;
4359    
4360     usereg(s);
4361     ds=stackpos(s);
4362     emit_byte(0xd9);
4363     emit_byte(0xc0+ds); /* duplicate source */
4364     emit_byte(0xd9);
4365     emit_byte(0xe8); /* push '1' */
4366     emit_byte(0xd9);
4367     emit_byte(0xc9); /* swap top two */
4368     emit_byte(0xd9);
4369     emit_byte(0xf1); /* take 1*log2(x) */
4370     tos_make(d); /* store to destination */
4371     }
4372     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4373    
4374    
4375     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4376     {
4377     int ds;
4378    
4379     if (d!=s) {
4380     usereg(s);
4381     ds=stackpos(s);
4382     emit_byte(0xd9);
4383     emit_byte(0xc0+ds); /* duplicate source */
4384     emit_byte(0xd9);
4385     emit_byte(0xe0); /* take fchs */
4386     tos_make(d); /* store to destination */
4387     }
4388     else {
4389     make_tos(d);
4390     emit_byte(0xd9);
4391     emit_byte(0xe0); /* take fchs */
4392     }
4393     }
4394     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4395    
4396     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4397     {
4398     int ds;
4399    
4400     usereg(s);
4401     usereg(d);
4402    
4403     if (live.spos[s]==live.tos) {
4404     /* Source is on top of stack */
4405     ds=stackpos(d);
4406     emit_byte(0xdc);
4407     emit_byte(0xc0+ds); /* add source to dest*/
4408     }
4409     else {
4410     make_tos(d);
4411     ds=stackpos(s);
4412    
4413     emit_byte(0xd8);
4414     emit_byte(0xc0+ds); /* add source to dest*/
4415     }
4416     }
4417     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4418    
4419     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4420     {
4421     int ds;
4422    
4423     usereg(s);
4424     usereg(d);
4425    
4426     if (live.spos[s]==live.tos) {
4427     /* Source is on top of stack */
4428     ds=stackpos(d);
4429     emit_byte(0xdc);
4430     emit_byte(0xe8+ds); /* sub source from dest*/
4431     }
4432     else {
4433     make_tos(d);
4434     ds=stackpos(s);
4435    
4436     emit_byte(0xd8);
4437     emit_byte(0xe0+ds); /* sub src from dest */
4438     }
4439     }
4440     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4441    
4442     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4443     {
4444     int ds;
4445    
4446     usereg(s);
4447     usereg(d);
4448    
4449     make_tos(d);
4450     ds=stackpos(s);
4451    
4452     emit_byte(0xdd);
4453     emit_byte(0xe0+ds); /* cmp dest with source*/
4454     }
4455     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4456    
4457     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4458     {
4459     int ds;
4460    
4461     usereg(s);
4462     usereg(d);
4463    
4464     if (live.spos[s]==live.tos) {
4465     /* Source is on top of stack */
4466     ds=stackpos(d);
4467     emit_byte(0xdc);
4468     emit_byte(0xc8+ds); /* mul dest by source*/
4469     }
4470     else {
4471     make_tos(d);
4472     ds=stackpos(s);
4473    
4474     emit_byte(0xd8);
4475     emit_byte(0xc8+ds); /* mul dest by source*/
4476     }
4477     }
4478     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4479    
4480     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4481     {
4482     int ds;
4483    
4484     usereg(s);
4485     usereg(d);
4486    
4487     if (live.spos[s]==live.tos) {
4488     /* Source is on top of stack */
4489     ds=stackpos(d);
4490     emit_byte(0xdc);
4491     emit_byte(0xf8+ds); /* div dest by source */
4492     }
4493     else {
4494     make_tos(d);
4495     ds=stackpos(s);
4496    
4497     emit_byte(0xd8);
4498     emit_byte(0xf0+ds); /* div dest by source*/
4499     }
4500     }
4501     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4502    
4503     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4504     {
4505     int ds;
4506    
4507     usereg(s);
4508     usereg(d);
4509    
4510     make_tos2(d,s);
4511     ds=stackpos(s);
4512    
4513     if (ds!=1) {
4514     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4515     abort();
4516     }
4517     emit_byte(0xd9);
4518     emit_byte(0xf8); /* take rem from dest by source */
4519     }
4520     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4521    
4522     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4523     {
4524     int ds;
4525    
4526     usereg(s);
4527     usereg(d);
4528    
4529     make_tos2(d,s);
4530     ds=stackpos(s);
4531    
4532     if (ds!=1) {
4533     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4534     abort();
4535     }
4536     emit_byte(0xd9);
4537     emit_byte(0xf5); /* take rem1 from dest by source */
4538     }
4539     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4540    
4541    
4542     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4543     {
4544     make_tos(r);
4545     emit_byte(0xd9); /* ftst */
4546     emit_byte(0xe4);
4547     }
4548     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4549    
4550     /* %eax register is clobbered if target processor doesn't support fucomi */
4551     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4552     #define FFLAG_NREG EAX_INDEX
4553    
4554     static __inline__ void raw_fflags_into_flags(int r)
4555     {
4556     int p;
4557    
4558     usereg(r);
4559     p=stackpos(r);
4560    
4561     emit_byte(0xd9);
4562     emit_byte(0xee); /* Push 0 */
4563     emit_byte(0xd9);
4564     emit_byte(0xc9+p); /* swap top two around */
4565     if (have_cmov) {
4566     // gb-- fucomi is for P6 cores only, not K6-2 then...
4567     emit_byte(0xdb);
4568     emit_byte(0xe9+p); /* fucomi them */
4569     }
4570     else {
4571     emit_byte(0xdd);
4572     emit_byte(0xe1+p); /* fucom them */
4573     emit_byte(0x9b);
4574     emit_byte(0xdf);
4575     emit_byte(0xe0); /* fstsw ax */
4576     raw_sahf(0); /* sahf */
4577     }
4578     emit_byte(0xdd);
4579     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4580     }