ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.29
Committed: 2005-07-24T14:48:27Z (18 years, 11 months ago) by gbeauche
Branch: MAIN
Changes since 1.28: +16 -18 lines
Log Message:
recognize more P4 cores

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
56     /* The register in which subroutines return an integer return value */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__)
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
84     uae_s8 always_used[]={4,-1};
85 gbeauche 1.20 #if defined(__x86_64__)
86     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88     #else
89 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
90     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 gbeauche 1.20 #endif
92 gbeauche 1.1
93 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
94     /* Make sure interpretive core does not use cpuopti */
95     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 gbeauche 1.20 #error FIXME: code not ready
97 gbeauche 1.17 #else
98 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
99     by the caller */
100 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 gbeauche 1.17 #endif
102 gbeauche 1.1
103     /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
105     so we need to preserve some, but don't want to rely on everyone else
106     also saving those registers
107     - Special registers (such as the stack pointer) should not be "preserved"
108     by pushing, even though they are "saved" across function calls
109     */
110 gbeauche 1.21 #if defined(__x86_64__)
111     /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
113     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
114 gbeauche 1.21 #else
115     static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
116     #endif
117 gbeauche 1.1
118     /* Whether classes of instructions do or don't clobber the native flags */
119     #define CLOBBER_MOV
120     #define CLOBBER_LEA
121     #define CLOBBER_CMOV
122     #define CLOBBER_POP
123     #define CLOBBER_PUSH
124     #define CLOBBER_SUB clobber_flags()
125     #define CLOBBER_SBB clobber_flags()
126     #define CLOBBER_CMP clobber_flags()
127     #define CLOBBER_ADD clobber_flags()
128     #define CLOBBER_ADC clobber_flags()
129     #define CLOBBER_AND clobber_flags()
130     #define CLOBBER_OR clobber_flags()
131     #define CLOBBER_XOR clobber_flags()
132    
133     #define CLOBBER_ROL clobber_flags()
134     #define CLOBBER_ROR clobber_flags()
135     #define CLOBBER_SHLL clobber_flags()
136     #define CLOBBER_SHRL clobber_flags()
137     #define CLOBBER_SHRA clobber_flags()
138     #define CLOBBER_TEST clobber_flags()
139     #define CLOBBER_CL16
140     #define CLOBBER_CL8
141 gbeauche 1.20 #define CLOBBER_SE32
142 gbeauche 1.1 #define CLOBBER_SE16
143     #define CLOBBER_SE8
144 gbeauche 1.20 #define CLOBBER_ZE32
145 gbeauche 1.1 #define CLOBBER_ZE16
146     #define CLOBBER_ZE8
147     #define CLOBBER_SW16 clobber_flags()
148     #define CLOBBER_SW32
149     #define CLOBBER_SETCC
150     #define CLOBBER_MUL clobber_flags()
151     #define CLOBBER_BT clobber_flags()
152     #define CLOBBER_BSF clobber_flags()
153    
154 gbeauche 1.13 /* FIXME: the new RTASM is disabled (except on x86-64, below) until it's proofread. */
155 gbeauche 1.20 #if defined(__x86_64__)
156     #define USE_NEW_RTASM 1
157     #endif
158    
159     #if USE_NEW_RTASM
160 gbeauche 1.13
161     #if defined(__x86_64__)
162     #define X86_TARGET_64BIT 1
163     #endif
164     #define X86_FLAT_REGISTERS 0
165 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
166     #define X86_OPTIMIZE_ROTSHI 1
167 gbeauche 1.13 #include "codegen_x86.h"
168    
169     #define x86_emit_byte(B) emit_byte(B)
170     #define x86_emit_word(W) emit_word(W)
171     #define x86_emit_long(L) emit_long(L)
172 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
173 gbeauche 1.13 #define x86_get_target() get_target()
174     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
175    
/* Report a fatal code-generation error and terminate.
   Prints msg together with the function, file and line passed in to stderr,
   then calls abort(), so this function never returns.
   x86_emit_failure(MSG) expands to a call of this function with
   __FILE__, __LINE__ and __FUNCTION__ of the expansion site. */
176     static void jit_fail(const char *msg, const char *file, int line, const char *function)
177     {
178     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
179     function, file, line, msg);
180     abort();
181     }
182    
/* Emit a push of register r onto the native stack.
   Builds for x86-64 emit PUSHQr; all other builds emit PUSHLr. */
183     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
184     {
185 gbeauche 1.20 #if defined(__x86_64__)
186     PUSHQr(r);
187     #else
188 gbeauche 1.13 PUSHLr(r);
189 gbeauche 1.20 #endif
190 gbeauche 1.13 }
191     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
192    
/* Emit a pop from the native stack into register r.
   Builds for x86-64 emit POPQr; all other builds emit POPLr. */
193     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
194     {
195 gbeauche 1.20 #if defined(__x86_64__)
196     POPQr(r);
197     #else
198 gbeauche 1.13 POPLr(r);
199 gbeauche 1.20 #endif
200 gbeauche 1.13 }
201     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
202    
/* Emit a pop from the native stack into the memory operand d.
   X86_NOREG is passed for both register slots of the memory operand, so d is
   used as a plain address/displacement with no base or index register.
   Builds for x86-64 emit POPQm; all other builds emit POPLm. */
203 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
204     {
205     #if defined(__x86_64__)
206     POPQm(d, X86_NOREG, X86_NOREG, 1);
207     #else
208     POPLm(d, X86_NOREG, X86_NOREG, 1);
209     #endif
210     }
211     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
212    
213 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
214     {
215     BTLir(i, r);
216     }
217     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
218    
219     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
220     {
221     BTLrr(b, r);
222     }
223     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
224    
225     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
226     {
227     BTCLir(i, r);
228     }
229     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
230    
231     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
232     {
233     BTCLrr(b, r);
234     }
235     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
236    
237     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
238     {
239     BTRLir(i, r);
240     }
241     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
242    
243     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
244     {
245     BTRLrr(b, r);
246     }
247     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
248    
249     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
250     {
251     BTSLir(i, r);
252     }
253     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
254    
255     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
256     {
257     BTSLrr(b, r);
258     }
259     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
260    
261     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
262     {
263     SUBWir(i, d);
264     }
265     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
266    
267     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
268     {
269     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
270     }
271     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
272    
273     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
274     {
275     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
276     }
277     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
278    
279     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
280     {
281     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
282     }
283     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
284    
285     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
286     {
287     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
288     }
289     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
290    
291     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
292     {
293     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
294     }
295     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
296    
297     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
298     {
299     ROLBir(i, r);
300     }
301     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
302    
303     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
304     {
305     ROLWir(i, r);
306     }
307     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
308    
309     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
310     {
311     ROLLir(i, r);
312     }
313     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
314    
315     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
316     {
317     ROLLrr(r, d);
318     }
319     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320    
321     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
322     {
323     ROLWrr(r, d);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
328     {
329     ROLBrr(r, d);
330     }
331     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
332    
333     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
334     {
335     SHLLrr(r, d);
336     }
337     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
338    
339     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
340     {
341     SHLWrr(r, d);
342     }
343     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344    
345     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
346     {
347     SHLBrr(r, d);
348     }
349     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350    
351     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
352     {
353     RORBir(i, r);
354     }
355     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
356    
357     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
358     {
359     RORWir(i, r);
360     }
361     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
362    
363     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
364     {
365     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
366     }
367     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
368    
369     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
370     {
371     RORLir(i, r);
372     }
373     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
374    
375     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
376     {
377     RORLrr(r, d);
378     }
379     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
380    
381     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
382     {
383     RORWrr(r, d);
384     }
385     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
386    
387     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
388     {
389     RORBrr(r, d);
390     }
391     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
392    
393     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
394     {
395     SHRLrr(r, d);
396     }
397     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
398    
399     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
400     {
401     SHRWrr(r, d);
402     }
403     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
404    
405     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
406     {
407     SHRBrr(r, d);
408     }
409     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
410    
411     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
412     {
413 gbeauche 1.14 SARLrr(r, d);
414 gbeauche 1.13 }
415     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
416    
417     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
418     {
419 gbeauche 1.14 SARWrr(r, d);
420 gbeauche 1.13 }
421     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
422    
423     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
424     {
425 gbeauche 1.14 SARBrr(r, d);
426 gbeauche 1.13 }
427     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
428    
429     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
430     {
431     SHLLir(i, r);
432     }
433     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
434    
435     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
436     {
437     SHLWir(i, r);
438     }
439     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
440    
441     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
442     {
443     SHLBir(i, r);
444     }
445     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
446    
447     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
448     {
449     SHRLir(i, r);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
454     {
455     SHRWir(i, r);
456     }
457     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
458    
459     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
460     {
461     SHRBir(i, r);
462     }
463     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
464    
465     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
466     {
467 gbeauche 1.14 SARLir(i, r);
468 gbeauche 1.13 }
469     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
470    
471     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
472     {
473 gbeauche 1.14 SARWir(i, r);
474 gbeauche 1.13 }
475     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
476    
477     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
478     {
479 gbeauche 1.14 SARBir(i, r);
480 gbeauche 1.13 }
481     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
482    
483     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
484     {
485     SAHF();
486     }
487     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
488    
489     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
490     {
491     CPUID();
492     }
493     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
494    
495     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
496     {
497     LAHF();
498     }
499     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
500    
501     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
502     {
503     SETCCir(cc, d);
504     }
505     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
506    
507     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
508     {
509     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
510     }
511     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
512    
/* Emit a conditional 32-bit register-to-register move: d = s if condition cc.
   When have_cmov is set a single CMOVLrr is emitted.  Otherwise the move is
   synthesized from a short conditional jump plus a plain MOVLrr.  On x86-64
   builds the fallback path logs a message and aborts instead. */
513     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
514     {
515 gbeauche 1.15 if (have_cmov)
516     CMOVLrr(cc, s, d);
517     else { /* replacement using branch and mov */
518     #if defined(__x86_64__)
519     write_log("x86-64 implementations are bound to have CMOV!\n");
520     abort();
521     #endif
/* Jump on cc with its low bit toggled (presumably the negated condition)
   over the next 2 bytes -- presumably the encoded size of the MOVLrr that
   follows; re-check the displacement if that encoding ever changes. */
522     JCCSii(cc^1, 2);
523     MOVLrr(s, d);
524     }
525 gbeauche 1.13 }
526     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
527    
528     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
529     {
530     BSFLrr(s, d);
531     }
532     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
533    
534 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
535     {
536     MOVSLQrr(s, d);
537     }
538     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
539    
540 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
541     {
542     MOVSWLrr(s, d);
543     }
544     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
545    
546     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
547     {
548     MOVSBLrr(s, d);
549     }
550     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
551    
552     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
553     {
554     MOVZWLrr(s, d);
555     }
556     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
557    
558     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
559     {
560     MOVZBLrr(s, d);
561     }
562     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
563    
564     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
565     {
566 gbeauche 1.14 IMULLrr(s, d);
567 gbeauche 1.13 }
568     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
569    
/* Emit the one-operand signed multiply IMULLr(s).
   The operands are fixed: d must be MUL_NREG1 and s must be MUL_NREG2
   (defined above as EAX_INDEX and EDX_INDEX, documented there as holding the
   low and high 32 bits of the product).  Any other register pair is logged
   and the process aborts. */
570     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
571     {
572 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
573     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
574 gbeauche 1.13 abort();
575 gbeauche 1.14 }
576     IMULLr(s);
577 gbeauche 1.13 }
578     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
579    
/* Emit the one-operand unsigned multiply MULLr(s).
   The operands are fixed: d must be MUL_NREG1 and s must be MUL_NREG2
   (defined above as EAX_INDEX and EDX_INDEX, documented there as holding the
   low and high 32 bits of the product).  Any other register pair is logged
   and the process aborts. */
580     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
581     {
582 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
583     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
584 gbeauche 1.13 abort();
585 gbeauche 1.14 }
586     MULLr(s);
587 gbeauche 1.13 }
588     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
589    
/* Placeholder for a two-operand unsigned 32x32->32 multiply.
   Nothing is emitted: the body unconditionally calls abort() and ignores
   both d and s, so this entry point must never be reached at run time. */
590     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
591     {
592 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
593 gbeauche 1.13 }
594     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
595    
596     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
597     {
598     MOVBrr(s, d);
599     }
600     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
601    
602     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
603     {
604     MOVWrr(s, d);
605     }
606     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
607    
608     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
609     {
610     MOVLmr(0, baser, index, factor, d);
611     }
612     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
613    
614     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
615     {
616     MOVWmr(0, baser, index, factor, d);
617     }
618     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
619    
620     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
621     {
622     MOVBmr(0, baser, index, factor, d);
623     }
624     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
625    
626     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
627     {
628     MOVLrm(s, 0, baser, index, factor);
629     }
630     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
631    
632     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
633     {
634     MOVWrm(s, 0, baser, index, factor);
635     }
636     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
637    
638     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
639     {
640     MOVBrm(s, 0, baser, index, factor);
641     }
642     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
643    
644     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
645     {
646     MOVLrm(s, base, baser, index, factor);
647     }
648     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
649    
650     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
651     {
652     MOVWrm(s, base, baser, index, factor);
653     }
654     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
655    
656     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
657     {
658     MOVBrm(s, base, baser, index, factor);
659     }
660     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
661    
662     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
663     {
664     MOVLmr(base, baser, index, factor, d);
665     }
666     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
667    
668     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
669     {
670     MOVWmr(base, baser, index, factor, d);
671     }
672     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
673    
674     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
675     {
676     MOVBmr(base, baser, index, factor, d);
677     }
678     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
679    
680     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
681     {
682     MOVLmr(base, X86_NOREG, index, factor, d);
683     }
684     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
685    
/* Emit a conditional 32-bit load into d from the operand
   base + index*factor (no base register: X86_NOREG is passed for it),
   performed only if condition cond holds.
   When have_cmov is set a single CMOVLmr is emitted.  Otherwise the load is
   synthesized from a short conditional jump plus a plain MOVLmr.  On x86-64
   builds the fallback path logs a message and aborts instead. */
686     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
687     {
688 gbeauche 1.15 if (have_cmov)
689     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
690     else { /* replacement using branch and mov */
691     #if defined(__x86_64__)
692     write_log("x86-64 implementations are bound to have CMOV!\n");
693     abort();
694     #endif
/* Jump on cond with its low bit toggled (presumably the negated condition)
   over the next 7 bytes -- presumably the encoded size of the indexed
   MOVLmr that follows; re-check if that encoding ever changes. */
695     JCCSii(cond^1, 7);
696     MOVLmr(base, X86_NOREG, index, factor, d);
697     }
698 gbeauche 1.13 }
699     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
700    
/* Emit a conditional 32-bit load into d from the memory operand mem
   (no base or index register: X86_NOREG is passed for both), performed only
   if condition cond holds.
   When have_cmov is set a single CMOVLmr is emitted.  Otherwise the load is
   synthesized from a short conditional jump plus a plain MOVLmr.  On x86-64
   builds the fallback path logs a message and aborts instead. */
701     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
702     {
703 gbeauche 1.15 if (have_cmov)
704     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
705     else { /* replacement using branch and mov */
706     #if defined(__x86_64__)
707     write_log("x86-64 implementations are bound to have CMOV!\n");
708     abort();
709     #endif
/* Jump on cond with its low bit toggled (presumably the negated condition)
   over the next 6 bytes -- presumably the encoded size of the MOVLmr that
   follows; re-check if that encoding ever changes. */
710     JCCSii(cond^1, 6);
711     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
712     }
713 gbeauche 1.13 }
714     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
715    
716     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
717     {
718     MOVLmr(offset, s, X86_NOREG, 1, d);
719     }
720     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
721    
722     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
723     {
724     MOVWmr(offset, s, X86_NOREG, 1, d);
725     }
726     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
727    
728     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
729     {
730     MOVBmr(offset, s, X86_NOREG, 1, d);
731     }
732     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
733    
734     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
735     {
736     MOVLmr(offset, s, X86_NOREG, 1, d);
737     }
738     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
739    
740     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
741     {
742     MOVWmr(offset, s, X86_NOREG, 1, d);
743     }
744     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
745    
746     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
747     {
748     MOVBmr(offset, s, X86_NOREG, 1, d);
749     }
750     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
751    
752     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
753     {
754     MOVLim(i, offset, d, X86_NOREG, 1);
755     }
756     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
757    
758     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
759     {
760     MOVWim(i, offset, d, X86_NOREG, 1);
761     }
762     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
763    
764     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
765     {
766     MOVBim(i, offset, d, X86_NOREG, 1);
767     }
768     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
769    
770     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
771     {
772     MOVLrm(s, offset, d, X86_NOREG, 1);
773     }
774     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
775    
776     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
777     {
778     MOVWrm(s, offset, d, X86_NOREG, 1);
779     }
780     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
781    
782     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
783     {
784     MOVBrm(s, offset, d, X86_NOREG, 1);
785     }
786     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
787    
788     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
789     {
790     LEALmr(offset, s, X86_NOREG, 1, d);
791     }
792     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
793    
794     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
795     {
796     LEALmr(offset, s, index, factor, d);
797     }
798     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
799    
800     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
801     {
802     LEALmr(0, s, index, factor, d);
803     }
804     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
805    
806     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
807     {
808     MOVLrm(s, offset, d, X86_NOREG, 1);
809     }
810     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
811    
812     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
813     {
814     MOVWrm(s, offset, d, X86_NOREG, 1);
815     }
816     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
817    
818     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
819     {
820     MOVBrm(s, offset, d, X86_NOREG, 1);
821     }
822     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
823    
824     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
825     {
826     BSWAPLr(r);
827     }
828     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
829    
/* Swap the two bytes of the 16-bit register r.
   Implemented as a 16-bit rotate left by 8 (ROLWir) rather than a dedicated
   byte-swap instruction. */
830     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
831     {
832     ROLWir(8, r);
833     }
834     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
835    
836     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
837     {
838     MOVLrr(s, d);
839     }
840     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
841    
842     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
843     {
844     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
845     }
846     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
847    
848     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
849     {
850     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
851     }
852     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
853    
854     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
855     {
856     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
857     }
858     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
859    
860     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
861     {
862     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
863     }
864     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
865    
866     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
867     {
868     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
869     }
870     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
871    
872     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
873     {
874     MOVLir(s, d);
875     }
876     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
877    
878     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
879     {
880     MOVWir(s, d);
881     }
882     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
883    
884     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
885     {
886     MOVBir(s, d);
887     }
888     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
889    
890     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
891     {
892     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
893     }
894     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
895    
896     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
897     {
898     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
899     }
900     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
901    
902     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
903     {
904     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
905     }
906     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
907    
908     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
909     {
910     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
911     }
912     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
913    
914     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
915     {
916     TESTLir(i, d);
917     }
918     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
919    
920     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
921     {
922     TESTLrr(s, d);
923     }
924     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
925    
926     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
927     {
928     TESTWrr(s, d);
929     }
930     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
931    
932     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
933     {
934     TESTBrr(s, d);
935     }
936     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
937    
938 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
939     {
940     XORLir(i, d);
941     }
942     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
943    
944 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
945     {
946     ANDLir(i, d);
947     }
948     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
949    
950     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
951     {
952     ANDWir(i, d);
953     }
954     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
955    
956     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
957     {
958     ANDLrr(s, d);
959     }
960     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
961    
962     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
963     {
964     ANDWrr(s, d);
965     }
966     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
967    
968     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
969     {
970     ANDBrr(s, d);
971     }
972     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
973    
974     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
975     {
976     ORLir(i, d);
977     }
978     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
979    
980     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
981     {
982     ORLrr(s, d);
983     }
984     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
985    
986     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
987     {
988     ORWrr(s, d);
989     }
990     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
991    
992     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
993     {
994     ORBrr(s, d);
995     }
996     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
997    
998     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
999     {
1000     ADCLrr(s, d);
1001     }
1002     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1003    
1004     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1005     {
1006     ADCWrr(s, d);
1007     }
1008     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1009    
1010     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1011     {
1012     ADCBrr(s, d);
1013     }
1014     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1015    
1016     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1017     {
1018     ADDLrr(s, d);
1019     }
1020     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1021    
1022     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1023     {
1024     ADDWrr(s, d);
1025     }
1026     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1027    
1028     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1029     {
1030     ADDBrr(s, d);
1031     }
1032     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1033    
1034     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1035     {
1036     SUBLir(i, d);
1037     }
1038     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1039    
1040     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1041     {
1042     SUBBir(i, d);
1043     }
1044     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1045    
1046     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1047     {
1048     ADDLir(i, d);
1049     }
1050     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1051    
1052     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1053     {
1054     ADDWir(i, d);
1055     }
1056     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1057    
1058     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1059     {
1060     ADDBir(i, d);
1061     }
1062     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1063    
1064     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1065     {
1066     SBBLrr(s, d);
1067     }
1068     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1069    
1070     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1071     {
1072     SBBWrr(s, d);
1073     }
1074     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1075    
1076     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1077     {
1078     SBBBrr(s, d);
1079     }
1080     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1081    
1082     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1083     {
1084     SUBLrr(s, d);
1085     }
1086     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1087    
1088     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1089     {
1090     SUBWrr(s, d);
1091     }
1092     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1093    
1094     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1095     {
1096     SUBBrr(s, d);
1097     }
1098     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1099    
1100     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1101     {
1102     CMPLrr(s, d);
1103     }
1104     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1105    
1106     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1107     {
1108     CMPLir(i, r);
1109     }
1110     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1111    
1112     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1113     {
1114     CMPWrr(s, d);
1115     }
1116     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1117    
1118     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1119     {
1120     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1121     }
1122     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1123    
1124     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1125     {
1126     CMPBir(i, d);
1127     }
1128     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1129    
1130     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1131     {
1132     CMPBrr(s, d);
1133     }
1134     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1135    
1136     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1137     {
1138     CMPLmr(offset, X86_NOREG, index, factor, d);
1139     }
1140     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1141    
1142     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1143     {
1144     XORLrr(s, d);
1145     }
1146     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1147    
1148     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1149     {
1150     XORWrr(s, d);
1151     }
1152     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1153    
1154     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1155     {
1156     XORBrr(s, d);
1157     }
1158     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1159    
1160     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1161     {
1162     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1163     }
1164     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1165    
1166     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1167     {
1168     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1169     }
1170     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1171    
1172     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1173     {
1174     XCHGLrr(r2, r1);
1175     }
1176     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1177    
1178     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1179     {
1180 gbeauche 1.18 PUSHF();
1181 gbeauche 1.13 }
1182     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1183    
1184     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1185     {
1186 gbeauche 1.18 POPF();
1187 gbeauche 1.13 }
1188     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1189    
1190     #else
1191    
/* Encoding-selection switches for the hand-written emitters below. */
const bool optimize_accum = true;       /* allow the short accumulator-only opcode forms (see raw_sub_w_ri) */
const bool optimize_imm8 = true;        /* not referenced in this part of the file */
const bool optimize_shift_once = true;  /* allow the shift/rotate-by-1 opcode forms (no count byte) */
1195    
1196     /*************************************************************************
1197     * Actual encoding of the instructions on the target CPU *
1198     *************************************************************************/
1199    
1200 gbeauche 1.2 static __inline__ int isaccum(int r)
1201     {
1202     return (r == EAX_INDEX);
1203     }
1204    
1205 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1206     {
1207     return (x>=-128 && x<=127);
1208     }
1209    
1210     static __inline__ int isword(uae_s32 x)
1211     {
1212     return (x>=-32768 && x<=32767);
1213     }
1214    
1215     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1216     {
1217     emit_byte(0x50+r);
1218     }
1219     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1220    
1221     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1222     {
1223     emit_byte(0x58+r);
1224     }
1225     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1226    
1227 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1228     {
1229     emit_byte(0x8f);
1230     emit_byte(0x05);
1231     emit_long(d);
1232     }
1233     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1234    
1235 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1236     {
1237     emit_byte(0x0f);
1238     emit_byte(0xba);
1239     emit_byte(0xe0+r);
1240     emit_byte(i);
1241     }
1242     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1243    
1244     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1245     {
1246     emit_byte(0x0f);
1247     emit_byte(0xa3);
1248     emit_byte(0xc0+8*b+r);
1249     }
1250     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1251    
1252     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1253     {
1254     emit_byte(0x0f);
1255     emit_byte(0xba);
1256     emit_byte(0xf8+r);
1257     emit_byte(i);
1258     }
1259     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1260    
1261     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1262     {
1263     emit_byte(0x0f);
1264     emit_byte(0xbb);
1265     emit_byte(0xc0+8*b+r);
1266     }
1267     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1268    
1269    
1270     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1271     {
1272     emit_byte(0x0f);
1273     emit_byte(0xba);
1274     emit_byte(0xf0+r);
1275     emit_byte(i);
1276     }
1277     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1278    
1279     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1280     {
1281     emit_byte(0x0f);
1282     emit_byte(0xb3);
1283     emit_byte(0xc0+8*b+r);
1284     }
1285     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1286    
1287     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1288     {
1289     emit_byte(0x0f);
1290     emit_byte(0xba);
1291     emit_byte(0xe8+r);
1292     emit_byte(i);
1293     }
1294     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1295    
1296     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1297     {
1298     emit_byte(0x0f);
1299     emit_byte(0xab);
1300     emit_byte(0xc0+8*b+r);
1301     }
1302     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1303    
1304     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1305     {
1306     emit_byte(0x66);
1307     if (isbyte(i)) {
1308     emit_byte(0x83);
1309     emit_byte(0xe8+d);
1310     emit_byte(i);
1311     }
1312     else {
1313 gbeauche 1.2 if (optimize_accum && isaccum(d))
1314     emit_byte(0x2d);
1315     else {
1316 gbeauche 1.1 emit_byte(0x81);
1317     emit_byte(0xe8+d);
1318 gbeauche 1.2 }
1319 gbeauche 1.1 emit_word(i);
1320     }
1321     }
1322     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1323    
1324    
1325     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1326     {
1327     emit_byte(0x8b);
1328     emit_byte(0x05+8*d);
1329     emit_long(s);
1330     }
1331     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1332    
1333     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1334     {
1335     emit_byte(0xc7);
1336     emit_byte(0x05);
1337     emit_long(d);
1338     emit_long(s);
1339     }
1340     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1341    
1342     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1343     {
1344     emit_byte(0x66);
1345     emit_byte(0xc7);
1346     emit_byte(0x05);
1347     emit_long(d);
1348     emit_word(s);
1349     }
1350     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1351    
1352     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1353     {
1354     emit_byte(0xc6);
1355     emit_byte(0x05);
1356     emit_long(d);
1357     emit_byte(s);
1358     }
1359     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1360    
1361     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1362     {
1363     if (optimize_shift_once && (i == 1)) {
1364     emit_byte(0xd0);
1365     emit_byte(0x05);
1366     emit_long(d);
1367     }
1368     else {
1369     emit_byte(0xc0);
1370     emit_byte(0x05);
1371     emit_long(d);
1372     emit_byte(i);
1373     }
1374     }
1375     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1376    
1377     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1378     {
1379     if (optimize_shift_once && (i == 1)) {
1380     emit_byte(0xd0);
1381     emit_byte(0xc0+r);
1382     }
1383     else {
1384     emit_byte(0xc0);
1385     emit_byte(0xc0+r);
1386     emit_byte(i);
1387     }
1388     }
1389     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1390    
1391     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1392     {
1393     emit_byte(0x66);
1394     emit_byte(0xc1);
1395     emit_byte(0xc0+r);
1396     emit_byte(i);
1397     }
1398     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1399    
1400     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1401     {
1402     if (optimize_shift_once && (i == 1)) {
1403     emit_byte(0xd1);
1404     emit_byte(0xc0+r);
1405     }
1406     else {
1407     emit_byte(0xc1);
1408     emit_byte(0xc0+r);
1409     emit_byte(i);
1410     }
1411     }
1412     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1413    
1414     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1415     {
1416     emit_byte(0xd3);
1417     emit_byte(0xc0+d);
1418     }
1419     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1420    
1421     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1422     {
1423     emit_byte(0x66);
1424     emit_byte(0xd3);
1425     emit_byte(0xc0+d);
1426     }
1427     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1428    
1429     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1430     {
1431     emit_byte(0xd2);
1432     emit_byte(0xc0+d);
1433     }
1434     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1435    
1436     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1437     {
1438     emit_byte(0xd3);
1439     emit_byte(0xe0+d);
1440     }
1441     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1442    
1443     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1444     {
1445     emit_byte(0x66);
1446     emit_byte(0xd3);
1447     emit_byte(0xe0+d);
1448     }
1449     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1450    
1451     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1452     {
1453     emit_byte(0xd2);
1454     emit_byte(0xe0+d);
1455     }
1456     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1457    
1458     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1459     {
1460     if (optimize_shift_once && (i == 1)) {
1461     emit_byte(0xd0);
1462     emit_byte(0xc8+r);
1463     }
1464     else {
1465     emit_byte(0xc0);
1466     emit_byte(0xc8+r);
1467     emit_byte(i);
1468     }
1469     }
1470     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1471    
1472     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1473     {
1474     emit_byte(0x66);
1475     emit_byte(0xc1);
1476     emit_byte(0xc8+r);
1477     emit_byte(i);
1478     }
1479     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1480    
1481     // gb-- used for making an fpcr value in compemu_fpp.cpp
1482     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1483     {
1484     emit_byte(0x0b);
1485     emit_byte(0x05+8*d);
1486     emit_long(s);
1487     }
1488     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1489    
1490     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1491     {
1492     if (optimize_shift_once && (i == 1)) {
1493     emit_byte(0xd1);
1494     emit_byte(0xc8+r);
1495     }
1496     else {
1497     emit_byte(0xc1);
1498     emit_byte(0xc8+r);
1499     emit_byte(i);
1500     }
1501     }
1502     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1503    
1504     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1505     {
1506     emit_byte(0xd3);
1507     emit_byte(0xc8+d);
1508     }
1509     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1510    
1511     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1512     {
1513     emit_byte(0x66);
1514     emit_byte(0xd3);
1515     emit_byte(0xc8+d);
1516     }
1517     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1518    
1519     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1520     {
1521     emit_byte(0xd2);
1522     emit_byte(0xc8+d);
1523     }
1524     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1525    
1526     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1527     {
1528     emit_byte(0xd3);
1529     emit_byte(0xe8+d);
1530     }
1531     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1532    
1533     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1534     {
1535     emit_byte(0x66);
1536     emit_byte(0xd3);
1537     emit_byte(0xe8+d);
1538     }
1539     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1540    
1541     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1542     {
1543     emit_byte(0xd2);
1544     emit_byte(0xe8+d);
1545     }
1546     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1547    
1548     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1549     {
1550     emit_byte(0xd3);
1551     emit_byte(0xf8+d);
1552     }
1553     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1554    
1555     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1556     {
1557     emit_byte(0x66);
1558     emit_byte(0xd3);
1559     emit_byte(0xf8+d);
1560     }
1561     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1562    
1563     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1564     {
1565     emit_byte(0xd2);
1566     emit_byte(0xf8+d);
1567     }
1568     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1569    
1570     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1571     {
1572     if (optimize_shift_once && (i == 1)) {
1573     emit_byte(0xd1);
1574     emit_byte(0xe0+r);
1575     }
1576     else {
1577     emit_byte(0xc1);
1578     emit_byte(0xe0+r);
1579     emit_byte(i);
1580     }
1581     }
1582     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1583    
1584     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1585     {
1586     emit_byte(0x66);
1587     emit_byte(0xc1);
1588     emit_byte(0xe0+r);
1589     emit_byte(i);
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1594     {
1595     if (optimize_shift_once && (i == 1)) {
1596     emit_byte(0xd0);
1597     emit_byte(0xe0+r);
1598     }
1599     else {
1600     emit_byte(0xc0);
1601     emit_byte(0xe0+r);
1602     emit_byte(i);
1603     }
1604     }
1605     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1606    
1607     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1608     {
1609     if (optimize_shift_once && (i == 1)) {
1610     emit_byte(0xd1);
1611     emit_byte(0xe8+r);
1612     }
1613     else {
1614     emit_byte(0xc1);
1615     emit_byte(0xe8+r);
1616     emit_byte(i);
1617     }
1618     }
1619     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1620    
1621     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1622     {
1623     emit_byte(0x66);
1624     emit_byte(0xc1);
1625     emit_byte(0xe8+r);
1626     emit_byte(i);
1627     }
1628     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1629    
1630     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1631     {
1632     if (optimize_shift_once && (i == 1)) {
1633     emit_byte(0xd0);
1634     emit_byte(0xe8+r);
1635     }
1636     else {
1637     emit_byte(0xc0);
1638     emit_byte(0xe8+r);
1639     emit_byte(i);
1640     }
1641     }
1642     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1643    
1644     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1645     {
1646     if (optimize_shift_once && (i == 1)) {
1647     emit_byte(0xd1);
1648     emit_byte(0xf8+r);
1649     }
1650     else {
1651     emit_byte(0xc1);
1652     emit_byte(0xf8+r);
1653     emit_byte(i);
1654     }
1655     }
1656     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1657    
1658     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1659     {
1660     emit_byte(0x66);
1661     emit_byte(0xc1);
1662     emit_byte(0xf8+r);
1663     emit_byte(i);
1664     }
1665     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1666    
1667     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1668     {
1669     if (optimize_shift_once && (i == 1)) {
1670     emit_byte(0xd0);
1671     emit_byte(0xf8+r);
1672     }
1673     else {
1674     emit_byte(0xc0);
1675     emit_byte(0xf8+r);
1676     emit_byte(i);
1677     }
1678     }
1679     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1680    
1681     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1682     {
1683     emit_byte(0x9e);
1684     }
1685     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1686    
1687     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1688     {
1689     emit_byte(0x0f);
1690     emit_byte(0xa2);
1691     }
1692     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1693    
1694     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1695     {
1696     emit_byte(0x9f);
1697     }
1698     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1699    
1700     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1701     {
1702     emit_byte(0x0f);
1703     emit_byte(0x90+cc);
1704     emit_byte(0xc0+d);
1705     }
1706     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1707    
1708     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1709     {
1710     emit_byte(0x0f);
1711     emit_byte(0x90+cc);
1712     emit_byte(0x05);
1713     emit_long(d);
1714     }
1715     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1716    
1717     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1718     {
1719     if (have_cmov) {
1720     emit_byte(0x0f);
1721     emit_byte(0x40+cc);
1722     emit_byte(0xc0+8*d+s);
1723     }
1724     else { /* replacement using branch and mov */
1725     int uncc=(cc^1);
1726     emit_byte(0x70+uncc);
1727     emit_byte(2); /* skip next 2 bytes if not cc=true */
1728     emit_byte(0x89);
1729     emit_byte(0xc0+8*s+d);
1730     }
1731     }
1732     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1733    
1734     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1735     {
1736     emit_byte(0x0f);
1737     emit_byte(0xbc);
1738     emit_byte(0xc0+8*d+s);
1739     }
1740     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1741    
1742     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1743     {
1744     emit_byte(0x0f);
1745     emit_byte(0xbf);
1746     emit_byte(0xc0+8*d+s);
1747     }
1748     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1749    
1750     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1751     {
1752     emit_byte(0x0f);
1753     emit_byte(0xbe);
1754     emit_byte(0xc0+8*d+s);
1755     }
1756     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1757    
1758     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1759     {
1760     emit_byte(0x0f);
1761     emit_byte(0xb7);
1762     emit_byte(0xc0+8*d+s);
1763     }
1764     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1765    
1766     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1767     {
1768     emit_byte(0x0f);
1769     emit_byte(0xb6);
1770     emit_byte(0xc0+8*d+s);
1771     }
1772     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1773    
1774     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1775     {
1776     emit_byte(0x0f);
1777     emit_byte(0xaf);
1778     emit_byte(0xc0+8*d+s);
1779     }
1780     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1781    
1782     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1783     {
1784     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1785     abort();
1786     emit_byte(0xf7);
1787     emit_byte(0xea);
1788     }
1789     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1790    
1791     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1792     {
1793     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1794     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1795     abort();
1796     }
1797     emit_byte(0xf7);
1798     emit_byte(0xe2);
1799     }
1800     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1801    
1802     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1803     {
1804     abort(); /* %^$&%^$%#^ x86! */
1805     emit_byte(0x0f);
1806     emit_byte(0xaf);
1807     emit_byte(0xc0+8*d+s);
1808     }
1809     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1810    
1811     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1812     {
1813     emit_byte(0x88);
1814     emit_byte(0xc0+8*s+d);
1815     }
1816     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1817    
1818     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1819     {
1820     emit_byte(0x66);
1821     emit_byte(0x89);
1822     emit_byte(0xc0+8*s+d);
1823     }
1824     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1825    
1826     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1827     {
1828     int isebp=(baser==5)?0x40:0;
1829     int fi;
1830    
1831     switch(factor) {
1832     case 1: fi=0; break;
1833     case 2: fi=1; break;
1834     case 4: fi=2; break;
1835     case 8: fi=3; break;
1836     default: abort();
1837     }
1838    
1839    
1840     emit_byte(0x8b);
1841     emit_byte(0x04+8*d+isebp);
1842     emit_byte(baser+8*index+0x40*fi);
1843     if (isebp)
1844     emit_byte(0x00);
1845     }
1846     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1847    
1848     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1849     {
1850     int fi;
1851     int isebp;
1852    
1853     switch(factor) {
1854     case 1: fi=0; break;
1855     case 2: fi=1; break;
1856     case 4: fi=2; break;
1857     case 8: fi=3; break;
1858     default: abort();
1859     }
1860     isebp=(baser==5)?0x40:0;
1861    
1862     emit_byte(0x66);
1863     emit_byte(0x8b);
1864     emit_byte(0x04+8*d+isebp);
1865     emit_byte(baser+8*index+0x40*fi);
1866     if (isebp)
1867     emit_byte(0x00);
1868     }
1869     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1870    
1871     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1872     {
1873     int fi;
1874     int isebp;
1875    
1876     switch(factor) {
1877     case 1: fi=0; break;
1878     case 2: fi=1; break;
1879     case 4: fi=2; break;
1880     case 8: fi=3; break;
1881     default: abort();
1882     }
1883     isebp=(baser==5)?0x40:0;
1884    
1885     emit_byte(0x8a);
1886     emit_byte(0x04+8*d+isebp);
1887     emit_byte(baser+8*index+0x40*fi);
1888     if (isebp)
1889     emit_byte(0x00);
1890     }
1891     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1892    
1893     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1894     {
1895     int fi;
1896     int isebp;
1897    
1898     switch(factor) {
1899     case 1: fi=0; break;
1900     case 2: fi=1; break;
1901     case 4: fi=2; break;
1902     case 8: fi=3; break;
1903     default: abort();
1904     }
1905    
1906    
1907     isebp=(baser==5)?0x40:0;
1908    
1909     emit_byte(0x89);
1910     emit_byte(0x04+8*s+isebp);
1911     emit_byte(baser+8*index+0x40*fi);
1912     if (isebp)
1913     emit_byte(0x00);
1914     }
1915     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1916    
1917     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1918     {
1919     int fi;
1920     int isebp;
1921    
1922     switch(factor) {
1923     case 1: fi=0; break;
1924     case 2: fi=1; break;
1925     case 4: fi=2; break;
1926     case 8: fi=3; break;
1927     default: abort();
1928     }
1929     isebp=(baser==5)?0x40:0;
1930    
1931     emit_byte(0x66);
1932     emit_byte(0x89);
1933     emit_byte(0x04+8*s+isebp);
1934     emit_byte(baser+8*index+0x40*fi);
1935     if (isebp)
1936     emit_byte(0x00);
1937     }
1938     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1939    
1940     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1941     {
1942     int fi;
1943     int isebp;
1944    
1945     switch(factor) {
1946     case 1: fi=0; break;
1947     case 2: fi=1; break;
1948     case 4: fi=2; break;
1949     case 8: fi=3; break;
1950     default: abort();
1951     }
1952     isebp=(baser==5)?0x40:0;
1953    
1954     emit_byte(0x88);
1955     emit_byte(0x04+8*s+isebp);
1956     emit_byte(baser+8*index+0x40*fi);
1957     if (isebp)
1958     emit_byte(0x00);
1959     }
1960     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1961    
1962     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1963     {
1964     int fi;
1965    
1966     switch(factor) {
1967     case 1: fi=0; break;
1968     case 2: fi=1; break;
1969     case 4: fi=2; break;
1970     case 8: fi=3; break;
1971     default: abort();
1972     }
1973    
1974     emit_byte(0x89);
1975     emit_byte(0x84+8*s);
1976     emit_byte(baser+8*index+0x40*fi);
1977     emit_long(base);
1978     }
1979     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1980    
1981     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1982     {
1983     int fi;
1984    
1985     switch(factor) {
1986     case 1: fi=0; break;
1987     case 2: fi=1; break;
1988     case 4: fi=2; break;
1989     case 8: fi=3; break;
1990     default: abort();
1991     }
1992    
1993     emit_byte(0x66);
1994     emit_byte(0x89);
1995     emit_byte(0x84+8*s);
1996     emit_byte(baser+8*index+0x40*fi);
1997     emit_long(base);
1998     }
1999     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2000    
2001     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2002     {
2003     int fi;
2004    
2005     switch(factor) {
2006     case 1: fi=0; break;
2007     case 2: fi=1; break;
2008     case 4: fi=2; break;
2009     case 8: fi=3; break;
2010     default: abort();
2011     }
2012    
2013     emit_byte(0x88);
2014     emit_byte(0x84+8*s);
2015     emit_byte(baser+8*index+0x40*fi);
2016     emit_long(base);
2017     }
2018     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2019    
2020     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2021     {
2022     int fi;
2023    
2024     switch(factor) {
2025     case 1: fi=0; break;
2026     case 2: fi=1; break;
2027     case 4: fi=2; break;
2028     case 8: fi=3; break;
2029     default: abort();
2030     }
2031    
2032     emit_byte(0x8b);
2033     emit_byte(0x84+8*d);
2034     emit_byte(baser+8*index+0x40*fi);
2035     emit_long(base);
2036     }
2037     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2038    
2039     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2040     {
2041     int fi;
2042    
2043     switch(factor) {
2044     case 1: fi=0; break;
2045     case 2: fi=1; break;
2046     case 4: fi=2; break;
2047     case 8: fi=3; break;
2048     default: abort();
2049     }
2050    
2051     emit_byte(0x66);
2052     emit_byte(0x8b);
2053     emit_byte(0x84+8*d);
2054     emit_byte(baser+8*index+0x40*fi);
2055     emit_long(base);
2056     }
2057     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2058    
2059     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2060     {
2061     int fi;
2062    
2063     switch(factor) {
2064     case 1: fi=0; break;
2065     case 2: fi=1; break;
2066     case 4: fi=2; break;
2067     case 8: fi=3; break;
2068     default: abort();
2069     }
2070    
2071     emit_byte(0x8a);
2072     emit_byte(0x84+8*d);
2073     emit_byte(baser+8*index+0x40*fi);
2074     emit_long(base);
2075     }
2076     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2077    
2078     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2079     {
2080     int fi;
2081     switch(factor) {
2082     case 1: fi=0; break;
2083     case 2: fi=1; break;
2084     case 4: fi=2; break;
2085     case 8: fi=3; break;
2086     default:
2087     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2088     abort();
2089     }
2090     emit_byte(0x8b);
2091     emit_byte(0x04+8*d);
2092     emit_byte(0x05+8*index+64*fi);
2093     emit_long(base);
2094     }
2095     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2096    
2097     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2098     {
2099     int fi;
2100     switch(factor) {
2101     case 1: fi=0; break;
2102     case 2: fi=1; break;
2103     case 4: fi=2; break;
2104     case 8: fi=3; break;
2105     default:
2106     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2107     abort();
2108     }
2109     if (have_cmov) {
2110     emit_byte(0x0f);
2111     emit_byte(0x40+cond);
2112     emit_byte(0x04+8*d);
2113     emit_byte(0x05+8*index+64*fi);
2114     emit_long(base);
2115     }
2116     else { /* replacement using branch and mov */
2117     int uncc=(cond^1);
2118     emit_byte(0x70+uncc);
2119     emit_byte(7); /* skip next 7 bytes if not cc=true */
2120     emit_byte(0x8b);
2121     emit_byte(0x04+8*d);
2122     emit_byte(0x05+8*index+64*fi);
2123     emit_long(base);
2124     }
2125     }
2126     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2127    
2128     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2129     {
2130     if (have_cmov) {
2131     emit_byte(0x0f);
2132     emit_byte(0x40+cond);
2133     emit_byte(0x05+8*d);
2134     emit_long(mem);
2135     }
2136     else { /* replacement using branch and mov */
2137     int uncc=(cond^1);
2138     emit_byte(0x70+uncc);
2139     emit_byte(6); /* skip next 6 bytes if not cc=true */
2140     emit_byte(0x8b);
2141     emit_byte(0x05+8*d);
2142     emit_long(mem);
2143     }
2144     }
2145     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2146    
2147     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2148     {
2149 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2150 gbeauche 1.1 emit_byte(0x8b);
2151     emit_byte(0x40+8*d+s);
2152     emit_byte(offset);
2153     }
2154     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2155    
2156     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2157     {
2158 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2159 gbeauche 1.1 emit_byte(0x66);
2160     emit_byte(0x8b);
2161     emit_byte(0x40+8*d+s);
2162     emit_byte(offset);
2163     }
2164     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2165    
2166     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2167     {
2168 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2169 gbeauche 1.1 emit_byte(0x8a);
2170     emit_byte(0x40+8*d+s);
2171     emit_byte(offset);
2172     }
2173     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2174    
2175     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2176     {
2177     emit_byte(0x8b);
2178     emit_byte(0x80+8*d+s);
2179     emit_long(offset);
2180     }
2181     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2182    
2183     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2184     {
2185     emit_byte(0x66);
2186     emit_byte(0x8b);
2187     emit_byte(0x80+8*d+s);
2188     emit_long(offset);
2189     }
2190     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2191    
2192     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2193     {
2194     emit_byte(0x8a);
2195     emit_byte(0x80+8*d+s);
2196     emit_long(offset);
2197     }
2198     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2199    
2200     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2201     {
2202 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2203 gbeauche 1.1 emit_byte(0xc7);
2204     emit_byte(0x40+d);
2205     emit_byte(offset);
2206     emit_long(i);
2207     }
2208     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2209    
2210     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2211     {
2212 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2213 gbeauche 1.1 emit_byte(0x66);
2214     emit_byte(0xc7);
2215     emit_byte(0x40+d);
2216     emit_byte(offset);
2217     emit_word(i);
2218     }
2219     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2220    
2221     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2222     {
2223 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2224 gbeauche 1.1 emit_byte(0xc6);
2225     emit_byte(0x40+d);
2226     emit_byte(offset);
2227     emit_byte(i);
2228     }
2229     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2230    
2231     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2232     {
2233 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2234 gbeauche 1.1 emit_byte(0x89);
2235     emit_byte(0x40+8*s+d);
2236     emit_byte(offset);
2237     }
2238     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2239    
2240     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2241     {
2242 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2243 gbeauche 1.1 emit_byte(0x66);
2244     emit_byte(0x89);
2245     emit_byte(0x40+8*s+d);
2246     emit_byte(offset);
2247     }
2248     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2249    
2250     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2251     {
2252 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2253 gbeauche 1.1 emit_byte(0x88);
2254     emit_byte(0x40+8*s+d);
2255     emit_byte(offset);
2256     }
2257     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2258    
2259     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2260     {
2261     if (optimize_imm8 && isbyte(offset)) {
2262     emit_byte(0x8d);
2263     emit_byte(0x40+8*d+s);
2264     emit_byte(offset);
2265     }
2266     else {
2267     emit_byte(0x8d);
2268     emit_byte(0x80+8*d+s);
2269     emit_long(offset);
2270     }
2271     }
2272     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2273    
2274     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2275     {
2276     int fi;
2277    
2278     switch(factor) {
2279     case 1: fi=0; break;
2280     case 2: fi=1; break;
2281     case 4: fi=2; break;
2282     case 8: fi=3; break;
2283     default: abort();
2284     }
2285    
2286     if (optimize_imm8 && isbyte(offset)) {
2287     emit_byte(0x8d);
2288     emit_byte(0x44+8*d);
2289     emit_byte(0x40*fi+8*index+s);
2290     emit_byte(offset);
2291     }
2292     else {
2293     emit_byte(0x8d);
2294     emit_byte(0x84+8*d);
2295     emit_byte(0x40*fi+8*index+s);
2296     emit_long(offset);
2297     }
2298     }
2299     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2300    
2301     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2302     {
2303     int isebp=(s==5)?0x40:0;
2304     int fi;
2305    
2306     switch(factor) {
2307     case 1: fi=0; break;
2308     case 2: fi=1; break;
2309     case 4: fi=2; break;
2310     case 8: fi=3; break;
2311     default: abort();
2312     }
2313    
2314     emit_byte(0x8d);
2315     emit_byte(0x04+8*d+isebp);
2316     emit_byte(0x40*fi+8*index+s);
2317     if (isebp)
2318     emit_byte(0);
2319     }
2320     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2321    
2322     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2323     {
2324     if (optimize_imm8 && isbyte(offset)) {
2325     emit_byte(0x89);
2326     emit_byte(0x40+8*s+d);
2327     emit_byte(offset);
2328     }
2329     else {
2330     emit_byte(0x89);
2331     emit_byte(0x80+8*s+d);
2332     emit_long(offset);
2333     }
2334     }
2335     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2336    
2337     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2338     {
2339     emit_byte(0x66);
2340     emit_byte(0x89);
2341     emit_byte(0x80+8*s+d);
2342     emit_long(offset);
2343     }
2344     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2345    
2346     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2347     {
2348     if (optimize_imm8 && isbyte(offset)) {
2349     emit_byte(0x88);
2350     emit_byte(0x40+8*s+d);
2351     emit_byte(offset);
2352     }
2353     else {
2354     emit_byte(0x88);
2355     emit_byte(0x80+8*s+d);
2356     emit_long(offset);
2357     }
2358     }
2359     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2360    
2361     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2362     {
2363     emit_byte(0x0f);
2364     emit_byte(0xc8+r);
2365     }
2366     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2367    
2368     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2369     {
2370     emit_byte(0x66);
2371     emit_byte(0xc1);
2372     emit_byte(0xc0+r);
2373     emit_byte(0x08);
2374     }
2375     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2376    
2377     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2378     {
2379     emit_byte(0x89);
2380     emit_byte(0xc0+8*s+d);
2381     }
2382     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2383    
2384     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2385     {
2386     emit_byte(0x89);
2387     emit_byte(0x05+8*s);
2388     emit_long(d);
2389     }
2390     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2391    
2392     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2393     {
2394     emit_byte(0x66);
2395     emit_byte(0x89);
2396     emit_byte(0x05+8*s);
2397     emit_long(d);
2398     }
2399     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2400    
2401     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2402     {
2403     emit_byte(0x66);
2404     emit_byte(0x8b);
2405     emit_byte(0x05+8*d);
2406     emit_long(s);
2407     }
2408     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2409    
2410     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2411     {
2412     emit_byte(0x88);
2413     emit_byte(0x05+8*s);
2414     emit_long(d);
2415     }
2416     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2417    
2418     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2419     {
2420     emit_byte(0x8a);
2421     emit_byte(0x05+8*d);
2422     emit_long(s);
2423     }
2424     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2425    
2426     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2427     {
2428     emit_byte(0xb8+d);
2429     emit_long(s);
2430     }
2431     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2432    
2433     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2434     {
2435     emit_byte(0x66);
2436     emit_byte(0xb8+d);
2437     emit_word(s);
2438     }
2439     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2440    
2441     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2442     {
2443     emit_byte(0xb0+d);
2444     emit_byte(s);
2445     }
2446     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2447    
2448     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2449     {
2450     emit_byte(0x81);
2451     emit_byte(0x15);
2452     emit_long(d);
2453     emit_long(s);
2454     }
2455     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2456    
2457     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2458     {
2459     if (optimize_imm8 && isbyte(s)) {
2460     emit_byte(0x83);
2461     emit_byte(0x05);
2462     emit_long(d);
2463     emit_byte(s);
2464     }
2465     else {
2466     emit_byte(0x81);
2467     emit_byte(0x05);
2468     emit_long(d);
2469     emit_long(s);
2470     }
2471     }
2472     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2473    
2474     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2475     {
2476     emit_byte(0x66);
2477     emit_byte(0x81);
2478     emit_byte(0x05);
2479     emit_long(d);
2480     emit_word(s);
2481     }
2482     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2483    
2484     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2485     {
2486     emit_byte(0x80);
2487     emit_byte(0x05);
2488     emit_long(d);
2489     emit_byte(s);
2490     }
2491     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2492    
2493     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2494     {
2495 gbeauche 1.2 if (optimize_accum && isaccum(d))
2496     emit_byte(0xa9);
2497     else {
2498 gbeauche 1.1 emit_byte(0xf7);
2499     emit_byte(0xc0+d);
2500 gbeauche 1.2 }
2501 gbeauche 1.1 emit_long(i);
2502     }
2503     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2504    
2505     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2506     {
2507     emit_byte(0x85);
2508     emit_byte(0xc0+8*s+d);
2509     }
2510     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2511    
2512     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2513     {
2514     emit_byte(0x66);
2515     emit_byte(0x85);
2516     emit_byte(0xc0+8*s+d);
2517     }
2518     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2519    
2520     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2521     {
2522     emit_byte(0x84);
2523     emit_byte(0xc0+8*s+d);
2524     }
2525     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2526    
2527 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2528     {
2529     emit_byte(0x81);
2530     emit_byte(0xf0+d);
2531     emit_long(i);
2532     }
2533     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2534    
2535 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2536     {
2537     if (optimize_imm8 && isbyte(i)) {
2538 gbeauche 1.2 emit_byte(0x83);
2539     emit_byte(0xe0+d);
2540     emit_byte(i);
2541 gbeauche 1.1 }
2542     else {
2543 gbeauche 1.2 if (optimize_accum && isaccum(d))
2544     emit_byte(0x25);
2545     else {
2546     emit_byte(0x81);
2547     emit_byte(0xe0+d);
2548     }
2549     emit_long(i);
2550 gbeauche 1.1 }
2551     }
2552     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2553    
2554     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2555     {
2556 gbeauche 1.2 emit_byte(0x66);
2557     if (optimize_imm8 && isbyte(i)) {
2558     emit_byte(0x83);
2559     emit_byte(0xe0+d);
2560     emit_byte(i);
2561     }
2562     else {
2563     if (optimize_accum && isaccum(d))
2564     emit_byte(0x25);
2565     else {
2566     emit_byte(0x81);
2567     emit_byte(0xe0+d);
2568     }
2569     emit_word(i);
2570     }
2571 gbeauche 1.1 }
2572     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2573    
2574     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2575     {
2576     emit_byte(0x21);
2577     emit_byte(0xc0+8*s+d);
2578     }
2579     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2580    
2581     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2582     {
2583     emit_byte(0x66);
2584     emit_byte(0x21);
2585     emit_byte(0xc0+8*s+d);
2586     }
2587     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2588    
2589     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2590     {
2591     emit_byte(0x20);
2592     emit_byte(0xc0+8*s+d);
2593     }
2594     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2595    
2596     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2597     {
2598     if (optimize_imm8 && isbyte(i)) {
2599     emit_byte(0x83);
2600     emit_byte(0xc8+d);
2601     emit_byte(i);
2602     }
2603     else {
2604 gbeauche 1.2 if (optimize_accum && isaccum(d))
2605     emit_byte(0x0d);
2606     else {
2607 gbeauche 1.1 emit_byte(0x81);
2608     emit_byte(0xc8+d);
2609 gbeauche 1.2 }
2610 gbeauche 1.1 emit_long(i);
2611     }
2612     }
2613     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2614    
2615     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2616     {
2617     emit_byte(0x09);
2618     emit_byte(0xc0+8*s+d);
2619     }
2620     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2621    
2622     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2623     {
2624     emit_byte(0x66);
2625     emit_byte(0x09);
2626     emit_byte(0xc0+8*s+d);
2627     }
2628     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2629    
2630     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2631     {
2632     emit_byte(0x08);
2633     emit_byte(0xc0+8*s+d);
2634     }
2635     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2636    
2637     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2638     {
2639     emit_byte(0x11);
2640     emit_byte(0xc0+8*s+d);
2641     }
2642     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2643    
2644     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2645     {
2646     emit_byte(0x66);
2647     emit_byte(0x11);
2648     emit_byte(0xc0+8*s+d);
2649     }
2650     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2651    
2652     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2653     {
2654     emit_byte(0x10);
2655     emit_byte(0xc0+8*s+d);
2656     }
2657     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2658    
2659     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2660     {
2661     emit_byte(0x01);
2662     emit_byte(0xc0+8*s+d);
2663     }
2664     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2665    
2666     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2667     {
2668     emit_byte(0x66);
2669     emit_byte(0x01);
2670     emit_byte(0xc0+8*s+d);
2671     }
2672     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2673    
2674     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2675     {
2676     emit_byte(0x00);
2677     emit_byte(0xc0+8*s+d);
2678     }
2679     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2680    
2681     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2682     {
2683     if (isbyte(i)) {
2684     emit_byte(0x83);
2685     emit_byte(0xe8+d);
2686     emit_byte(i);
2687     }
2688     else {
2689 gbeauche 1.2 if (optimize_accum && isaccum(d))
2690     emit_byte(0x2d);
2691     else {
2692 gbeauche 1.1 emit_byte(0x81);
2693     emit_byte(0xe8+d);
2694 gbeauche 1.2 }
2695 gbeauche 1.1 emit_long(i);
2696     }
2697     }
2698     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2699    
2700     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2701     {
2702 gbeauche 1.2 if (optimize_accum && isaccum(d))
2703     emit_byte(0x2c);
2704     else {
2705 gbeauche 1.1 emit_byte(0x80);
2706     emit_byte(0xe8+d);
2707 gbeauche 1.2 }
2708 gbeauche 1.1 emit_byte(i);
2709     }
2710     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2711    
2712     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2713     {
2714     if (isbyte(i)) {
2715     emit_byte(0x83);
2716     emit_byte(0xc0+d);
2717     emit_byte(i);
2718     }
2719     else {
2720 gbeauche 1.2 if (optimize_accum && isaccum(d))
2721     emit_byte(0x05);
2722     else {
2723 gbeauche 1.1 emit_byte(0x81);
2724     emit_byte(0xc0+d);
2725 gbeauche 1.2 }
2726 gbeauche 1.1 emit_long(i);
2727     }
2728     }
2729     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2730    
2731     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2732     {
2733 gbeauche 1.2 emit_byte(0x66);
2734 gbeauche 1.1 if (isbyte(i)) {
2735     emit_byte(0x83);
2736     emit_byte(0xc0+d);
2737     emit_byte(i);
2738     }
2739     else {
2740 gbeauche 1.2 if (optimize_accum && isaccum(d))
2741     emit_byte(0x05);
2742     else {
2743 gbeauche 1.1 emit_byte(0x81);
2744     emit_byte(0xc0+d);
2745 gbeauche 1.2 }
2746 gbeauche 1.1 emit_word(i);
2747     }
2748     }
2749     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2750    
2751     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2752     {
2753 gbeauche 1.2 if (optimize_accum && isaccum(d))
2754     emit_byte(0x04);
2755     else {
2756     emit_byte(0x80);
2757     emit_byte(0xc0+d);
2758     }
2759 gbeauche 1.1 emit_byte(i);
2760     }
2761     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2762    
2763     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2764     {
2765     emit_byte(0x19);
2766     emit_byte(0xc0+8*s+d);
2767     }
2768     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2769    
2770     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2771     {
2772     emit_byte(0x66);
2773     emit_byte(0x19);
2774     emit_byte(0xc0+8*s+d);
2775     }
2776     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2777    
2778     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2779     {
2780     emit_byte(0x18);
2781     emit_byte(0xc0+8*s+d);
2782     }
2783     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2784    
2785     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2786     {
2787     emit_byte(0x29);
2788     emit_byte(0xc0+8*s+d);
2789     }
2790     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2791    
2792     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2793     {
2794     emit_byte(0x66);
2795     emit_byte(0x29);
2796     emit_byte(0xc0+8*s+d);
2797     }
2798     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2799    
2800     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2801     {
2802     emit_byte(0x28);
2803     emit_byte(0xc0+8*s+d);
2804     }
2805     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2806    
2807     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2808     {
2809     emit_byte(0x39);
2810     emit_byte(0xc0+8*s+d);
2811     }
2812     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2813    
2814     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2815     {
2816     if (optimize_imm8 && isbyte(i)) {
2817     emit_byte(0x83);
2818     emit_byte(0xf8+r);
2819     emit_byte(i);
2820     }
2821     else {
2822 gbeauche 1.2 if (optimize_accum && isaccum(r))
2823     emit_byte(0x3d);
2824     else {
2825 gbeauche 1.1 emit_byte(0x81);
2826     emit_byte(0xf8+r);
2827 gbeauche 1.2 }
2828 gbeauche 1.1 emit_long(i);
2829     }
2830     }
2831     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2832    
2833     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2834     {
2835     emit_byte(0x66);
2836     emit_byte(0x39);
2837     emit_byte(0xc0+8*s+d);
2838     }
2839     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2840    
2841 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2842     {
2843     emit_byte(0x80);
2844     emit_byte(0x3d);
2845     emit_long(d);
2846     emit_byte(s);
2847     }
2848     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2849    
2850 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2851     {
2852 gbeauche 1.2 if (optimize_accum && isaccum(d))
2853     emit_byte(0x3c);
2854     else {
2855 gbeauche 1.1 emit_byte(0x80);
2856     emit_byte(0xf8+d);
2857 gbeauche 1.2 }
2858 gbeauche 1.1 emit_byte(i);
2859     }
2860     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2861    
2862     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2863     {
2864     emit_byte(0x38);
2865     emit_byte(0xc0+8*s+d);
2866     }
2867     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2868    
2869     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2870     {
2871     int fi;
2872    
2873     switch(factor) {
2874     case 1: fi=0; break;
2875     case 2: fi=1; break;
2876     case 4: fi=2; break;
2877     case 8: fi=3; break;
2878     default: abort();
2879     }
2880     emit_byte(0x39);
2881     emit_byte(0x04+8*d);
2882     emit_byte(5+8*index+0x40*fi);
2883     emit_long(offset);
2884     }
2885     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2886    
2887     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2888     {
2889     emit_byte(0x31);
2890     emit_byte(0xc0+8*s+d);
2891     }
2892     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2893    
2894     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2895     {
2896     emit_byte(0x66);
2897     emit_byte(0x31);
2898     emit_byte(0xc0+8*s+d);
2899     }
2900     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2901    
2902     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2903     {
2904     emit_byte(0x30);
2905     emit_byte(0xc0+8*s+d);
2906     }
2907     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2908    
2909     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2910     {
2911     if (optimize_imm8 && isbyte(s)) {
2912     emit_byte(0x83);
2913     emit_byte(0x2d);
2914     emit_long(d);
2915     emit_byte(s);
2916     }
2917     else {
2918     emit_byte(0x81);
2919     emit_byte(0x2d);
2920     emit_long(d);
2921     emit_long(s);
2922     }
2923     }
2924     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2925    
2926     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2927     {
2928     if (optimize_imm8 && isbyte(s)) {
2929     emit_byte(0x83);
2930     emit_byte(0x3d);
2931     emit_long(d);
2932     emit_byte(s);
2933     }
2934     else {
2935     emit_byte(0x81);
2936     emit_byte(0x3d);
2937     emit_long(d);
2938     emit_long(s);
2939     }
2940     }
2941     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2942    
2943     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2944     {
2945     emit_byte(0x87);
2946     emit_byte(0xc0+8*r1+r2);
2947     }
2948     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2949    
2950     /*************************************************************************
2951     * FIXME: mem access modes probably wrong *
2952     *************************************************************************/
2953    
2954     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2955     {
2956     emit_byte(0x9c);
2957     }
2958     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2959    
2960     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2961     {
2962     emit_byte(0x9d);
2963     }
2964     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2965 gbeauche 1.13
2966     #endif
2967 gbeauche 1.1
2968     /*************************************************************************
2969     * Unoptimizable stuff --- jump *
2970     *************************************************************************/
2971    
2972     static __inline__ void raw_call_r(R4 r)
2973     {
2974 gbeauche 1.20 #if USE_NEW_RTASM
2975     CALLsr(r);
2976     #else
2977 gbeauche 1.1 emit_byte(0xff);
2978     emit_byte(0xd0+r);
2979 gbeauche 1.20 #endif
2980 gbeauche 1.5 }
2981    
2982     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2983     {
2984 gbeauche 1.20 #if USE_NEW_RTASM
2985     CALLsm(base, X86_NOREG, r, m);
2986     #else
2987 gbeauche 1.5 int mu;
2988     switch(m) {
2989     case 1: mu=0; break;
2990     case 2: mu=1; break;
2991     case 4: mu=2; break;
2992     case 8: mu=3; break;
2993     default: abort();
2994     }
2995     emit_byte(0xff);
2996     emit_byte(0x14);
2997     emit_byte(0x05+8*r+0x40*mu);
2998     emit_long(base);
2999 gbeauche 1.20 #endif
3000 gbeauche 1.1 }
3001    
3002     static __inline__ void raw_jmp_r(R4 r)
3003     {
3004 gbeauche 1.20 #if USE_NEW_RTASM
3005     JMPsr(r);
3006     #else
3007 gbeauche 1.1 emit_byte(0xff);
3008     emit_byte(0xe0+r);
3009 gbeauche 1.20 #endif
3010 gbeauche 1.1 }
3011    
3012     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3013     {
3014 gbeauche 1.20 #if USE_NEW_RTASM
3015     JMPsm(base, X86_NOREG, r, m);
3016     #else
3017 gbeauche 1.1 int mu;
3018     switch(m) {
3019     case 1: mu=0; break;
3020     case 2: mu=1; break;
3021     case 4: mu=2; break;
3022     case 8: mu=3; break;
3023     default: abort();
3024     }
3025     emit_byte(0xff);
3026     emit_byte(0x24);
3027     emit_byte(0x05+8*r+0x40*mu);
3028     emit_long(base);
3029 gbeauche 1.20 #endif
3030 gbeauche 1.1 }
3031    
3032     static __inline__ void raw_jmp_m(uae_u32 base)
3033     {
3034     emit_byte(0xff);
3035     emit_byte(0x25);
3036     emit_long(base);
3037     }
3038    
3039    
3040     static __inline__ void raw_call(uae_u32 t)
3041     {
3042 gbeauche 1.20 #if USE_NEW_RTASM
3043     CALLm(t);
3044     #else
3045 gbeauche 1.1 emit_byte(0xe8);
3046     emit_long(t-(uae_u32)target-4);
3047 gbeauche 1.20 #endif
3048 gbeauche 1.1 }
3049    
3050     static __inline__ void raw_jmp(uae_u32 t)
3051     {
3052 gbeauche 1.20 #if USE_NEW_RTASM
3053     JMPm(t);
3054     #else
3055 gbeauche 1.1 emit_byte(0xe9);
3056     emit_long(t-(uae_u32)target-4);
3057 gbeauche 1.20 #endif
3058 gbeauche 1.1 }
3059    
3060     static __inline__ void raw_jl(uae_u32 t)
3061     {
3062     emit_byte(0x0f);
3063     emit_byte(0x8c);
3064 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3065 gbeauche 1.1 }
3066    
3067     static __inline__ void raw_jz(uae_u32 t)
3068     {
3069     emit_byte(0x0f);
3070     emit_byte(0x84);
3071 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3072 gbeauche 1.1 }
3073    
3074     static __inline__ void raw_jnz(uae_u32 t)
3075     {
3076     emit_byte(0x0f);
3077     emit_byte(0x85);
3078 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3079 gbeauche 1.1 }
3080    
/* Emit only the two opcode bytes of JNZ rel32 (0x0F 0x85); the caller is
   expected to emit the 32-bit displacement afterwards. */
static __inline__ void raw_jnz_l_oponly(void)
{
    emit_byte(0x0f);  /* two-byte opcode escape */
    emit_byte(0x85);  /* Jcc rel32, condition NZ */
}
3086    
/* Emit only the two opcode bytes of Jcc rel32 (0x0F, 0x80+cc) for x86
   condition code cc; the caller is expected to emit the 32-bit
   displacement afterwards. */
static __inline__ void raw_jcc_l_oponly(int cc)
{
    const int opcode2 = cc + 0x80;  /* condition selects the second byte */

    emit_byte(0x0f);
    emit_byte(opcode2);
}
3092    
/* Emit only the opcode byte of JNZ rel8 (0x75); the caller is expected to
   emit the 8-bit displacement afterwards. */
static __inline__ void raw_jnz_b_oponly(void)
{
    const int opcode_jnz_short = 0x75;

    emit_byte(opcode_jnz_short);
}
3097    
/* Emit only the opcode byte of JZ rel8 (0x74); the caller is expected to
   emit the 8-bit displacement afterwards. */
static __inline__ void raw_jz_b_oponly(void)
{
    const int opcode_jz_short = 0x74;

    emit_byte(opcode_jz_short);
}
3102    
/* Emit only the opcode byte of Jcc rel8 (0x70+cc) for x86 condition code
   cc; the caller is expected to emit the 8-bit displacement afterwards. */
static __inline__ void raw_jcc_b_oponly(int cc)
{
    const int opcode = cc + 0x70;  /* condition is encoded in the opcode */

    emit_byte(opcode);
}
3107    
/* Emit only the opcode byte of JMP rel32 (0xE9); the caller is expected
   to emit the 32-bit displacement afterwards. */
static __inline__ void raw_jmp_l_oponly(void)
{
    const int opcode_jmp_near = 0xe9;

    emit_byte(opcode_jmp_near);
}
3112    
/* Emit only the opcode byte of JMP rel8 (0xEB); the caller is expected to
   emit the 8-bit displacement afterwards. */
static __inline__ void raw_jmp_b_oponly(void)
{
    const int opcode_jmp_short = 0xeb;

    emit_byte(opcode_jmp_short);
}
3117    
/* Emit a near return (RET, opcode 0xC3). */
static __inline__ void raw_ret(void)
{
    const int opcode_ret = 0xc3;

    emit_byte(opcode_ret);
}
3122    
/* Emit a single one-byte no-op (NOP, opcode 0x90). */
static __inline__ void raw_nop(void)
{
    const int opcode_nop = 0x90;

    emit_byte(opcode_nop);
}
3127    
3128 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3129     {
3130     /* Source: GNU Binutils 2.12.90.0.15 */
3131     /* Various efficient no-op patterns for aligning code labels.
3132     Note: Don't try to assemble the instructions in the comments.
3133     0L and 0w are not legal. */
3134     static const uae_u8 f32_1[] =
3135     {0x90}; /* nop */
3136     static const uae_u8 f32_2[] =
3137     {0x89,0xf6}; /* movl %esi,%esi */
3138     static const uae_u8 f32_3[] =
3139     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3140     static const uae_u8 f32_4[] =
3141     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3142     static const uae_u8 f32_5[] =
3143     {0x90, /* nop */
3144     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3145     static const uae_u8 f32_6[] =
3146     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3147     static const uae_u8 f32_7[] =
3148     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3149     static const uae_u8 f32_8[] =
3150     {0x90, /* nop */
3151     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3152     static const uae_u8 f32_9[] =
3153     {0x89,0xf6, /* movl %esi,%esi */
3154     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3155     static const uae_u8 f32_10[] =
3156     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3157     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3158     static const uae_u8 f32_11[] =
3159     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3160     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3161     static const uae_u8 f32_12[] =
3162     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3163     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3164     static const uae_u8 f32_13[] =
3165     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3166     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3167     static const uae_u8 f32_14[] =
3168     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3169     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170     static const uae_u8 f32_15[] =
3171     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3172     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3173     static const uae_u8 f32_16[] =
3174     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3175     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3176     static const uae_u8 *const f32_patt[] = {
3177     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3178     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3179     };
3180 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3181 gbeauche 1.8
3182 gbeauche 1.21 #if defined(__x86_64__)
3183     /* The recommended way to pad 64bit code is to use NOPs preceded by
3184     maximally four 0x66 prefixes. Balance the size of nops. */
3185     if (nbytes == 0)
3186     return;
3187    
3188     int i;
3189     int nnops = (nbytes + 3) / 4;
3190     int len = nbytes / nnops;
3191     int remains = nbytes - nnops * len;
3192    
3193     for (i = 0; i < remains; i++) {
3194     emit_block(prefixes, len);
3195     raw_nop();
3196     }
3197     for (; i < nnops; i++) {
3198     emit_block(prefixes, len - 1);
3199     raw_nop();
3200     }
3201     #else
3202 gbeauche 1.8 int nloops = nbytes / 16;
3203     while (nloops-- > 0)
3204     emit_block(f32_16, sizeof(f32_16));
3205    
3206     nbytes %= 16;
3207     if (nbytes)
3208     emit_block(f32_patt[nbytes - 1], nbytes);
3209 gbeauche 1.21 #endif
3210 gbeauche 1.8 }
3211    
3212 gbeauche 1.1
3213     /*************************************************************************
3214     * Flag handling, to and fro UAE flag register *
3215     *************************************************************************/
3216    
3217     #ifdef SAHF_SETO_PROFITABLE
3218    
3219     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3220    
3221     static __inline__ void raw_flags_to_reg(int r)
3222     {
3223     raw_lahf(0); /* Most flags in AH */
3224     //raw_setcc(r,0); /* V flag in AL */
3225 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3226 gbeauche 1.1
3227     #if 1 /* Let's avoid those nasty partial register stalls */
3228 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3229     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3230 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3231     live.state[FLAGTMP].status=INMEM;
3232     live.state[FLAGTMP].realreg=-1;
3233     /* We just "evicted" FLAGTMP. */
3234     if (live.nat[r].nholds!=1) {
3235     /* Huh? */
3236     abort();
3237     }
3238     live.nat[r].nholds=0;
3239     #endif
3240     }
3241    
3242     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3243     static __inline__ void raw_reg_to_flags(int r)
3244     {
3245     raw_cmp_b_ri(r,-127); /* set V */
3246     raw_sahf(0);
3247     }
3248    
3249 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3250     static __inline__ void raw_flags_set_zero(int s, int tmp)
3251     {
3252     raw_mov_l_rr(tmp,s);
3253     raw_lahf(s); /* flags into ah */
3254     raw_and_l_ri(s,0xffffbfff);
3255     raw_and_l_ri(tmp,0x00004000);
3256     raw_xor_l_ri(tmp,0x00004000);
3257     raw_or_l(s,tmp);
3258     raw_sahf(s);
3259     }
3260    
3261 gbeauche 1.1 #else
3262    
3263     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3264     static __inline__ void raw_flags_to_reg(int r)
3265     {
3266     raw_pushfl();
3267     raw_pop_l_r(r);
3268 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3269 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3270     live.state[FLAGTMP].status=INMEM;
3271     live.state[FLAGTMP].realreg=-1;
3272     /* We just "evicted" FLAGTMP. */
3273     if (live.nat[r].nholds!=1) {
3274     /* Huh? */
3275     abort();
3276     }
3277     live.nat[r].nholds=0;
3278     }
3279    
3280     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3281     static __inline__ void raw_reg_to_flags(int r)
3282     {
3283     raw_push_l_r(r);
3284     raw_popfl();
3285     }
3286    
3287 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3288     static __inline__ void raw_flags_set_zero(int s, int tmp)
3289     {
3290     raw_mov_l_rr(tmp,s);
3291     raw_pushfl();
3292     raw_pop_l_r(s);
3293     raw_and_l_ri(s,0xffffffbf);
3294     raw_and_l_ri(tmp,0x00000040);
3295     raw_xor_l_ri(tmp,0x00000040);
3296     raw_or_l(s,tmp);
3297     raw_push_l_r(s);
3298     raw_popfl();
3299     }
3300 gbeauche 1.1 #endif
3301    
3302     /* Apparently, there are enough instructions between flag store and
3303     flag reload to avoid the partial memory stall */
3304     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3305     {
3306     #if 1
3307 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3308 gbeauche 1.1 #else
3309 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3310     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3311 gbeauche 1.1 #endif
3312     }
3313    
3314     /* FLAGX is byte sized, and we *do* write it at that size */
3315     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3316     {
3317     if (live.nat[target].canbyte)
3318 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3319 gbeauche 1.1 else if (live.nat[target].canword)
3320 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3321 gbeauche 1.1 else
3322 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3323 gbeauche 1.1 }
3324    
3325     static __inline__ void raw_inc_sp(int off)
3326     {
3327 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3328 gbeauche 1.1 }
3329    
3330     /*************************************************************************
3331     * Handling mistaken direct memory access *
3332     *************************************************************************/
3333    
3334     // gb-- I don't need that part for JIT Basilisk II
3335     #if defined(NATMEM_OFFSET) && 0
3336     #include <asm/sigcontext.h>
3337     #include <signal.h>
3338    
3339     #define SIG_READ 1
3340     #define SIG_WRITE 2
3341    
3342     static int in_handler=0;
3343     static uae_u8 veccode[256];
3344    
3345     static void vec(int x, struct sigcontext sc)
3346     {
3347     uae_u8* i=(uae_u8*)sc.eip;
3348     uae_u32 addr=sc.cr2;
3349     int r=-1;
3350     int size=4;
3351     int dir=-1;
3352     int len=0;
3353     int j;
3354    
3355     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3356     if (!canbang)
3357     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3358     if (in_handler)
3359     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3360    
3361     if (canbang && i>=compiled_code && i<=current_compile_p) {
3362     if (*i==0x66) {
3363     i++;
3364     size=2;
3365     len++;
3366     }
3367    
3368     switch(i[0]) {
3369     case 0x8a:
3370     if ((i[1]&0xc0)==0x80) {
3371     r=(i[1]>>3)&7;
3372     dir=SIG_READ;
3373     size=1;
3374     len+=6;
3375     break;
3376     }
3377     break;
3378     case 0x88:
3379     if ((i[1]&0xc0)==0x80) {
3380     r=(i[1]>>3)&7;
3381     dir=SIG_WRITE;
3382     size=1;
3383     len+=6;
3384     break;
3385     }
3386     break;
3387     case 0x8b:
3388     if ((i[1]&0xc0)==0x80) {
3389     r=(i[1]>>3)&7;
3390     dir=SIG_READ;
3391     len+=6;
3392     break;
3393     }
3394     if ((i[1]&0xc0)==0x40) {
3395     r=(i[1]>>3)&7;
3396     dir=SIG_READ;
3397     len+=3;
3398     break;
3399     }
3400     break;
3401     case 0x89:
3402     if ((i[1]&0xc0)==0x80) {
3403     r=(i[1]>>3)&7;
3404     dir=SIG_WRITE;
3405     len+=6;
3406     break;
3407     }
3408     if ((i[1]&0xc0)==0x40) {
3409     r=(i[1]>>3)&7;
3410     dir=SIG_WRITE;
3411     len+=3;
3412     break;
3413     }
3414     break;
3415     }
3416     }
3417    
3418     if (r!=-1) {
3419     void* pr=NULL;
3420     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3421    
3422     switch(r) {
3423     case 0: pr=&(sc.eax); break;
3424     case 1: pr=&(sc.ecx); break;
3425     case 2: pr=&(sc.edx); break;
3426     case 3: pr=&(sc.ebx); break;
3427     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3428     case 5: pr=(size>1)?
3429     (void*)(&(sc.ebp)):
3430     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3431     case 6: pr=(size>1)?
3432     (void*)(&(sc.esi)):
3433     (void*)(((uae_u8*)&(sc.edx))+1); break;
3434     case 7: pr=(size>1)?
3435     (void*)(&(sc.edi)):
3436     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3437     default: abort();
3438     }
3439     if (pr) {
3440     blockinfo* bi;
3441    
3442     if (currprefs.comp_oldsegv) {
3443     addr-=NATMEM_OFFSET;
3444    
3445     if ((addr>=0x10000000 && addr<0x40000000) ||
3446     (addr>=0x50000000)) {
3447     write_log("Suspicious address in %x SEGV handler.\n",addr);
3448     }
3449     if (dir==SIG_READ) {
3450     switch(size) {
3451     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3452     case 2: *((uae_u16*)pr)=get_word(addr); break;
3453     case 4: *((uae_u32*)pr)=get_long(addr); break;
3454     default: abort();
3455     }
3456     }
3457     else { /* write */
3458     switch(size) {
3459     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3460     case 2: put_word(addr,*((uae_u16*)pr)); break;
3461     case 4: put_long(addr,*((uae_u32*)pr)); break;
3462     default: abort();
3463     }
3464     }
3465     write_log("Handled one access!\n");
3466     fflush(stdout);
3467     segvcount++;
3468     sc.eip+=len;
3469     }
3470     else {
3471     void* tmp=target;
3472     int i;
3473     uae_u8 vecbuf[5];
3474    
3475     addr-=NATMEM_OFFSET;
3476    
3477     if ((addr>=0x10000000 && addr<0x40000000) ||
3478     (addr>=0x50000000)) {
3479     write_log("Suspicious address in %x SEGV handler.\n",addr);
3480     }
3481    
3482     target=(uae_u8*)sc.eip;
3483     for (i=0;i<5;i++)
3484     vecbuf[i]=target[i];
3485     emit_byte(0xe9);
3486 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3487 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3488    
3489     write_log("Handled one access!\n");
3490     fflush(stdout);
3491     segvcount++;
3492    
3493     target=veccode;
3494    
3495     if (dir==SIG_READ) {
3496     switch(size) {
3497     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3498     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3499     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3500     default: abort();
3501     }
3502     }
3503     else { /* write */
3504     switch(size) {
3505     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3506     case 2: put_word(addr,*((uae_u16*)pr)); break;
3507     case 4: put_long(addr,*((uae_u32*)pr)); break;
3508     default: abort();
3509     }
3510     }
3511     for (i=0;i<5;i++)
3512     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3513 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3514 gbeauche 1.1 emit_byte(0xe9);
3515 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3516 gbeauche 1.1 in_handler=1;
3517     target=tmp;
3518     }
3519     bi=active;
3520     while (bi) {
3521     if (bi->handler &&
3522     (uae_u8*)bi->direct_handler<=i &&
3523     (uae_u8*)bi->nexthandler>i) {
3524     write_log("deleted trigger (%p<%p<%p) %p\n",
3525     bi->handler,
3526     i,
3527     bi->nexthandler,
3528     bi->pc_p);
3529     invalidate_block(bi);
3530     raise_in_cl_list(bi);
3531     set_special(0);
3532     return;
3533     }
3534     bi=bi->next;
3535     }
3536     /* Not found in the active list. Might be a rom routine that
3537     is in the dormant list */
3538     bi=dormant;
3539     while (bi) {
3540     if (bi->handler &&
3541     (uae_u8*)bi->direct_handler<=i &&
3542     (uae_u8*)bi->nexthandler>i) {
3543     write_log("deleted trigger (%p<%p<%p) %p\n",
3544     bi->handler,
3545     i,
3546     bi->nexthandler,
3547     bi->pc_p);
3548     invalidate_block(bi);
3549     raise_in_cl_list(bi);
3550     set_special(0);
3551     return;
3552     }
3553     bi=bi->next;
3554     }
3555     write_log("Huh? Could not find trigger!\n");
3556     return;
3557     }
3558     }
3559     write_log("Can't handle access!\n");
3560     for (j=0;j<10;j++) {
3561     write_log("instruction byte %2d is %02x\n",j,i[j]);
3562     }
3563     write_log("Please send the above info (starting at \"fault address\") to\n"
3564     "bmeyer@csse.monash.edu.au\n"
3565     "This shouldn't happen ;-)\n");
3566     fflush(stdout);
3567     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3568     }
3569     #endif
3570    
3571    
3572     /*************************************************************************
3573     * Checking for CPU features *
3574     *************************************************************************/
3575    
3576 gbeauche 1.3 struct cpuinfo_x86 {
3577     uae_u8 x86; // CPU family
3578     uae_u8 x86_vendor; // CPU vendor
3579     uae_u8 x86_processor; // CPU canonical processor type
3580     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3581     uae_u32 x86_hwcap;
3582     uae_u8 x86_model;
3583     uae_u8 x86_mask;
3584     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3585     char x86_vendor_id[16];
3586     };
3587     struct cpuinfo_x86 cpuinfo;
3588    
/* Vendor codes stored in cpuinfo_x86::x86_vendor.  The quoted strings are
   the CPUID vendor IDs that x86_get_cpu_vendor() maps to each code. */
enum {
    X86_VENDOR_INTEL     = 0,     /* "GenuineIntel" */
    X86_VENDOR_CYRIX     = 1,     /* "CyrixInstead" */
    X86_VENDOR_AMD       = 2,     /* "AuthenticAMD" */
    X86_VENDOR_UMC       = 3,     /* "UMC UMC UMC " */
    X86_VENDOR_NEXGEN    = 4,     /* "NexGenDriven" */
    X86_VENDOR_CENTAUR   = 5,     /* "CentaurHauls" */
    X86_VENDOR_RISE      = 6,     /* "RiseRiseRise" */
    X86_VENDOR_TRANSMETA = 7,     /* "GenuineTMx86" or "TransmetaCPU" */
    X86_VENDOR_NSC       = 8,     /* "Geode by NSC" */
    X86_VENDOR_UNKNOWN   = 0xff   /* anything else */
};
3601    
/* Canonical processor types.  The values index
   x86_processor_string_table[] and x86_alignments[], so the order here
   must stay in step with both tables. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_X86_64     = 7,
    X86_PROCESSOR_max        = 8    /* number of types; also "not yet identified" */
};
3613    
3614     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3615     "80386",
3616     "80486",
3617     "Pentium",
3618     "PentiumPro",
3619     "K6",
3620     "Athlon",
3621 gbeauche 1.16 "Pentium4",
3622 gbeauche 1.28 "x86-64"
3623 gbeauche 1.3 };
3624    
3625     static struct ptt {
3626     const int align_loop;
3627     const int align_loop_max_skip;
3628     const int align_jump;
3629     const int align_jump_max_skip;
3630     const int align_func;
3631     }
3632     x86_alignments[X86_PROCESSOR_max] = {
3633     { 4, 3, 4, 3, 4 },
3634     { 16, 15, 16, 15, 16 },
3635     { 16, 7, 16, 7, 16 },
3636     { 16, 15, 16, 7, 16 },
3637     { 32, 7, 32, 7, 32 },
3638 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3639 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3640     { 16, 7, 16, 7, 16 }
3641 gbeauche 1.3 };
3642 gbeauche 1.1
3643 gbeauche 1.3 static void
3644     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3645 gbeauche 1.1 {
3646 gbeauche 1.3 char *v = c->x86_vendor_id;
3647    
3648     if (!strcmp(v, "GenuineIntel"))
3649     c->x86_vendor = X86_VENDOR_INTEL;
3650     else if (!strcmp(v, "AuthenticAMD"))
3651     c->x86_vendor = X86_VENDOR_AMD;
3652     else if (!strcmp(v, "CyrixInstead"))
3653     c->x86_vendor = X86_VENDOR_CYRIX;
3654     else if (!strcmp(v, "Geode by NSC"))
3655     c->x86_vendor = X86_VENDOR_NSC;
3656     else if (!strcmp(v, "UMC UMC UMC "))
3657     c->x86_vendor = X86_VENDOR_UMC;
3658     else if (!strcmp(v, "CentaurHauls"))
3659     c->x86_vendor = X86_VENDOR_CENTAUR;
3660     else if (!strcmp(v, "NexGenDriven"))
3661     c->x86_vendor = X86_VENDOR_NEXGEN;
3662     else if (!strcmp(v, "RiseRiseRise"))
3663     c->x86_vendor = X86_VENDOR_RISE;
3664     else if (!strcmp(v, "GenuineTMx86") ||
3665     !strcmp(v, "TransmetaCPU"))
3666     c->x86_vendor = X86_VENDOR_TRANSMETA;
3667     else
3668     c->x86_vendor = X86_VENDOR_UNKNOWN;
3669     }
3670 gbeauche 1.1
3671 gbeauche 1.3 static void
3672     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3673     {
3674 gbeauche 1.27 const int CPUID_SPACE = 4096;
3675     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3676     if (cpuid_space == VM_MAP_FAILED)
3677     abort();
3678     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3679    
3680 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3681 gbeauche 1.3 uae_u8* tmp=get_target();
3682 gbeauche 1.1
3683 gbeauche 1.20 s_op = op;
3684 gbeauche 1.3 set_target(cpuid_space);
3685     raw_push_l_r(0); /* eax */
3686     raw_push_l_r(1); /* ecx */
3687     raw_push_l_r(2); /* edx */
3688     raw_push_l_r(3); /* ebx */
3689 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3690 gbeauche 1.3 raw_cpuid(0);
3691 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3692     raw_mov_l_mr((uintptr)&s_ebx,3);
3693     raw_mov_l_mr((uintptr)&s_ecx,1);
3694     raw_mov_l_mr((uintptr)&s_edx,2);
3695 gbeauche 1.3 raw_pop_l_r(3);
3696     raw_pop_l_r(2);
3697     raw_pop_l_r(1);
3698     raw_pop_l_r(0);
3699     raw_ret();
3700     set_target(tmp);
3701 gbeauche 1.1
3702 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3703 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3704     if (ebx != NULL) *ebx = s_ebx;
3705     if (ecx != NULL) *ecx = s_ecx;
3706     if (edx != NULL) *edx = s_edx;
3707 gbeauche 1.27
3708     vm_release(cpuid_space, CPUID_SPACE);
3709 gbeauche 1.1 }
3710    
3711 gbeauche 1.3 static void
3712     raw_init_cpu(void)
3713 gbeauche 1.1 {
3714 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3715    
3716     /* Defaults */
3717 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3718 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3719     c->cpuid_level = -1; /* CPUID not detected */
3720     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3721     c->x86_vendor_id[0] = '\0'; /* Unset */
3722     c->x86_hwcap = 0;
3723    
3724     /* Get vendor name */
3725     c->x86_vendor_id[12] = '\0';
3726     cpuid(0x00000000,
3727     (uae_u32 *)&c->cpuid_level,
3728     (uae_u32 *)&c->x86_vendor_id[0],
3729     (uae_u32 *)&c->x86_vendor_id[8],
3730     (uae_u32 *)&c->x86_vendor_id[4]);
3731     x86_get_cpu_vendor(c);
3732    
3733     /* Intel-defined flags: level 0x00000001 */
3734     c->x86_brand_id = 0;
3735     if ( c->cpuid_level >= 0x00000001 ) {
3736     uae_u32 tfms, brand_id;
3737     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3738     c->x86 = (tfms >> 8) & 15;
3739 gbeauche 1.29 if (c->x86 == 0xf)
3740     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3741 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3742 gbeauche 1.29 if (c->x86_model == 0xf)
3743     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3744 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3745     c->x86_mask = tfms & 15;
3746     } else {
3747     /* Have CPUID level 0 only - unheard of */
3748     c->x86 = 4;
3749     }
3750    
3751 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3752     uae_u32 xlvl;
3753     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3754     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3755     if ( xlvl >= 0x80000001 ) {
3756 gbeauche 1.28 uae_u32 features, extra_features;
3757     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3758 gbeauche 1.16 if (features & (1 << 29)) {
3759     /* Assume x86-64 if long mode is supported */
3760 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3761 gbeauche 1.16 }
3762 gbeauche 1.28 if (extra_features & (1 << 0))
3763     have_lahf_lm = true;
3764 gbeauche 1.16 }
3765     }
3766    
3767 gbeauche 1.3 /* Canonicalize processor ID */
3768     switch (c->x86) {
3769     case 3:
3770     c->x86_processor = X86_PROCESSOR_I386;
3771     break;
3772     case 4:
3773     c->x86_processor = X86_PROCESSOR_I486;
3774     break;
3775     case 5:
3776     if (c->x86_vendor == X86_VENDOR_AMD)
3777     c->x86_processor = X86_PROCESSOR_K6;
3778     else
3779     c->x86_processor = X86_PROCESSOR_PENTIUM;
3780     break;
3781     case 6:
3782     if (c->x86_vendor == X86_VENDOR_AMD)
3783     c->x86_processor = X86_PROCESSOR_ATHLON;
3784     else
3785     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3786     break;
3787     case 15:
3788 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3789     switch (c->x86_vendor) {
3790     case X86_VENDOR_INTEL:
3791     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3792     break;
3793     case X86_VENDOR_AMD:
3794     /* Assume a 32-bit Athlon processor if not in long mode */
3795     c->x86_processor = X86_PROCESSOR_ATHLON;
3796     break;
3797     }
3798     }
3799     break;
3800 gbeauche 1.3 }
3801     if (c->x86_processor == X86_PROCESSOR_max) {
3802     fprintf(stderr, "Error: unknown processor type\n");
3803     fprintf(stderr, " Family : %d\n", c->x86);
3804     fprintf(stderr, " Model : %d\n", c->x86_model);
3805     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3806 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3807 gbeauche 1.3 if (c->x86_brand_id)
3808     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3809     abort();
3810     }
3811    
3812     /* Have CMOV support? */
3813 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3814 gbeauche 1.3
3815     /* Can the host CPU suffer from partial register stalls? */
3816     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3817     #if 1
3818     /* It appears that partial register writes are a bad idea even on
3819 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3820     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3821 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3822     have_rat_stall = true;
3823 gbeauche 1.1 #endif
3824 gbeauche 1.3
3825     /* Alignments */
3826     if (tune_alignment) {
3827     align_loops = x86_alignments[c->x86_processor].align_loop;
3828     align_jumps = x86_alignments[c->x86_processor].align_jump;
3829     }
3830    
3831     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3832     c->cpuid_level, c->x86_vendor_id,
3833     x86_processor_string_table[c->x86_processor]);
3834 gbeauche 1.1 }
3835    
3836 gbeauche 1.10 static bool target_check_bsf(void)
3837     {
3838     bool mismatch = false;
3839     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3840     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3841     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3842     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3843     for (int value = -1; value <= 1; value++) {
3844 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3845     unsigned long tmp = value;
3846 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3847 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3848 gbeauche 1.10 int OF = (flags >> 11) & 1;
3849     int SF = (flags >> 7) & 1;
3850     int ZF = (flags >> 6) & 1;
3851     int CF = flags & 1;
3852     tmp = (value == 0);
3853     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3854     mismatch = true;
3855     }
3856     }}}}
3857     if (mismatch)
3858     write_log("Target CPU defines all flags on BSF instruction\n");
3859     return !mismatch;
3860     }
3861    
3862 gbeauche 1.1
3863     /*************************************************************************
3864     * FPU stuff *
3865     *************************************************************************/
3866    
3867    
3868     static __inline__ void raw_fp_init(void)
3869     {
3870     int i;
3871    
3872     for (i=0;i<N_FREGS;i++)
3873     live.spos[i]=-2;
3874     live.tos=-1; /* Stack is empty */
3875     }
3876    
3877     static __inline__ void raw_fp_cleanup_drop(void)
3878     {
3879     #if 0
3880     /* using FINIT instead of popping all the entries.
3881     Seems to have side effects --- there is display corruption in
3882     Quake when this is used */
3883     if (live.tos>1) {
3884     emit_byte(0x9b);
3885     emit_byte(0xdb);
3886     emit_byte(0xe3);
3887     live.tos=-1;
3888     }
3889     #endif
3890     while (live.tos>=1) {
3891     emit_byte(0xde);
3892     emit_byte(0xd9);
3893     live.tos-=2;
3894     }
3895     while (live.tos>=0) {
3896     emit_byte(0xdd);
3897     emit_byte(0xd8);
3898     live.tos--;
3899     }
3900     raw_fp_init();
3901     }
3902    
3903     static __inline__ void make_tos(int r)
3904     {
3905     int p,q;
3906    
3907     if (live.spos[r]<0) { /* Register not yet on stack */
3908     emit_byte(0xd9);
3909     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3910     live.tos++;
3911     live.spos[r]=live.tos;
3912     live.onstack[live.tos]=r;
3913     return;
3914     }
3915     /* Register is on stack */
3916     if (live.tos==live.spos[r])
3917     return;
3918     p=live.spos[r];
3919     q=live.onstack[live.tos];
3920    
3921     emit_byte(0xd9);
3922     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3923     live.onstack[live.tos]=r;
3924     live.spos[r]=live.tos;
3925     live.onstack[p]=q;
3926     live.spos[q]=p;
3927     }
3928    
3929     static __inline__ void make_tos2(int r, int r2)
3930     {
3931     int q;
3932    
3933     make_tos(r2); /* Put the reg that's supposed to end up in position2
3934     on top */
3935    
3936     if (live.spos[r]<0) { /* Register not yet on stack */
3937     make_tos(r); /* This will extend the stack */
3938     return;
3939     }
3940     /* Register is on stack */
3941     emit_byte(0xd9);
3942     emit_byte(0xc9); /* Move r2 into position 2 */
3943    
3944     q=live.onstack[live.tos-1];
3945     live.onstack[live.tos]=q;
3946     live.spos[q]=live.tos;
3947     live.onstack[live.tos-1]=r2;
3948     live.spos[r2]=live.tos-1;
3949    
3950     make_tos(r); /* And r into 1 */
3951     }
3952    
3953     static __inline__ int stackpos(int r)
3954     {
3955     if (live.spos[r]<0)
3956     abort();
3957     if (live.tos<live.spos[r]) {
3958     printf("Looking for spos for fnreg %d\n",r);
3959     abort();
3960     }
3961     return live.tos-live.spos[r];
3962     }
3963    
3964     static __inline__ void usereg(int r)
3965     {
3966     if (live.spos[r]<0)
3967     make_tos(r);
3968     }
3969    
3970     /* This is called with one FP value in a reg *above* tos, which it will
3971     pop off the stack if necessary */
3972     static __inline__ void tos_make(int r)
3973     {
3974     if (live.spos[r]<0) {
3975     live.tos++;
3976     live.spos[r]=live.tos;
3977     live.onstack[live.tos]=r;
3978     return;
3979     }
3980     emit_byte(0xdd);
3981     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3982     and pop it*/
3983     }
3984 gbeauche 1.23
3985     /* FP helper functions */
3986     #if USE_NEW_RTASM
3987     #define DEFINE_OP(NAME, GEN) \
3988     static inline void raw_##NAME(uint32 m) \
3989     { \
3990     GEN(m, X86_NOREG, X86_NOREG, 1); \
3991     }
3992     DEFINE_OP(fstl, FSTLm);
3993     DEFINE_OP(fstpl, FSTPLm);
3994     DEFINE_OP(fldl, FLDLm);
3995     DEFINE_OP(fildl, FILDLm);
3996     DEFINE_OP(fistl, FISTLm);
3997     DEFINE_OP(flds, FLDSm);
3998     DEFINE_OP(fsts, FSTSm);
3999     DEFINE_OP(fstpt, FSTPTm);
4000     DEFINE_OP(fldt, FLDTm);
4001     #else
4002     #define DEFINE_OP(NAME, OP1, OP2) \
4003     static inline void raw_##NAME(uint32 m) \
4004     { \
4005     emit_byte(OP1); \
4006     emit_byte(OP2); \
4007     emit_long(m); \
4008     }
4009     DEFINE_OP(fstl, 0xdd, 0x15);
4010     DEFINE_OP(fstpl, 0xdd, 0x1d);
4011     DEFINE_OP(fldl, 0xdd, 0x05);
4012     DEFINE_OP(fildl, 0xdb, 0x05);
4013     DEFINE_OP(fistl, 0xdb, 0x15);
4014     DEFINE_OP(flds, 0xd9, 0x05);
4015     DEFINE_OP(fsts, 0xd9, 0x15);
4016     DEFINE_OP(fstpt, 0xdb, 0x3d);
4017     DEFINE_OP(fldt, 0xdb, 0x2d);
4018     #endif
4019     #undef DEFINE_OP
4020    
4021 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4022     {
4023     make_tos(r);
4024 gbeauche 1.23 raw_fstl(m);
4025 gbeauche 1.1 }
4026     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4027    
4028     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4029     {
4030     make_tos(r);
4031 gbeauche 1.23 raw_fstpl(m);
4032 gbeauche 1.1 live.onstack[live.tos]=-1;
4033     live.tos--;
4034     live.spos[r]=-2;
4035     }
4036     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4037    
4038     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4039     {
4040 gbeauche 1.23 raw_fldl(m);
4041 gbeauche 1.1 tos_make(r);
4042     }
4043     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4044    
4045     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4046     {
4047 gbeauche 1.23 raw_fildl(m);
4048 gbeauche 1.1 tos_make(r);
4049     }
4050     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4051    
4052     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4053     {
4054     make_tos(r);
4055 gbeauche 1.23 raw_fistl(m);
4056 gbeauche 1.1 }
4057     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4058    
4059     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4060     {
4061 gbeauche 1.23 raw_flds(m);
4062 gbeauche 1.1 tos_make(r);
4063     }
4064     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4065    
4066     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4067     {
4068     make_tos(r);
4069 gbeauche 1.23 raw_fsts(m);
4070 gbeauche 1.1 }
4071     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4072    
4073     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4074     {
4075     int rs;
4076    
4077     /* Stupid x87 can't write a long double to mem without popping the
4078     stack! */
4079     usereg(r);
4080     rs=stackpos(r);
4081     emit_byte(0xd9); /* Get a copy to the top of stack */
4082     emit_byte(0xc0+rs);
4083    
4084 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4085 gbeauche 1.1 }
4086     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4087    
4088     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4089     {
4090     int rs;
4091    
4092     make_tos(r);
4093 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4094 gbeauche 1.1 live.onstack[live.tos]=-1;
4095     live.tos--;
4096     live.spos[r]=-2;
4097     }
4098     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4099    
4100     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4101     {
4102 gbeauche 1.23 raw_fldt(m);
4103 gbeauche 1.1 tos_make(r);
4104     }
4105     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4106    
4107     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4108     {
4109     emit_byte(0xd9);
4110     emit_byte(0xeb);
4111     tos_make(r);
4112     }
4113     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4114    
4115     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4116     {
4117     emit_byte(0xd9);
4118     emit_byte(0xec);
4119     tos_make(r);
4120     }
4121     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4122    
4123     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4124     {
4125     emit_byte(0xd9);
4126     emit_byte(0xea);
4127     tos_make(r);
4128     }
4129     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4130    
4131     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4132     {
4133     emit_byte(0xd9);
4134     emit_byte(0xed);
4135     tos_make(r);
4136     }
4137     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4138    
4139     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4140     {
4141     emit_byte(0xd9);
4142     emit_byte(0xe8);
4143     tos_make(r);
4144     }
4145     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4146    
4147     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4148     {
4149     emit_byte(0xd9);
4150     emit_byte(0xee);
4151     tos_make(r);
4152     }
4153     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4154    
4155     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4156     {
4157     int ds;
4158    
4159     usereg(s);
4160     ds=stackpos(s);
4161     if (ds==0 && live.spos[d]>=0) {
4162     /* source is on top of stack, and we already have the dest */
4163     int dd=stackpos(d);
4164     emit_byte(0xdd);
4165     emit_byte(0xd0+dd);
4166     }
4167     else {
4168     emit_byte(0xd9);
4169     emit_byte(0xc0+ds); /* duplicate source on tos */
4170     tos_make(d); /* store to destination, pop if necessary */
4171     }
4172     }
4173     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4174    
4175     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4176     {
4177     emit_byte(0xd9);
4178     emit_byte(0xa8+index);
4179     emit_long(base);
4180     }
4181     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4182    
4183    
4184     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4185     {
4186     int ds;
4187    
4188     if (d!=s) {
4189     usereg(s);
4190     ds=stackpos(s);
4191     emit_byte(0xd9);
4192     emit_byte(0xc0+ds); /* duplicate source */
4193     emit_byte(0xd9);
4194     emit_byte(0xfa); /* take square root */
4195     tos_make(d); /* store to destination */
4196     }
4197     else {
4198     make_tos(d);
4199     emit_byte(0xd9);
4200     emit_byte(0xfa); /* take square root */
4201     }
4202     }
4203     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4204    
4205     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4206     {
4207     int ds;
4208    
4209     if (d!=s) {
4210     usereg(s);
4211     ds=stackpos(s);
4212     emit_byte(0xd9);
4213     emit_byte(0xc0+ds); /* duplicate source */
4214     emit_byte(0xd9);
4215     emit_byte(0xe1); /* take fabs */
4216     tos_make(d); /* store to destination */
4217     }
4218     else {
4219     make_tos(d);
4220     emit_byte(0xd9);
4221     emit_byte(0xe1); /* take fabs */
4222     }
4223     }
4224     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4225    
4226     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4227     {
4228     int ds;
4229    
4230     if (d!=s) {
4231     usereg(s);
4232     ds=stackpos(s);
4233     emit_byte(0xd9);
4234     emit_byte(0xc0+ds); /* duplicate source */
4235     emit_byte(0xd9);
4236     emit_byte(0xfc); /* take frndint */
4237     tos_make(d); /* store to destination */
4238     }
4239     else {
4240     make_tos(d);
4241     emit_byte(0xd9);
4242     emit_byte(0xfc); /* take frndint */
4243     }
4244     }
4245     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4246    
4247     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4248     {
4249     int ds;
4250    
4251     if (d!=s) {
4252     usereg(s);
4253     ds=stackpos(s);
4254     emit_byte(0xd9);
4255     emit_byte(0xc0+ds); /* duplicate source */
4256     emit_byte(0xd9);
4257     emit_byte(0xff); /* take cos */
4258     tos_make(d); /* store to destination */
4259     }
4260     else {
4261     make_tos(d);
4262     emit_byte(0xd9);
4263     emit_byte(0xff); /* take cos */
4264     }
4265     }
4266     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4267    
4268     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4269     {
4270     int ds;
4271    
4272     if (d!=s) {
4273     usereg(s);
4274     ds=stackpos(s);
4275     emit_byte(0xd9);
4276     emit_byte(0xc0+ds); /* duplicate source */
4277     emit_byte(0xd9);
4278     emit_byte(0xfe); /* take sin */
4279     tos_make(d); /* store to destination */
4280     }
4281     else {
4282     make_tos(d);
4283     emit_byte(0xd9);
4284     emit_byte(0xfe); /* take sin */
4285     }
4286     }
4287     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4288    
4289     double one=1;
4290     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4291     {
4292     int ds;
4293    
4294     usereg(s);
4295     ds=stackpos(s);
4296     emit_byte(0xd9);
4297     emit_byte(0xc0+ds); /* duplicate source */
4298    
4299     emit_byte(0xd9);
4300     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4301     emit_byte(0xd9);
4302     emit_byte(0xfc); /* rndint */
4303     emit_byte(0xd9);
4304     emit_byte(0xc9); /* swap top two elements */
4305     emit_byte(0xd8);
4306     emit_byte(0xe1); /* subtract rounded from original */
4307     emit_byte(0xd9);
4308     emit_byte(0xf0); /* f2xm1 */
4309     emit_byte(0xdc);
4310     emit_byte(0x05);
4311 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4312 gbeauche 1.1 emit_byte(0xd9);
4313     emit_byte(0xfd); /* and scale it */
4314     emit_byte(0xdd);
4315     emit_byte(0xd9); /* take he rounded value off */
4316     tos_make(d); /* store to destination */
4317     }
4318     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4319    
4320     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4321     {
4322     int ds;
4323    
4324     usereg(s);
4325     ds=stackpos(s);
4326     emit_byte(0xd9);
4327     emit_byte(0xc0+ds); /* duplicate source */
4328     emit_byte(0xd9);
4329     emit_byte(0xea); /* fldl2e */
4330     emit_byte(0xde);
4331     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4332    
4333     emit_byte(0xd9);
4334     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4335     emit_byte(0xd9);
4336     emit_byte(0xfc); /* rndint */
4337     emit_byte(0xd9);
4338     emit_byte(0xc9); /* swap top two elements */
4339     emit_byte(0xd8);
4340     emit_byte(0xe1); /* subtract rounded from original */
4341     emit_byte(0xd9);
4342     emit_byte(0xf0); /* f2xm1 */
4343     emit_byte(0xdc);
4344     emit_byte(0x05);
4345 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4346 gbeauche 1.1 emit_byte(0xd9);
4347     emit_byte(0xfd); /* and scale it */
4348     emit_byte(0xdd);
4349     emit_byte(0xd9); /* take he rounded value off */
4350     tos_make(d); /* store to destination */
4351     }
4352     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4353    
4354     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4355     {
4356     int ds;
4357    
4358     usereg(s);
4359     ds=stackpos(s);
4360     emit_byte(0xd9);
4361     emit_byte(0xc0+ds); /* duplicate source */
4362     emit_byte(0xd9);
4363     emit_byte(0xe8); /* push '1' */
4364     emit_byte(0xd9);
4365     emit_byte(0xc9); /* swap top two */
4366     emit_byte(0xd9);
4367     emit_byte(0xf1); /* take 1*log2(x) */
4368     tos_make(d); /* store to destination */
4369     }
4370     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4371    
4372    
4373     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4374     {
4375     int ds;
4376    
4377     if (d!=s) {
4378     usereg(s);
4379     ds=stackpos(s);
4380     emit_byte(0xd9);
4381     emit_byte(0xc0+ds); /* duplicate source */
4382     emit_byte(0xd9);
4383     emit_byte(0xe0); /* take fchs */
4384     tos_make(d); /* store to destination */
4385     }
4386     else {
4387     make_tos(d);
4388     emit_byte(0xd9);
4389     emit_byte(0xe0); /* take fchs */
4390     }
4391     }
4392     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4393    
4394     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4395     {
4396     int ds;
4397    
4398     usereg(s);
4399     usereg(d);
4400    
4401     if (live.spos[s]==live.tos) {
4402     /* Source is on top of stack */
4403     ds=stackpos(d);
4404     emit_byte(0xdc);
4405     emit_byte(0xc0+ds); /* add source to dest*/
4406     }
4407     else {
4408     make_tos(d);
4409     ds=stackpos(s);
4410    
4411     emit_byte(0xd8);
4412     emit_byte(0xc0+ds); /* add source to dest*/
4413     }
4414     }
4415     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4416    
4417     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4418     {
4419     int ds;
4420    
4421     usereg(s);
4422     usereg(d);
4423    
4424     if (live.spos[s]==live.tos) {
4425     /* Source is on top of stack */
4426     ds=stackpos(d);
4427     emit_byte(0xdc);
4428     emit_byte(0xe8+ds); /* sub source from dest*/
4429     }
4430     else {
4431     make_tos(d);
4432     ds=stackpos(s);
4433    
4434     emit_byte(0xd8);
4435     emit_byte(0xe0+ds); /* sub src from dest */
4436     }
4437     }
4438     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4439    
4440     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4441     {
4442     int ds;
4443    
4444     usereg(s);
4445     usereg(d);
4446    
4447     make_tos(d);
4448     ds=stackpos(s);
4449    
4450     emit_byte(0xdd);
4451     emit_byte(0xe0+ds); /* cmp dest with source*/
4452     }
4453     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4454    
4455     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4456     {
4457     int ds;
4458    
4459     usereg(s);
4460     usereg(d);
4461    
4462     if (live.spos[s]==live.tos) {
4463     /* Source is on top of stack */
4464     ds=stackpos(d);
4465     emit_byte(0xdc);
4466     emit_byte(0xc8+ds); /* mul dest by source*/
4467     }
4468     else {
4469     make_tos(d);
4470     ds=stackpos(s);
4471    
4472     emit_byte(0xd8);
4473     emit_byte(0xc8+ds); /* mul dest by source*/
4474     }
4475     }
4476     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4477    
4478     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4479     {
4480     int ds;
4481    
4482     usereg(s);
4483     usereg(d);
4484    
4485     if (live.spos[s]==live.tos) {
4486     /* Source is on top of stack */
4487     ds=stackpos(d);
4488     emit_byte(0xdc);
4489     emit_byte(0xf8+ds); /* div dest by source */
4490     }
4491     else {
4492     make_tos(d);
4493     ds=stackpos(s);
4494    
4495     emit_byte(0xd8);
4496     emit_byte(0xf0+ds); /* div dest by source*/
4497     }
4498     }
4499     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4500    
4501     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4502     {
4503     int ds;
4504    
4505     usereg(s);
4506     usereg(d);
4507    
4508     make_tos2(d,s);
4509     ds=stackpos(s);
4510    
4511     if (ds!=1) {
4512     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4513     abort();
4514     }
4515     emit_byte(0xd9);
4516     emit_byte(0xf8); /* take rem from dest by source */
4517     }
4518     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4519    
4520     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4521     {
4522     int ds;
4523    
4524     usereg(s);
4525     usereg(d);
4526    
4527     make_tos2(d,s);
4528     ds=stackpos(s);
4529    
4530     if (ds!=1) {
4531     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4532     abort();
4533     }
4534     emit_byte(0xd9);
4535     emit_byte(0xf5); /* take rem1 from dest by source */
4536     }
4537     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4538    
4539    
4540     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4541     {
4542     make_tos(r);
4543     emit_byte(0xd9); /* ftst */
4544     emit_byte(0xe4);
4545     }
4546     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4547    
4548     /* %eax register is clobbered if target processor doesn't support fucomi */
4549     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4550     #define FFLAG_NREG EAX_INDEX
4551    
4552     static __inline__ void raw_fflags_into_flags(int r)
4553     {
4554     int p;
4555    
4556     usereg(r);
4557     p=stackpos(r);
4558    
4559     emit_byte(0xd9);
4560     emit_byte(0xee); /* Push 0 */
4561     emit_byte(0xd9);
4562     emit_byte(0xc9+p); /* swap top two around */
4563     if (have_cmov) {
4564     // gb-- fucomi is for P6 cores only, not K6-2 then...
4565     emit_byte(0xdb);
4566     emit_byte(0xe9+p); /* fucomi them */
4567     }
4568     else {
4569     emit_byte(0xdd);
4570     emit_byte(0xe1+p); /* fucom them */
4571     emit_byte(0x9b);
4572     emit_byte(0xdf);
4573     emit_byte(0xe0); /* fstsw ax */
4574     raw_sahf(0); /* sahf */
4575     }
4576     emit_byte(0xdd);
4577     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4578     }