ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.24
Committed: 2004-11-08T21:10:46Z (19 years, 10 months ago) by gbeauche
Branch: MAIN
Changes since 1.23: +57 -16 lines
Log Message:
Merge BSF simulation on P4 from Amithlon. Use 33-bit memory addressing model.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 cebix 1.19 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 cebix 1.19 * Basilisk II (C) 1997-2004 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
37     #define EAX_INDEX 0 /* Numeric indices naming the native registers; 0-7 are the eight IA-32 general-purpose registers */
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__) /* indices 8-15 (R8..R15) are only defined for x86-64 builds */
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
56     /* The register in which subroutines return an integer return value */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX /* same register on every configuration handled in this file */
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__) /* x86-64 builds: first argument in index 7 (EDI_INDEX), second in index 6 (ESI_INDEX) */
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else /* every other build: EAX_INDEX then EDX_INDEX */
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX /* NOTE(review): identical to REG_PC_PRE on this path, although the #else branch asks for a different register - confirm this is intended */
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
84     uae_s8 always_used[]={4,-1}; /* register-index lists, each ending with -1; the only entry here is 4 (ESP_INDEX) */
85 gbeauche 1.20 #if defined(__x86_64__)
86     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* x86-64: every index 0-15 except 4 (ESP_INDEX); presumably the registers usable for byte-sized operands */
87     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* same set as can_byte on x86-64 */
88     #else
89 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1}; /* 32-bit: only indices 0-3 (EAX, ECX, EDX, EBX) */
90     uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; /* 32-bit: every index 0-7 except 4 (ESP_INDEX) */
91 gbeauche 1.20 #endif
92 gbeauche 1.1
93 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
94     /* Make sure interpretive core does not use cpuopti */
95     uae_u8 call_saved[]={0,0,0,1,1,1,1,1}; /* only 8 entries; this variant cannot be built because of the #error on the next line */
96 gbeauche 1.20 #error FIXME: code not ready
97 gbeauche 1.17 #else
98 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
99     by the caller */
100 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}; /* 16 entries, one per register index; only index 4 (ESP_INDEX) is set */
101 gbeauche 1.17 #endif
102 gbeauche 1.1
103     /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
105     so we need to preserve some, but don't want to rely on everyone else
106     also saving those registers
107     - Special registers (such as the stack pointer) should not be "preserved"
108     by pushing, even though they are "saved" across function calls
109     */
110 gbeauche 1.21 #if defined(__x86_64__)
111     /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
113     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1}; /* set for indices 3 (EBX_INDEX), 5 (EBP_INDEX) and 11-15; index 4 (ESP_INDEX) is deliberately left clear */
114 gbeauche 1.21 #else
115     static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1}; /* 32-bit: every index except 4 (ESP_INDEX) */
116     #endif
117 gbeauche 1.1
118     /* Whether classes of instructions do or don't clobber the native flags: an empty definition expands to nothing, the others expand to a clobber_flags() call */
119     #define CLOBBER_MOV
120     #define CLOBBER_LEA
121     #define CLOBBER_CMOV
122     #define CLOBBER_POP
123     #define CLOBBER_PUSH
124     #define CLOBBER_SUB clobber_flags()
125     #define CLOBBER_SBB clobber_flags()
126     #define CLOBBER_CMP clobber_flags()
127     #define CLOBBER_ADD clobber_flags()
128     #define CLOBBER_ADC clobber_flags()
129     #define CLOBBER_AND clobber_flags()
130     #define CLOBBER_OR clobber_flags()
131     #define CLOBBER_XOR clobber_flags()
132    
133     #define CLOBBER_ROL clobber_flags() /* rotate, shift, test, 16-bit swap, multiply and bit instructions are marked as flag-clobbering */
134     #define CLOBBER_ROR clobber_flags()
135     #define CLOBBER_SHLL clobber_flags()
136     #define CLOBBER_SHRL clobber_flags()
137     #define CLOBBER_SHRA clobber_flags()
138     #define CLOBBER_TEST clobber_flags()
139     #define CLOBBER_CL16 /* empty: expands to nothing, i.e. no clobber_flags() call for this class */
140     #define CLOBBER_CL8
141 gbeauche 1.20 #define CLOBBER_SE32
142 gbeauche 1.1 #define CLOBBER_SE16
143     #define CLOBBER_SE8
144 gbeauche 1.20 #define CLOBBER_ZE32
145 gbeauche 1.1 #define CLOBBER_ZE16
146     #define CLOBBER_ZE8
147     #define CLOBBER_SW16 clobber_flags() /* the 16-bit swap is emitted as a rotate in this file (raw_bswap_16), hence flag-clobbering */
148     #define CLOBBER_SW32
149     #define CLOBBER_SETCC
150     #define CLOBBER_MUL clobber_flags()
151     #define CLOBBER_BT clobber_flags()
152     #define CLOBBER_BSF clobber_flags()
153    
154 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
155 gbeauche 1.20 #if defined(__x86_64__) /* exception to the FIXME above: x86-64 builds always switch the new runtime assembler on */
156     #define USE_NEW_RTASM 1
157     #endif
158    
159     #if USE_NEW_RTASM /* start of the code generator variant built on codegen_x86.h; the matching #endif is not within this excerpt */
160 gbeauche 1.13
161     #if defined(__x86_64__) /* settings defined ahead of the include below; presumably read by codegen_x86.h - confirm there */
162     #define X86_TARGET_64BIT 1
163     #endif
164     #define X86_FLAT_REGISTERS 0
165 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
166     #define X86_OPTIMIZE_ROTSHI 1
167 gbeauche 1.13 #include "codegen_x86.h"
168    
169     #define x86_emit_byte(B) emit_byte(B) /* map the x86_* hook names onto this file's emit_* / get_target helpers */
170     #define x86_emit_word(W) emit_word(W)
171     #define x86_emit_long(L) emit_long(L)
172 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
173 gbeauche 1.13 #define x86_get_target() get_target()
174     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) /* failures report the expansion site and then abort via jit_fail */
175    
176     static void jit_fail(const char *msg, const char *file, int line, const char *function) /* Report a fatal JIT error on stderr and terminate; never returns */
177     {
178     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n", /* order printed: function, file, line, message */
179     function, file, line, msg);
180     abort();
181     }
182    
183     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) /* Emit a push of register r onto the native stack */
184     {
185 gbeauche 1.20 #if defined(__x86_64__)
186     PUSHQr(r); /* x86-64 build: quad-word push */
187     #else
188 gbeauche 1.13 PUSHLr(r); /* other builds: long-word push */
189 gbeauche 1.20 #endif
190 gbeauche 1.13 }
191     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
192    
193     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) /* Emit a pop from the native stack into register r */
194     {
195 gbeauche 1.20 #if defined(__x86_64__)
196     POPQr(r); /* x86-64 build: quad-word pop */
197     #else
198 gbeauche 1.13 POPLr(r); /* other builds: long-word pop */
199 gbeauche 1.20 #endif
200 gbeauche 1.13 }
201     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
202    
203 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) /* Emit a pop from the native stack into the memory operand at d (no base or index register) */
204     {
205     #if defined(__x86_64__)
206     POPQm(d, X86_NOREG, X86_NOREG, 1); /* x86-64 build: quad-word pop */
207     #else
208     POPLm(d, X86_NOREG, X86_NOREG, 1); /* other builds: long-word pop */
209     #endif
210     }
211     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
212    
213 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) /* Emit a 32-bit bit test (BT) of register r, bit number given by immediate i */
214     {
215     BTLir(i, r);
216     }
217     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
218    
219     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) /* Emit a 32-bit bit test (BT) of register r, bit number taken from register b */
220     {
221     BTLrr(b, r);
222     }
223     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
224    
225     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit bit test-and-complement (BTC) on register r, bit number given by immediate i */
226     {
227     BTCLir(i, r);
228     }
229     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
230    
231     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) /* Emit a 32-bit bit test-and-complement (BTC) on register r, bit number taken from register b */
232     {
233     BTCLrr(b, r);
234     }
235     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
236    
237     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit bit test-and-reset (BTR) on register r, bit number given by immediate i */
238     {
239     BTRLir(i, r);
240     }
241     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
242    
243     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) /* Emit a 32-bit bit test-and-reset (BTR) on register r, bit number taken from register b */
244     {
245     BTRLrr(b, r);
246     }
247     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
248    
249     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit bit test-and-set (BTS) on register r, bit number given by immediate i */
250     {
251     BTSLir(i, r);
252     }
253     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
254    
255     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) /* Emit a 32-bit bit test-and-set (BTS) on register r, bit number taken from register b */
256     {
257     BTSLrr(b, r);
258     }
259     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
260    
261     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) /* Emit a 16-bit subtract of immediate i from register d */
262     {
263     SUBWir(i, d);
264     }
265     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
266    
267     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) /* Emit a 32-bit load into register d from the memory operand at s (no base or index register) */
268     {
269     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
270     }
271     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
272    
273     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) /* Emit a 32-bit store of immediate s to the memory operand at d (no base or index register) */
274     {
275     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
276     }
277     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
278    
279     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) /* Emit a 16-bit store of immediate s to the memory operand at d (no base or index register) */
280     {
281     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
282     }
283     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
284    
285     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) /* Emit an 8-bit store of immediate s to the memory operand at d (no base or index register) */
286     {
287     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
288     }
289     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
290    
291     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) /* Emit a rotate-left by immediate i of the byte in memory at d (no base or index register) */
292     {
293     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
294     }
295     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
296    
297     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) /* Emit an 8-bit rotate-left of register r by immediate i */
298     {
299     ROLBir(i, r);
300     }
301     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
302    
303     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) /* Emit a 16-bit rotate-left of register r by immediate i */
304     {
305     ROLWir(i, r);
306     }
307     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
308    
309     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit rotate-left of register r by immediate i */
310     {
311     ROLLir(i, r);
312     }
313     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
314    
315     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) /* Emit a 32-bit rotate-left of register d by the count held in byte register r */
316     {
317     ROLLrr(r, d);
318     }
319     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320    
321     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) /* Emit a 16-bit rotate-left of register d by the count held in byte register r */
322     {
323     ROLWrr(r, d);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) /* Emit an 8-bit rotate-left of register d by the count held in byte register r */
328     {
329     ROLBrr(r, d);
330     }
331     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
332    
333     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) /* Emit a 32-bit shift-left of register d by the count held in byte register r */
334     {
335     SHLLrr(r, d);
336     }
337     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
338    
339     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) /* Emit a 16-bit shift-left of register d by the count held in byte register r */
340     {
341     SHLWrr(r, d);
342     }
343     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344    
345     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) /* Emit an 8-bit shift-left of register d by the count held in byte register r */
346     {
347     SHLBrr(r, d);
348     }
349     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350    
351     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) /* Emit an 8-bit rotate-right of register r by immediate i */
352     {
353     RORBir(i, r);
354     }
355     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
356    
357     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) /* Emit a 16-bit rotate-right of register r by immediate i */
358     {
359     RORWir(i, r);
360     }
361     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
362    
363     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) /* Emit a 32-bit OR of the memory operand at s (no base or index register) into register d */
364     {
365     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
366     }
367     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
368    
369     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit rotate-right of register r by immediate i */
370     {
371     RORLir(i, r);
372     }
373     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
374    
375     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) /* Emit a 32-bit rotate-right of register d by the count held in byte register r */
376     {
377     RORLrr(r, d);
378     }
379     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
380    
381     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) /* Emit a 16-bit rotate-right of register d by the count held in byte register r */
382     {
383     RORWrr(r, d);
384     }
385     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
386    
387     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) /* Emit an 8-bit rotate-right of register d by the count held in byte register r */
388     {
389     RORBrr(r, d);
390     }
391     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
392    
393     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) /* Emit a 32-bit logical shift-right of register d by the count held in byte register r */
394     {
395     SHRLrr(r, d);
396     }
397     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
398    
399     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) /* Emit a 16-bit logical shift-right of register d by the count held in byte register r */
400     {
401     SHRWrr(r, d);
402     }
403     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
404    
405     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) /* Emit an 8-bit logical shift-right of register d by the count held in byte register r */
406     {
407     SHRBrr(r, d);
408     }
409     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
410    
411     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) /* Emit a 32-bit arithmetic shift-right (SAR) of register d by the count held in byte register r */
412     {
413 gbeauche 1.14 SARLrr(r, d);
414 gbeauche 1.13 }
415     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
416    
417     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) /* Emit a 16-bit arithmetic shift-right (SAR) of register d by the count held in byte register r */
418     {
419 gbeauche 1.14 SARWrr(r, d);
420 gbeauche 1.13 }
421     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
422    
423     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) /* Emit an 8-bit arithmetic shift-right (SAR) of register d by the count held in byte register r */
424     {
425 gbeauche 1.14 SARBrr(r, d);
426 gbeauche 1.13 }
427     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
428    
429     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit shift-left of register r by immediate i */
430     {
431     SHLLir(i, r);
432     }
433     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
434    
435     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) /* Emit a 16-bit shift-left of register r by immediate i */
436     {
437     SHLWir(i, r);
438     }
439     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
440    
441     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) /* Emit an 8-bit shift-left of register r by immediate i */
442     {
443     SHLBir(i, r);
444     }
445     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
446    
447     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit logical shift-right of register r by immediate i */
448     {
449     SHRLir(i, r);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) /* Emit a 16-bit logical shift-right of register r by immediate i */
454     {
455     SHRWir(i, r);
456     }
457     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
458    
459     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) /* Emit an 8-bit logical shift-right of register r by immediate i */
460     {
461     SHRBir(i, r);
462     }
463     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
464    
465     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) /* Emit a 32-bit arithmetic shift-right (SAR) of register r by immediate i */
466     {
467 gbeauche 1.14 SARLir(i, r);
468 gbeauche 1.13 }
469     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
470    
471     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) /* Emit a 16-bit arithmetic shift-right (SAR) of register r by immediate i */
472     {
473 gbeauche 1.14 SARWir(i, r);
474 gbeauche 1.13 }
475     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
476    
477     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) /* Emit an 8-bit arithmetic shift-right (SAR) of register r by immediate i */
478     {
479 gbeauche 1.14 SARBir(i, r);
480 gbeauche 1.13 }
481     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
482    
483     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) /* Emit SAHF; dummy_ah is not used by the body (presumably it only exists for the LOWFUNC register bookkeeping) */
484     {
485     SAHF();
486     }
487     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
488    
489     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) /* Emit CPUID; dummy_eax is not used by the body (presumably it only exists for the LOWFUNC register bookkeeping) */
490     {
491     CPUID();
492     }
493     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
494    
495     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) /* Emit LAHF; dummy_ah is not used by the body (presumably it only exists for the LOWFUNC register bookkeeping) */
496     {
497     LAHF();
498     }
499     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
500    
501     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) /* Emit a SETcc into byte register d for condition code cc */
502     {
503     SETCCir(cc, d);
504     }
505     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
506    
507     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) /* Emit a SETcc for condition code cc into the byte in memory at d (no base or index register) */
508     {
509     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
510     }
511     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
512    
513     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) /* Emit a conditional 32-bit move of register s into register d under condition cc */
514     {
515 gbeauche 1.15 if (have_cmov) /* CMOV available: one instruction does the job */
516     CMOVLrr(cc, s, d);
517     else { /* replacement using branch and mov */
518     #if defined(__x86_64__)
519     write_log("x86-64 implementations are bound to have CMOV!\n");
520     abort(); /* on x86-64 builds the fallback below is therefore never executed */
521     #endif
522     JCCSii(cc^1, 2); /* short jump on cc^1 (presumably the opposite condition) skipping 2 bytes, which should be the size of the MOV below - TODO confirm */
523     MOVLrr(s, d);
524     }
525 gbeauche 1.13 }
526     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
527    
528     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) /* Emit a 32-bit bit-scan-forward (BSF) of register s with the result in register d */
529     {
530     BSFLrr(s, d);
531     }
532     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
533    
534 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) /* Emit a sign extension of the 32-bit value in s into d via MOVSLQrr (presumably long-to-quad, hence 64-bit only - confirm in codegen_x86.h) */
535     {
536     MOVSLQrr(s, d);
537     }
538     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
539    
540 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) /* Emit a sign extension of the 16-bit register s into the 32-bit register d */
541     {
542     MOVSWLrr(s, d);
543     }
544     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
545    
546     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) /* Emit a sign extension of the 8-bit register s into the 32-bit register d */
547     {
548     MOVSBLrr(s, d);
549     }
550     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
551    
552     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) /* Emit a zero extension of the 16-bit register s into the 32-bit register d */
553     {
554     MOVZWLrr(s, d);
555     }
556     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
557    
558     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) /* Emit a zero extension of the 8-bit register s into the 32-bit register d */
559     {
560     MOVZBLrr(s, d);
561     }
562     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
563    
564     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) /* Emit the two-operand signed multiply: register d is multiplied by register s, result kept in d */
565     {
566 gbeauche 1.14 IMULLrr(s, d);
567 gbeauche 1.13 }
568     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
569    
570     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) /* Emit the one-operand signed multiply IMULLr(s); only accepted for the fixed register pair checked below */
571     {
572 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* d must be MUL_NREG1 and s must be MUL_NREG2, otherwise log and abort */
573     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
574 gbeauche 1.13 abort();
575 gbeauche 1.14 }
576     IMULLr(s);
577 gbeauche 1.13 }
578     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
579    
580     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) /* Emit the one-operand unsigned multiply MULLr(s); only accepted for the fixed register pair checked below */
581     {
582 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* d must be MUL_NREG1 and s must be MUL_NREG2, otherwise log and abort */
583     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
584 gbeauche 1.13 abort();
585 gbeauche 1.14 }
586     MULLr(s);
587 gbeauche 1.13 }
588     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
589    
590     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) /* Not implemented on x86: emits nothing and aborts unconditionally if ever called */
591     {
592 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
593 gbeauche 1.13 }
594     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
595    
596     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) /* Emit an 8-bit register-to-register move from s to d */
597     {
598     MOVBrr(s, d);
599     }
600     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
601    
602     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) /* Emit a 16-bit register-to-register move from s to d */
603     {
604     MOVWrr(s, d);
605     }
606     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
607    
608     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) /* Emit a 32-bit load into d from the address baser + index*factor (zero displacement) */
609     {
610     MOVLmr(0, baser, index, factor, d);
611     }
612     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
613    
614     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) /* Emit a 16-bit load into d from the address baser + index*factor (zero displacement) */
615     {
616     MOVWmr(0, baser, index, factor, d);
617     }
618     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
619    
620     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) /* Emit an 8-bit load into d from the address baser + index*factor (zero displacement) */
621     {
622     MOVBmr(0, baser, index, factor, d);
623     }
624     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
625    
626     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) /* Emit a 32-bit store of register s to the address baser + index*factor (zero displacement) */
627     {
628     MOVLrm(s, 0, baser, index, factor);
629     }
630     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
631    
632     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) /* Emit a 16-bit store of register s to the address baser + index*factor (zero displacement) */
633     {
634     MOVWrm(s, 0, baser, index, factor);
635     }
636     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
637    
638     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) /* Emit an 8-bit store of register s to the address baser + index*factor (zero displacement) */
639     {
640     MOVBrm(s, 0, baser, index, factor);
641     }
642     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
643    
644     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) /* Emit a 32-bit store of register s to the address base + baser + index*factor */
645     {
646     MOVLrm(s, base, baser, index, factor);
647     }
648     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
649    
650     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) /* Emit a 16-bit store of register s to the address base + baser + index*factor */
651     {
652     MOVWrm(s, base, baser, index, factor);
653     }
654     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
655    
656     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) /* Emit an 8-bit store of register s to the address base + baser + index*factor */
657     {
658     MOVBrm(s, base, baser, index, factor);
659     }
660     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
661    
662     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) /* Emit a 32-bit load into d from the address base + baser + index*factor */
663     {
664     MOVLmr(base, baser, index, factor, d);
665     }
666     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
667    
668     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) /* Emit a 16-bit load into d from the address base + baser + index*factor */
669     {
670     MOVWmr(base, baser, index, factor, d);
671     }
672     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
673    
674     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) /* Emit an 8-bit load into d from the address base + baser + index*factor */
675     {
676     MOVBmr(base, baser, index, factor, d);
677     }
678     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
679    
680     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) /* Emit a 32-bit load into d from the address base + index*factor (no base register) */
681     {
682     MOVLmr(base, X86_NOREG, index, factor, d);
683     }
684     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
685    
686     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) /* Emit a conditional 32-bit load into d from base + index*factor under condition cond */
687     {
688 gbeauche 1.15 if (have_cmov) /* CMOV available: one instruction does the job */
689     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
690     else { /* replacement using branch and mov */
691     #if defined(__x86_64__)
692     write_log("x86-64 implementations are bound to have CMOV!\n");
693     abort(); /* on x86-64 builds the fallback below is therefore never executed */
694     #endif
695     JCCSii(cond^1, 7); /* short jump on cond^1 (presumably the opposite condition) skipping 7 bytes, which should be the size of the load below - TODO confirm */
696     MOVLmr(base, X86_NOREG, index, factor, d);
697     }
698 gbeauche 1.13 }
699     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
700    
701     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) /* Emit a conditional 32-bit load into d from the memory operand at mem under condition cond */
702     {
703 gbeauche 1.15 if (have_cmov) /* CMOV available: one instruction does the job */
704     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
705     else { /* replacement using branch and mov */
706     #if defined(__x86_64__)
707     write_log("x86-64 implementations are bound to have CMOV!\n");
708     abort(); /* on x86-64 builds the fallback below is therefore never executed */
709     #endif
710     JCCSii(cond^1, 6); /* short jump on cond^1 (presumably the opposite condition) skipping 6 bytes, which should be the size of the load below - TODO confirm */
711     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
712     }
713 gbeauche 1.13 }
714     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
715    
716     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) /* Emit a 32-bit load into d from the address held in register s plus offset */
717     {
718     MOVLmr(offset, s, X86_NOREG, 1, d);
719     }
720     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
721    
722     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) /* Emit a 16-bit load into d from the address held in register s plus offset */
723     {
724     MOVWmr(offset, s, X86_NOREG, 1, d);
725     }
726     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
727    
728     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) /* Emit an 8-bit load into d from the address held in register s plus offset */
729     {
730     MOVBmr(offset, s, X86_NOREG, 1, d);
731     }
732     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
733    
734     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) /* Emit a 32-bit load into d from the address in s plus offset; the body is the same emitter call as raw_mov_l_rR */
735     {
736     MOVLmr(offset, s, X86_NOREG, 1, d);
737     }
738     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
739    
740     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) /* Emit a 16-bit load into d from the address in s plus offset; the body is the same emitter call as raw_mov_w_rR */
741     {
742     MOVWmr(offset, s, X86_NOREG, 1, d);
743     }
744     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
745    
746     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) /* Emit an 8-bit load into d from the address in s plus offset; the body is the same emitter call as raw_mov_b_rR */
747     {
748     MOVBmr(offset, s, X86_NOREG, 1, d);
749     }
750     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
751    
752     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) /* Emit a 32-bit store of immediate i to the address held in register d plus offset */
753     {
754     MOVLim(i, offset, d, X86_NOREG, 1);
755     }
756     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
757    
758     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) /* Emit a 16-bit store of immediate i to the address held in register d plus offset */
759     {
760     MOVWim(i, offset, d, X86_NOREG, 1);
761     }
762     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
763    
764     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) /* Emit an 8-bit store of immediate i to the address held in register d plus offset */
765     {
766     MOVBim(i, offset, d, X86_NOREG, 1);
767     }
768     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
769    
770     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) /* Emit a 32-bit store of register s to the address held in register d plus offset */
771     {
772     MOVLrm(s, offset, d, X86_NOREG, 1);
773     }
774     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
775    
776     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) /* Emit a 16-bit store of register s to the address held in register d plus offset */
777     {
778     MOVWrm(s, offset, d, X86_NOREG, 1);
779     }
780     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
781    
782     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) /* Emit an 8-bit store of register s to the address held in register d plus offset */
783     {
784     MOVBrm(s, offset, d, X86_NOREG, 1);
785     }
786     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
787    
788     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) /* Emit an LEA that sets d to s + offset (address computation only, no memory access) */
789     {
790     LEALmr(offset, s, X86_NOREG, 1, d);
791     }
792     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
793    
794     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) /* Emit an LEA that sets d to s + index*factor + offset */
795     {
796     LEALmr(offset, s, index, factor, d);
797     }
798     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
799    
800     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) /* Emit an LEA that sets d to s + index*factor (zero displacement) */
801     {
802     LEALmr(0, s, index, factor, d);
803     }
804     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
805    
806     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) /* Emit a 32-bit store of s to the address in d plus offset; the body is the same emitter call as raw_mov_l_Rr */
807     {
808     MOVLrm(s, offset, d, X86_NOREG, 1);
809     }
810     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
811    
812     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) /* Emit a 16-bit store of s to the address in d plus offset; the body is the same emitter call as raw_mov_w_Rr */
813     {
814     MOVWrm(s, offset, d, X86_NOREG, 1);
815     }
816     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
817    
818     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) /* Emit an 8-bit store of s to the address in d plus offset; the body is the same emitter call as raw_mov_b_Rr */
819     {
820     MOVBrm(s, offset, d, X86_NOREG, 1);
821     }
822     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
823    
824     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) /* Emit a BSWAP that reverses the byte order of the 32-bit register r */
825     {
826     BSWAPLr(r);
827     }
828     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
829    
830     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) /* Swap the two bytes of the 16-bit register r */
831     {
832     ROLWir(8, r); /* done as a 16-bit rotate by 8 rather than a BSWAP */
833     }
834     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
835    
836     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) /* Emit a 32-bit register-to-register move from s to d */
837     {
838     MOVLrr(s, d);
839     }
840     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
841    
842     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) /* Emit a 32-bit store of register s to the memory operand at d (no base or index register) */
843     {
844     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
845     }
846     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
847    
848     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) /* Emit a 16-bit store of register s to the memory operand at d (no base or index register) */
849     {
850     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
851     }
852     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
853    
854     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) /* Emit a 16-bit load into register d from the memory operand at s (no base or index register) */
855     {
856     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
857     }
858     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
859    
860     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) /* Emit an 8-bit store of register s to the memory operand at d (no base or index register) */
861     {
862     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
863     }
864     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
865    
866     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) /* Emit an 8-bit load into register d from the memory operand at s (no base or index register) */
867     {
868     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
869     }
870     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
871    
872     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) /* Emit a load of the 32-bit immediate s into register d */
873     {
874     MOVLir(s, d);
875     }
876     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
877    
878     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) /* Emit a load of the 16-bit immediate s into register d */
879     {
880     MOVWir(s, d);
881     }
882     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
883    
884     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) /* Emit a load of the 8-bit immediate s into register d */
885     {
886     MOVBir(s, d);
887     }
888     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
889    
890     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) /* Emit a 32-bit add-with-carry of immediate s to the memory operand at d (no base or index register) */
891     {
892     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
893     }
894     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
895    
896     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) /* Emit a 32-bit add of immediate s to the memory operand at d (no base or index register) */
897     {
898     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
899     }
900     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
901    
902     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) /* Emit a 16-bit add of immediate s to the memory operand at d (no base or index register) */
903     {
904     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
905     }
906     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
907    
908     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) /* Emit an 8-bit add of immediate s to the memory operand at d (no base or index register) */
909     {
910     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
911     }
912     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
913    
914     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) /* Emit a 32-bit TEST of register d against immediate i; d is only read */
915     {
916     TESTLir(i, d);
917     }
918     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
919    
920     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) /* Emit a 32-bit TEST of register d against register s; both are only read */
921     {
922     TESTLrr(s, d);
923     }
924     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
925    
926     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) /* Emit a 16-bit TEST of register d against register s; both are only read */
927     {
928     TESTWrr(s, d);
929     }
930     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
931    
932     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) /* Forwards to TESTBrr; presumably an 8-bit TEST of registers d and s (macro not visible here). */
933     {
934     TESTBrr(s, d);
935     }
936     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
937    
938 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) /* Forwards to XORLir; presumably d ^= i on 32 bits (macro not visible here). */
939     {
940     XORLir(i, d);
941     }
942     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
943    
944 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) /* Forwards to ANDLir; presumably d &= i on 32 bits (macro not visible here). */
945     {
946     ANDLir(i, d);
947     }
948     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
949    
950     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) /* Forwards to ANDWir; presumably d &= i on 16 bits (macro not visible here). */
951     {
952     ANDWir(i, d);
953     }
954     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
955    
956     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) /* Forwards to ANDLrr; presumably d &= s on 32-bit registers (macro not visible here). */
957     {
958     ANDLrr(s, d);
959     }
960     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
961    
962     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) /* Forwards to ANDWrr; presumably d &= s on 16-bit registers (macro not visible here). */
963     {
964     ANDWrr(s, d);
965     }
966     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
967    
968     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) /* Forwards to ANDBrr; presumably d &= s on 8-bit registers (macro not visible here). */
969     {
970     ANDBrr(s, d);
971     }
972     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
973    
974     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) /* Forwards to ORLir; presumably d |= i on 32 bits (macro not visible here). */
975     {
976     ORLir(i, d);
977     }
978     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
979    
980     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) /* Forwards to ORLrr; presumably d |= s on 32-bit registers (macro not visible here). */
981     {
982     ORLrr(s, d);
983     }
984     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
985    
986     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) /* Forwards to ORWrr; presumably d |= s on 16-bit registers (macro not visible here). */
987     {
988     ORWrr(s, d);
989     }
990     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
991    
992     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) /* Forwards to ORBrr; presumably d |= s on 8-bit registers (macro not visible here). */
993     {
994     ORBrr(s, d);
995     }
996     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
997    
998     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) /* Forwards to ADCLrr; presumably d += s + carry on 32-bit registers (macro not visible here). */
999     {
1000     ADCLrr(s, d);
1001     }
1002     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1003    
1004     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) /* Forwards to ADCWrr; presumably d += s + carry on 16-bit registers (macro not visible here). */
1005     {
1006     ADCWrr(s, d);
1007     }
1008     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1009    
1010     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) /* Forwards to ADCBrr; presumably d += s + carry on 8-bit registers (macro not visible here). */
1011     {
1012     ADCBrr(s, d);
1013     }
1014     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1015    
1016     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) /* Forwards to ADDLrr; presumably d += s on 32-bit registers (macro not visible here). */
1017     {
1018     ADDLrr(s, d);
1019     }
1020     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1021    
1022     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) /* Forwards to ADDWrr; presumably d += s on 16-bit registers (macro not visible here). */
1023     {
1024     ADDWrr(s, d);
1025     }
1026     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1027    
1028     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) /* Forwards to ADDBrr; presumably d += s on 8-bit registers (macro not visible here). */
1029     {
1030     ADDBrr(s, d);
1031     }
1032     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1033    
1034     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) /* Forwards to SUBLir; presumably d -= i on 32 bits (macro not visible here). */
1035     {
1036     SUBLir(i, d);
1037     }
1038     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1039    
1040     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) /* Forwards to SUBBir; presumably d -= i on 8 bits (macro not visible here). */
1041     {
1042     SUBBir(i, d);
1043     }
1044     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1045    
1046     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) /* Forwards to ADDLir; presumably d += i on 32 bits (macro not visible here). */
1047     {
1048     ADDLir(i, d);
1049     }
1050     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1051    
1052     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) /* Forwards to ADDWir; presumably d += i on 16 bits (macro not visible here). */
1053     {
1054     ADDWir(i, d);
1055     }
1056     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1057    
1058     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) /* Forwards to ADDBir; presumably d += i on 8 bits (macro not visible here). */
1059     {
1060     ADDBir(i, d);
1061     }
1062     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1063    
1064     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) /* Forwards to SBBLrr; presumably d -= s + borrow on 32-bit registers (macro not visible here). */
1065     {
1066     SBBLrr(s, d);
1067     }
1068     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1069    
1070     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) /* Forwards to SBBWrr; presumably d -= s + borrow on 16-bit registers (macro not visible here). */
1071     {
1072     SBBWrr(s, d);
1073     }
1074     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1075    
1076     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) /* Forwards to SBBBrr; presumably d -= s + borrow on 8-bit registers (macro not visible here). */
1077     {
1078     SBBBrr(s, d);
1079     }
1080     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1081    
1082     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) /* Forwards to SUBLrr; presumably d -= s on 32-bit registers (macro not visible here). */
1083     {
1084     SUBLrr(s, d);
1085     }
1086     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1087    
1088     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) /* Forwards to SUBWrr; presumably d -= s on 16-bit registers (macro not visible here). */
1089     {
1090     SUBWrr(s, d);
1091     }
1092     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1093    
1094     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) /* Forwards to SUBBrr; presumably d -= s on 8-bit registers (macro not visible here). */
1095     {
1096     SUBBrr(s, d);
1097     }
1098     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1099    
1100     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) /* Forwards to CMPLrr; presumably a 32-bit compare of d with s (macro not visible here). */
1101     {
1102     CMPLrr(s, d);
1103     }
1104     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1105    
1106     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) /* Forwards to CMPLir; presumably a 32-bit compare of register r with immediate i (macro not visible here). */
1107     {
1108     CMPLir(i, r);
1109     }
1110     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1111    
1112     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) /* Forwards to CMPWrr; presumably a 16-bit compare of d with s (macro not visible here). */
1113     {
1114     CMPWrr(s, d);
1115     }
1116     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1117    
1118     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) /* Forwards to CMPBim; presumably compares the byte at absolute address d with immediate s (macro not visible here). */
1119     {
1120     CMPBim(s, d, X86_NOREG, X86_NOREG, 1); /* d is the only address component: no base, no index, scale 1 */
1121     }
1122     LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1123    
1124     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) /* Forwards to CMPBir; presumably an 8-bit compare of register d with immediate i (macro not visible here). */
1125     {
1126     CMPBir(i, d);
1127     }
1128     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1129    
1130     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) /* Forwards to CMPBrr; presumably an 8-bit compare of d with s (macro not visible here). */
1131     {
1132     CMPBrr(s, d);
1133     }
1134     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1135    
1136     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) /* Forwards to CMPLmr; presumably compares d with the long at offset + index*factor (macro not visible here). */
1137     {
1138     CMPLmr(offset, X86_NOREG, index, factor, d); /* no base register is passed (X86_NOREG) */
1139     }
1140     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1141    
1142     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) /* Forwards to XORLrr; presumably d ^= s on 32-bit registers (macro not visible here). */
1143     {
1144     XORLrr(s, d);
1145     }
1146     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1147    
1148     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) /* Forwards to XORWrr; presumably d ^= s on 16-bit registers (macro not visible here). */
1149     {
1150     XORWrr(s, d);
1151     }
1152     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1153    
1154     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) /* Forwards to XORBrr; presumably d ^= s on 8-bit registers (macro not visible here). */
1155     {
1156     XORBrr(s, d);
1157     }
1158     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1159    
1160     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) /* Forwards to SUBLim; presumably subtracts immediate s from the long at absolute address d (macro not visible here). */
1161     {
1162     SUBLim(s, d, X86_NOREG, X86_NOREG, 1); /* d is the only address component: no base, no index, scale 1 */
1163     }
1164     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1165    
1166     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) /* Forwards to CMPLim; presumably compares the long at absolute address d with immediate s (macro not visible here). */
1167     {
1168     CMPLim(s, d, X86_NOREG, X86_NOREG, 1); /* d is the only address component: no base, no index, scale 1 */
1169     }
1170     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1171    
1172     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) /* Forwards to XCHGLrr; presumably swaps the 32-bit registers r1 and r2 (macro not visible here). */
1173     {
1174     XCHGLrr(r2, r1);
1175     }
1176     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1177    
1178     LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) /* Forwards to PUSHF(); presumably pushes the flags register onto the stack (macro not visible here). */
1179     {
1180 gbeauche 1.18 PUSHF();
1181 gbeauche 1.13 }
1182     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1183    
1184     LOWFUNC(WRITE,READ,0,raw_popfl,(void)) /* Forwards to POPF(); presumably pops the flags register from the stack (macro not visible here). */
1185     {
1186 gbeauche 1.18 POPF();
1187 gbeauche 1.13 }
1188     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1189    
1190     #else
1191    
1192 gbeauche 1.2 const bool optimize_accum = true; /* lets raw_sub_w_ri pick the shorter accumulator-only opcode when the register is EAX */
1193 gbeauche 1.1 const bool optimize_imm8 = true; /* not referenced in this part of the file; presumably gates sign-extended imm8 encodings -- TODO confirm */
1194     const bool optimize_shift_once = true; /* lets shift/rotate emitters use the count-less D0/D1 opcodes when the count is 1 */
1195    
1196     /*************************************************************************
1197     * Actual encoding of the instructions on the target CPU *
1198     *************************************************************************/
1199    
1200 gbeauche 1.2 static __inline__ int isaccum(int r) /* Returns non-zero when register number r is EAX_INDEX, zero otherwise. */
1201     {
1202     return (r == EAX_INDEX);
1203     }
1204    
1205 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x) /* Returns non-zero when x fits in a signed 8-bit value (-128..127). */
1206     {
1207     return (x>=-128 && x<=127);
1208     }
1209    
1210     static __inline__ int isword(uae_s32 x) /* Returns non-zero when x fits in a signed 16-bit value (-32768..32767). */
1211     {
1212     return (x>=-32768 && x<=32767);
1213     }
1214    
1215     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) /* Emits PUSH r32: one byte, opcode 0x50 plus the register number r. */
1216     {
1217     emit_byte(0x50+r);
1218     }
1219     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1220    
1221     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) /* Emits POP r32: one byte, opcode 0x58 plus the register number r. */
1222     {
1223     emit_byte(0x58+r);
1224     }
1225     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1226    
1227 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) /* Emits POP m32 with the destination given as the absolute 32-bit address d. */
1228     {
1229     emit_byte(0x8f); /* opcode 8F /0: POP r/m32 */
1230     emit_byte(0x05); /* ModRM: mod=00, reg=/0, r/m=101 -> disp32 only */
1231     emit_long(d);
1232     }
1233     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1234    
1235 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) /* Emits BT r32, imm8: tests bit i of register r. */
1236     {
1237     emit_byte(0x0f);
1238     emit_byte(0xba); /* 0F BA is the bit-test group with an imm8 bit index */
1239     emit_byte(0xe0+r); /* ModRM: register-direct, /4 selects BT */
1240     emit_byte(i); /* only the low 8 bits of i are emitted */
1241     }
1242     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1243    
1244     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) /* Emits BT r32, r32: tests the bit of r whose index is held in register b. */
1245     {
1246     emit_byte(0x0f);
1247     emit_byte(0xa3); /* 0F A3: BT r/m32, r32 */
1248     emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg field = b (bit index), r/m = r (operand) */
1249     }
1250     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1251    
1252     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) /* Emits BTC r32, imm8: tests and complements bit i of register r. */
1253     {
1254     emit_byte(0x0f);
1255     emit_byte(0xba); /* 0F BA is the bit-test group with an imm8 bit index */
1256     emit_byte(0xf8+r); /* ModRM: register-direct, /7 selects BTC */
1257     emit_byte(i); /* only the low 8 bits of i are emitted */
1258     }
1259     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1260    
1261     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) /* Emits BTC r32, r32: tests and complements the bit of r indexed by register b. */
1262     {
1263     emit_byte(0x0f);
1264     emit_byte(0xbb); /* 0F BB: BTC r/m32, r32 */
1265     emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg field = b (bit index), r/m = r (operand) */
1266     }
1267     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1268    
1269    
1270     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) /* Emits BTR r32, imm8: tests and clears bit i of register r. */
1271     {
1272     emit_byte(0x0f);
1273     emit_byte(0xba); /* 0F BA is the bit-test group with an imm8 bit index */
1274     emit_byte(0xf0+r); /* ModRM: register-direct, /6 selects BTR */
1275     emit_byte(i); /* only the low 8 bits of i are emitted */
1276     }
1277     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1278    
1279     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) /* Emits BTR r32, r32: tests and clears the bit of r indexed by register b. */
1280     {
1281     emit_byte(0x0f);
1282     emit_byte(0xb3); /* 0F B3: BTR r/m32, r32 */
1283     emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg field = b (bit index), r/m = r (operand) */
1284     }
1285     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1286    
1287     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) /* Emits BTS r32, imm8: tests and sets bit i of register r. */
1288     {
1289     emit_byte(0x0f);
1290     emit_byte(0xba); /* 0F BA is the bit-test group with an imm8 bit index */
1291     emit_byte(0xe8+r); /* ModRM: register-direct, /5 selects BTS */
1292     emit_byte(i); /* only the low 8 bits of i are emitted */
1293     }
1294     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1295    
1296     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) /* Emits BTS r32, r32: tests and sets the bit of r indexed by register b. */
1297     {
1298     emit_byte(0x0f);
1299     emit_byte(0xab); /* 0F AB: BTS r/m32, r32 */
1300     emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg field = b (bit index), r/m = r (operand) */
1301     }
1302     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1303    
1304     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) /* Emits SUB r16, imm, choosing the shortest of three encodings for the immediate i. */
1305     {
1306     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1307     if (isbyte(i)) {
1308     emit_byte(0x83); /* 83 /5 ib: SUB r/m16, sign-extended imm8 */
1309     emit_byte(0xe8+d);
1310     emit_byte(i);
1311     }
1312     else {
1313 gbeauche 1.2 if (optimize_accum && isaccum(d))
1314     emit_byte(0x2d); /* 2D iw: accumulator-only form, no ModRM byte needed */
1315     else {
1316 gbeauche 1.1 emit_byte(0x81); /* 81 /5 iw: SUB r/m16, imm16 */
1317     emit_byte(0xe8+d);
1318 gbeauche 1.2 }
1319 gbeauche 1.1 emit_word(i); /* 16-bit immediate shared by both non-imm8 forms */
1320     }
1321     }
1322     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1323    
1324    
1325     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) /* Emits MOV r32, m32: loads the long at absolute address s into register d. */
1326     {
1327     emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 */
1328     emit_byte(0x05+8*d); /* ModRM: mod=00, reg=d, r/m=101 -> disp32 only */
1329     emit_long(s);
1330     }
1331     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1332    
1333     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) /* Emits MOV m32, imm32: stores the immediate s at absolute address d. */
1334     {
1335     emit_byte(0xc7); /* C7 /0 id: MOV r/m32, imm32 */
1336     emit_byte(0x05); /* ModRM: mod=00, reg=/0, r/m=101 -> disp32 only */
1337     emit_long(d); /* address comes first ... */
1338     emit_long(s); /* ... then the immediate */
1339     }
1340     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1341    
1342     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) /* Emits MOV m16, imm16: stores the 16-bit immediate s at absolute address d. */
1343     {
1344     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1345     emit_byte(0xc7); /* C7 /0 iw: MOV r/m16, imm16 */
1346     emit_byte(0x05); /* ModRM: mod=00, reg=/0, r/m=101 -> disp32 only */
1347     emit_long(d);
1348     emit_word(s); /* immediate shrinks to 16 bits because of the prefix */
1349     }
1350     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1351    
1352     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) /* Emits MOV m8, imm8: stores the 8-bit immediate s at absolute address d. */
1353     {
1354     emit_byte(0xc6); /* C6 /0 ib: MOV r/m8, imm8 */
1355     emit_byte(0x05); /* ModRM: mod=00, reg=/0, r/m=101 -> disp32 only */
1356     emit_long(d);
1357     emit_byte(s);
1358     }
1359     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1360    
1361     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) /* Emits ROL m8, i: rotates the byte at absolute address d left by the immediate count i. */
1362     {
1363     if (optimize_shift_once && (i == 1)) {
1364     emit_byte(0xd0); /* D0 /0: rotate-by-1 form, saves the count byte */
1365     emit_byte(0x05); /* ModRM: mod=00, /0 = ROL, r/m=101 -> disp32 only */
1366     emit_long(d);
1367     }
1368     else {
1369     emit_byte(0xc0); /* C0 /0 ib: rotate by imm8 */
1370     emit_byte(0x05);
1371     emit_long(d);
1372     emit_byte(i);
1373     }
1374     }
1375     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1376    
1377     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) /* Emits ROL r8, i: rotates byte register r left by the immediate count i. */
1378     {
1379     if (optimize_shift_once && (i == 1)) {
1380     emit_byte(0xd0); /* D0 /0: rotate-by-1 form, saves the count byte */
1381     emit_byte(0xc0+r); /* ModRM: register-direct, /0 = ROL */
1382     }
1383     else {
1384     emit_byte(0xc0); /* C0 /0 ib: rotate by imm8 */
1385     emit_byte(0xc0+r);
1386     emit_byte(i);
1387     }
1388     }
1389     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1390    
1391     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) /* Emits ROL r16, imm8; unlike the byte/long variants it always uses the imm8 form, even for i == 1. */
1392     {
1393     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1394     emit_byte(0xc1); /* C1 /0 ib: rotate by imm8 */
1395     emit_byte(0xc0+r); /* ModRM: register-direct, /0 = ROL */
1396     emit_byte(i);
1397     }
1398     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1399    
1400     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) /* Emits ROL r32, i: rotates 32-bit register r left by the immediate count i. */
1401     {
1402     if (optimize_shift_once && (i == 1)) {
1403     emit_byte(0xd1); /* D1 /0: rotate-by-1 form, saves the count byte */
1404     emit_byte(0xc0+r); /* ModRM: register-direct, /0 = ROL */
1405     }
1406     else {
1407     emit_byte(0xc1); /* C1 /0 ib: rotate by imm8 */
1408     emit_byte(0xc0+r);
1409     emit_byte(i);
1410     }
1411     }
1412     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1413    
1414     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) /* Emits ROL r32, CL. r is never encoded: opcode D3 takes its count from CL, so r is presumably required to be CL by the caller. */
1415     {
1416     emit_byte(0xd3); /* D3 /0: rotate r/m32 by CL */
1417     emit_byte(0xc0+d); /* ModRM: register-direct, /0 = ROL */
1418     }
1419     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1420    
1421     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) /* Emits ROL r16, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1422     {
1423     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1424     emit_byte(0xd3); /* D3 /0: rotate by CL */
1425     emit_byte(0xc0+d); /* ModRM: register-direct, /0 = ROL */
1426     }
1427     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1428    
1429     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) /* Emits ROL r8, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1430     {
1431     emit_byte(0xd2); /* D2 /0: rotate r/m8 by CL */
1432     emit_byte(0xc0+d); /* ModRM: register-direct, /0 = ROL */
1433     }
1434     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1435    
1436     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) /* Emits SHL r32, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1437     {
1438     emit_byte(0xd3); /* D3 /4: shift r/m32 left by CL */
1439     emit_byte(0xe0+d); /* ModRM: register-direct, /4 = SHL */
1440     }
1441     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1442    
1443     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) /* Emits SHL r16, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1444     {
1445     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1446     emit_byte(0xd3); /* D3 /4: shift left by CL */
1447     emit_byte(0xe0+d); /* ModRM: register-direct, /4 = SHL */
1448     }
1449     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1450    
1451     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) /* Emits SHL r8, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1452     {
1453     emit_byte(0xd2); /* D2 /4: shift r/m8 left by CL */
1454     emit_byte(0xe0+d); /* ModRM: register-direct, /4 = SHL */
1455     }
1456     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1457    
1458     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) /* Emits ROR r8, i: rotates byte register r right by the immediate count i. */
1459     {
1460     if (optimize_shift_once && (i == 1)) {
1461     emit_byte(0xd0); /* D0 /1: rotate-by-1 form, saves the count byte */
1462     emit_byte(0xc8+r); /* ModRM: register-direct, /1 = ROR */
1463     }
1464     else {
1465     emit_byte(0xc0); /* C0 /1 ib: rotate by imm8 */
1466     emit_byte(0xc8+r);
1467     emit_byte(i);
1468     }
1469     }
1470     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1471    
1472     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) /* Emits ROR r16, imm8; always uses the imm8 form, even for i == 1. */
1473     {
1474     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1475     emit_byte(0xc1); /* C1 /1 ib: rotate by imm8 */
1476     emit_byte(0xc8+r); /* ModRM: register-direct, /1 = ROR */
1477     emit_byte(i);
1478     }
1479     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1480    
1481     // gb-- used for making an fpcr value in compemu_fpp.cpp
1482     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) /* Emits OR r32, m32: ORs the long at absolute address s into register d. */
1483     {
1484     emit_byte(0x0b); /* 0B /r: OR r32, r/m32 */
1485     emit_byte(0x05+8*d); /* ModRM: mod=00, reg=d, r/m=101 -> disp32 only */
1486     emit_long(s);
1487     }
1488     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1489    
1490     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) /* Emits ROR r32, i: rotates 32-bit register r right by the immediate count i. */
1491     {
1492     if (optimize_shift_once && (i == 1)) {
1493     emit_byte(0xd1); /* D1 /1: rotate-by-1 form, saves the count byte */
1494     emit_byte(0xc8+r); /* ModRM: register-direct, /1 = ROR */
1495     }
1496     else {
1497     emit_byte(0xc1); /* C1 /1 ib: rotate by imm8 */
1498     emit_byte(0xc8+r);
1499     emit_byte(i);
1500     }
1501     }
1502     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1503    
1504     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) /* Emits ROR r32, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1505     {
1506     emit_byte(0xd3); /* D3 /1: rotate r/m32 right by CL */
1507     emit_byte(0xc8+d); /* ModRM: register-direct, /1 = ROR */
1508     }
1509     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1510    
1511     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) /* Emits ROR r16, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1512     {
1513     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1514     emit_byte(0xd3); /* D3 /1: rotate right by CL */
1515     emit_byte(0xc8+d); /* ModRM: register-direct, /1 = ROR */
1516     }
1517     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1518    
1519     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) /* Emits ROR r8, CL. r is never encoded: the count comes from CL, so r is presumably required to be CL by the caller. */
1520     {
1521     emit_byte(0xd2); /* D2 /1: rotate r/m8 right by CL */
1522     emit_byte(0xc8+d); /* ModRM: register-direct, /1 = ROR */
1523     }
1524     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1525    
1526     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) /* Emits SHR r32, CL (logical right shift). r is never encoded: the count comes from CL, so r is presumably required to be CL. */
1527     {
1528     emit_byte(0xd3); /* D3 /5: shift r/m32 right by CL */
1529     emit_byte(0xe8+d); /* ModRM: register-direct, /5 = SHR */
1530     }
1531     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1532    
1533     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) /* Emits SHR r16, CL (logical right shift). r is never encoded: the count comes from CL, so r is presumably required to be CL. */
1534     {
1535     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1536     emit_byte(0xd3); /* D3 /5: shift right by CL */
1537     emit_byte(0xe8+d); /* ModRM: register-direct, /5 = SHR */
1538     }
1539     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1540    
1541     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) /* Emits SHR r8, CL (logical right shift). r is never encoded: the count comes from CL, so r is presumably required to be CL. */
1542     {
1543     emit_byte(0xd2); /* D2 /5: shift r/m8 right by CL */
1544     emit_byte(0xe8+d); /* ModRM: register-direct, /5 = SHR */
1545     }
1546     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1547    
1548     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) /* Emits SAR r32, CL (arithmetic right shift). r is never encoded: the count comes from CL, so r is presumably required to be CL. */
1549     {
1550     emit_byte(0xd3); /* D3 /7: arithmetic shift of r/m32 right by CL */
1551     emit_byte(0xf8+d); /* ModRM: register-direct, /7 = SAR */
1552     }
1553     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1554    
1555     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) /* Emits SAR r16, CL (arithmetic right shift). r is never encoded: the count comes from CL, so r is presumably required to be CL. */
1556     {
1557     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1558     emit_byte(0xd3); /* D3 /7: arithmetic shift right by CL */
1559     emit_byte(0xf8+d); /* ModRM: register-direct, /7 = SAR */
1560     }
1561     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1562    
1563     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) /* Emits SAR r8, CL (arithmetic right shift). r is never encoded: the count comes from CL, so r is presumably required to be CL. */
1564     {
1565     emit_byte(0xd2); /* D2 /7: arithmetic shift of r/m8 right by CL */
1566     emit_byte(0xf8+d); /* ModRM: register-direct, /7 = SAR */
1567     }
1568     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1569    
1570     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) /* Emits SHL r32, i: shifts 32-bit register r left by the immediate count i. */
1571     {
1572     if (optimize_shift_once && (i == 1)) {
1573     emit_byte(0xd1); /* D1 /4: shift-by-1 form, saves the count byte */
1574     emit_byte(0xe0+r); /* ModRM: register-direct, /4 = SHL */
1575     }
1576     else {
1577     emit_byte(0xc1); /* C1 /4 ib: shift by imm8 */
1578     emit_byte(0xe0+r);
1579     emit_byte(i);
1580     }
1581     }
1582     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1583    
1584     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) /* Emits SHL r16, imm8; always uses the imm8 form, even for i == 1. */
1585     {
1586     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1587     emit_byte(0xc1); /* C1 /4 ib: shift by imm8 */
1588     emit_byte(0xe0+r); /* ModRM: register-direct, /4 = SHL */
1589     emit_byte(i);
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) /* Emits SHL r8, i: shifts byte register r left by the immediate count i. */
1594     {
1595     if (optimize_shift_once && (i == 1)) {
1596     emit_byte(0xd0); /* D0 /4: shift-by-1 form, saves the count byte */
1597     emit_byte(0xe0+r); /* ModRM: register-direct, /4 = SHL */
1598     }
1599     else {
1600     emit_byte(0xc0); /* C0 /4 ib: shift by imm8 */
1601     emit_byte(0xe0+r);
1602     emit_byte(i);
1603     }
1604     }
1605     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1606    
1607     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) /* Emits SHR r32, i: logical right shift of 32-bit register r by the immediate count i. */
1608     {
1609     if (optimize_shift_once && (i == 1)) {
1610     emit_byte(0xd1); /* D1 /5: shift-by-1 form, saves the count byte */
1611     emit_byte(0xe8+r); /* ModRM: register-direct, /5 = SHR */
1612     }
1613     else {
1614     emit_byte(0xc1); /* C1 /5 ib: shift by imm8 */
1615     emit_byte(0xe8+r);
1616     emit_byte(i);
1617     }
1618     }
1619     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1620    
1621     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) /* Emits SHR r16, imm8 (logical right shift); always uses the imm8 form, even for i == 1. */
1622     {
1623     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1624     emit_byte(0xc1); /* C1 /5 ib: shift by imm8 */
1625     emit_byte(0xe8+r); /* ModRM: register-direct, /5 = SHR */
1626     emit_byte(i);
1627     }
1628     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1629    
1630     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) /* Emits SHR r8, i: logical right shift of byte register r by the immediate count i. */
1631     {
1632     if (optimize_shift_once && (i == 1)) {
1633     emit_byte(0xd0); /* D0 /5: shift-by-1 form, saves the count byte */
1634     emit_byte(0xe8+r); /* ModRM: register-direct, /5 = SHR */
1635     }
1636     else {
1637     emit_byte(0xc0); /* C0 /5 ib: shift by imm8 */
1638     emit_byte(0xe8+r);
1639     emit_byte(i);
1640     }
1641     }
1642     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1643    
1644     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) /* Emits SAR r32, i: arithmetic right shift of 32-bit register r by the immediate count i. */
1645     {
1646     if (optimize_shift_once && (i == 1)) {
1647     emit_byte(0xd1); /* D1 /7: shift-by-1 form, saves the count byte */
1648     emit_byte(0xf8+r); /* ModRM: register-direct, /7 = SAR */
1649     }
1650     else {
1651     emit_byte(0xc1); /* C1 /7 ib: shift by imm8 */
1652     emit_byte(0xf8+r);
1653     emit_byte(i);
1654     }
1655     }
1656     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1657    
1658     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) /* Emits SAR r16, imm8 (arithmetic right shift); always uses the imm8 form, even for i == 1. */
1659     {
1660     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1661     emit_byte(0xc1); /* C1 /7 ib: shift by imm8 */
1662     emit_byte(0xf8+r); /* ModRM: register-direct, /7 = SAR */
1663     emit_byte(i);
1664     }
1665     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1666    
1667     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) /* Emits SAR r8, i: arithmetic right shift of byte register r by the immediate count i. */
1668     {
1669     if (optimize_shift_once && (i == 1)) {
1670     emit_byte(0xd0); /* D0 /7: shift-by-1 form, saves the count byte */
1671     emit_byte(0xf8+r); /* ModRM: register-direct, /7 = SAR */
1672     }
1673     else {
1674     emit_byte(0xc0); /* C0 /7 ib: shift by imm8 */
1675     emit_byte(0xf8+r);
1676     emit_byte(i);
1677     }
1678     }
1679     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1680    
1681     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) /* Emits SAHF (opcode 9E). dummy_ah is not encoded; the instruction implicitly reads AH. */
1682     {
1683     emit_byte(0x9e);
1684     }
1685     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1686    
1687     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) /* Emits CPUID (opcode 0F A2). dummy_eax is not encoded; the instruction implicitly uses EAX. */
1688     {
1689     emit_byte(0x0f);
1690     emit_byte(0xa2);
1691     }
1692     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1693    
1694     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) /* Emits LAHF (opcode 9F). dummy_ah is not encoded; the instruction implicitly writes AH. */
1695     {
1696     emit_byte(0x9f);
1697     }
1698     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1699    
1700     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) /* Emits SETcc r8: sets byte register d to 0 or 1 according to x86 condition code cc. */
1701     {
1702     emit_byte(0x0f);
1703     emit_byte(0x90+cc); /* 0F 90+cc: the condition code is folded into the opcode byte */
1704     emit_byte(0xc0+d); /* ModRM: register-direct, r/m = d */
1705     }
1706     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1707    
1708     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) /* Emits SETcc m8: sets the byte at absolute address d according to x86 condition code cc. */
1709     {
1710     emit_byte(0x0f);
1711     emit_byte(0x90+cc); /* 0F 90+cc: the condition code is folded into the opcode byte */
1712     emit_byte(0x05); /* ModRM: mod=00, r/m=101 -> disp32 only */
1713     emit_long(d);
1714     }
1715     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1716    
1717     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) /* Emits a conditional 32-bit move d = s if condition cc holds: CMOVcc when have_cmov is set, else a short branch over a MOV. */
1718     {
1719     if (have_cmov) {
1720     emit_byte(0x0f);
1721     emit_byte(0x40+cc); /* 0F 40+cc /r: CMOVcc r32, r/m32 */
1722     emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg = d (destination), r/m = s (source) */
1723     }
1724     else { /* replacement using branch and mov */
1725     int uncc=(cc^1); /* x86 condition codes come in pairs; flipping bit 0 gives the opposite condition */
1726     emit_byte(0x70+uncc); /* short Jcc on the inverted condition */
1727     emit_byte(2); /* skip next 2 bytes if not cc=true */
1728     emit_byte(0x89); /* 89 /r: MOV r/m32, r32 -- operand roles are swapped relative to CMOV */
1729     emit_byte(0xc0+8*s+d); /* ModRM: reg = s (source), r/m = d (destination) */
1730     }
1731     }
1732     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1733    
1734     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) /* Emits BSF r32, r32: bit-scan-forward of s with the result going to d. */
1735     {
1736     emit_byte(0x0f);
1737     emit_byte(0xbc); /* 0F BC /r: BSF r32, r/m32 */
1738     emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg = d, r/m = s */
1739     }
1740     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1741    
1742     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) /* Emits MOVSX r32, r16: sign-extends the low 16 bits of s into d. */
1743     {
1744     emit_byte(0x0f);
1745     emit_byte(0xbf); /* 0F BF /r: MOVSX r32, r/m16 */
1746     emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg = d, r/m = s */
1747     }
1748     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1749    
1750     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) /* Emits MOVSX r32, r8: sign-extends the byte register s into d. */
1751     {
1752     emit_byte(0x0f);
1753     emit_byte(0xbe); /* 0F BE /r: MOVSX r32, r/m8 */
1754     emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg = d, r/m = s */
1755     }
1756     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1757    
1758     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) /* Emits MOVZX r32, r16: zero-extends the low 16 bits of s into d. */
1759     {
1760     emit_byte(0x0f);
1761     emit_byte(0xb7); /* 0F B7 /r: MOVZX r32, r/m16 */
1762     emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg = d, r/m = s */
1763     }
1764     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1765    
1766     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) /* Emits MOVZX r32, r8: zero-extends the byte register s into d. */
1767     {
1768     emit_byte(0x0f);
1769     emit_byte(0xb6); /* 0F B6 /r: MOVZX r32, r/m8 */
1770     emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg = d, r/m = s */
1771     }
1772     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1773    
1774     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) /* Emits two-operand IMUL r32, r32: d = d * s, keeping the low 32 bits. */
1775     {
1776     emit_byte(0x0f);
1777     emit_byte(0xaf); /* 0F AF /r: IMUL r32, r/m32 */
1778     emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg = d, r/m = s */
1779     }
1780     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1781    
1782     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) /* Emits the one-operand signed multiply F7 EA (IMUL EDX); aborts unless d/s are the fixed registers MUL_NREG1/MUL_NREG2. */
1783     {
1784     if (d!=MUL_NREG1 || s!=MUL_NREG2) /* the encoding below is hard-wired, so any other register pair cannot be honoured */
1785     abort();
1786     emit_byte(0xf7);
1787     emit_byte(0xea); /* ModRM: register-direct, /5 = IMUL, r/m = 2 (EDX); EAX is the implicit other operand, result in EDX:EAX */
1788     }
1789     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1790    
1791     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) /* Emits the one-operand unsigned multiply F7 E2 (MUL EDX); reports and aborts unless d/s are MUL_NREG1/MUL_NREG2. */
1792     {
1793     if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* the encoding below is hard-wired, so any other register pair cannot be honoured */
1794     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1795     abort();
1796     }
1797     emit_byte(0xf7);
1798     emit_byte(0xe2); /* ModRM: register-direct, /4 = MUL, r/m = 2 (EDX); EAX is the implicit other operand, result in EDX:EAX */
1799     }
1800     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1801    
1802     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) /* Deliberately unsupported: aborts unconditionally when called. */
1803     {
1804     abort(); /* %^$&%^$%#^ x86! */
1805     emit_byte(0x0f); /* unreachable after abort(); these three bytes are the same IMUL r32, r32 encoding used by raw_imul_32_32 */
1806     emit_byte(0xaf);
1807     emit_byte(0xc0+8*d+s);
1808     }
1809     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1810    
1811     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) /* Emits MOV r8, r8: copies byte register s into d. */
1812     {
1813     emit_byte(0x88); /* 88 /r: MOV r/m8, r8 */
1814     emit_byte(0xc0+8*s+d); /* ModRM: register-direct, reg = s (source), r/m = d (destination) */
1815     }
1816     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1817    
1818     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) /* Emits MOV r16, r16: copies the 16-bit register s into d. */
1819     {
1820     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1821     emit_byte(0x89); /* 89 /r: MOV r/m16, r16 */
1822     emit_byte(0xc0+8*s+d); /* ModRM: register-direct, reg = s (source), r/m = d (destination) */
1823     }
1824     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1825    
1826     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) /* Emits MOV r32, [baser + index*factor]; factor must be 1, 2, 4 or 8, anything else aborts. */
1827     {
1828     int isebp=(baser==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 and add a zero disp8 */
1829     int fi;
1830    
1831     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1832     case 1: fi=0; break;
1833     case 2: fi=1; break;
1834     case 4: fi=2; break;
1835     case 8: fi=3; break;
1836     default: abort();
1837     }
1838    
1839    
1840     emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 */
1841     emit_byte(0x04+8*d+isebp); /* ModRM: reg = d, r/m=100 -> a SIB byte follows */
1842     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1843     if (isebp)
1844     emit_byte(0x00); /* the zero disp8 required by the EBP-base form */
1845     }
1846     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1847    
1848     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) /* Emits MOV r16, [baser + index*factor]; factor must be 1, 2, 4 or 8, anything else aborts. */
1849     {
1850     int fi;
1851     int isebp;
1852    
1853     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1854     case 1: fi=0; break;
1855     case 2: fi=1; break;
1856     case 4: fi=2; break;
1857     case 8: fi=3; break;
1858     default: abort();
1859     }
1860     isebp=(baser==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 and add a zero disp8 */
1861    
1862     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1863     emit_byte(0x8b); /* 8B /r: MOV r16, r/m16 */
1864     emit_byte(0x04+8*d+isebp); /* ModRM: reg = d, r/m=100 -> a SIB byte follows */
1865     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1866     if (isebp)
1867     emit_byte(0x00); /* the zero disp8 required by the EBP-base form */
1868     }
1869     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1870    
1871     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) /* Emits MOV r8, [baser + index*factor]; factor must be 1, 2, 4 or 8, anything else aborts. */
1872     {
1873     int fi;
1874     int isebp;
1875    
1876     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1877     case 1: fi=0; break;
1878     case 2: fi=1; break;
1879     case 4: fi=2; break;
1880     case 8: fi=3; break;
1881     default: abort();
1882     }
1883     isebp=(baser==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 and add a zero disp8 */
1884    
1885     emit_byte(0x8a); /* 8A /r: MOV r8, r/m8 */
1886     emit_byte(0x04+8*d+isebp); /* ModRM: reg = d, r/m=100 -> a SIB byte follows */
1887     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1888     if (isebp)
1889     emit_byte(0x00); /* the zero disp8 required by the EBP-base form */
1890     }
1891     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1892    
1893     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) /* Emits MOV [baser + index*factor], r32 (store of s); factor must be 1, 2, 4 or 8, anything else aborts. */
1894     {
1895     int fi;
1896     int isebp;
1897    
1898     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1899     case 1: fi=0; break;
1900     case 2: fi=1; break;
1901     case 4: fi=2; break;
1902     case 8: fi=3; break;
1903     default: abort();
1904     }
1905    
1906    
1907     isebp=(baser==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 and add a zero disp8 */
1908    
1909     emit_byte(0x89); /* 89 /r: MOV r/m32, r32 */
1910     emit_byte(0x04+8*s+isebp); /* ModRM: reg = s, r/m=100 -> a SIB byte follows */
1911     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1912     if (isebp)
1913     emit_byte(0x00); /* the zero disp8 required by the EBP-base form */
1914     }
1915     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1916    
1917     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) /* Emits MOV [baser + index*factor], r16 (store of s); factor must be 1, 2, 4 or 8, anything else aborts. */
1918     {
1919     int fi;
1920     int isebp;
1921    
1922     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1923     case 1: fi=0; break;
1924     case 2: fi=1; break;
1925     case 4: fi=2; break;
1926     case 8: fi=3; break;
1927     default: abort();
1928     }
1929     isebp=(baser==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 and add a zero disp8 */
1930    
1931     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1932     emit_byte(0x89); /* 89 /r: MOV r/m16, r16 */
1933     emit_byte(0x04+8*s+isebp); /* ModRM: reg = s, r/m=100 -> a SIB byte follows */
1934     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1935     if (isebp)
1936     emit_byte(0x00); /* the zero disp8 required by the EBP-base form */
1937     }
1938     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1939    
1940     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) /* Emits MOV [baser + index*factor], r8 (store of s); factor must be 1, 2, 4 or 8, anything else aborts. */
1941     {
1942     int fi;
1943     int isebp;
1944    
1945     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1946     case 1: fi=0; break;
1947     case 2: fi=1; break;
1948     case 4: fi=2; break;
1949     case 8: fi=3; break;
1950     default: abort();
1951     }
1952     isebp=(baser==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 and add a zero disp8 */
1953    
1954     emit_byte(0x88); /* 88 /r: MOV r/m8, r8 */
1955     emit_byte(0x04+8*s+isebp); /* ModRM: reg = s, r/m=100 -> a SIB byte follows */
1956     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1957     if (isebp)
1958     emit_byte(0x00); /* the zero disp8 required by the EBP-base form */
1959     }
1960     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1961    
1962     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) /* Emits MOV [base + baser + index*factor], r32 (store of s); factor must be 1, 2, 4 or 8, anything else aborts. */
1963     {
1964     int fi;
1965    
1966     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1967     case 1: fi=0; break;
1968     case 2: fi=1; break;
1969     case 4: fi=2; break;
1970     case 8: fi=3; break;
1971     default: abort();
1972     }
1973    
1974     emit_byte(0x89); /* 89 /r: MOV r/m32, r32 */
1975     emit_byte(0x84+8*s); /* ModRM: mod=10 (disp32 follows), reg = s, r/m=100 -> SIB byte */
1976     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1977     emit_long(base); /* 32-bit displacement; always present, so no EBP special case is needed */
1978     }
1979     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1980    
1981     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) /* Emits MOV [base + baser + index*factor], r16 (store of s); factor must be 1, 2, 4 or 8, anything else aborts. */
1982     {
1983     int fi;
1984    
1985     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
1986     case 1: fi=0; break;
1987     case 2: fi=1; break;
1988     case 4: fi=2; break;
1989     case 8: fi=3; break;
1990     default: abort();
1991     }
1992    
1993     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
1994     emit_byte(0x89); /* 89 /r: MOV r/m16, r16 */
1995     emit_byte(0x84+8*s); /* ModRM: mod=10 (disp32 follows), reg = s, r/m=100 -> SIB byte */
1996     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
1997     emit_long(base); /* 32-bit displacement */
1998     }
1999     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2000    
2001     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) /* Emits MOV [base + baser + index*factor], r8 (store of s); factor must be 1, 2, 4 or 8, anything else aborts. */
2002     {
2003     int fi;
2004    
2005     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
2006     case 1: fi=0; break;
2007     case 2: fi=1; break;
2008     case 4: fi=2; break;
2009     case 8: fi=3; break;
2010     default: abort();
2011     }
2012    
2013     emit_byte(0x88); /* 88 /r: MOV r/m8, r8 */
2014     emit_byte(0x84+8*s); /* ModRM: mod=10 (disp32 follows), reg = s, r/m=100 -> SIB byte */
2015     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
2016     emit_long(base); /* 32-bit displacement */
2017     }
2018     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2019    
2020     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) /* Emits MOV r32, [base + baser + index*factor] (load into d); factor must be 1, 2, 4 or 8, anything else aborts. */
2021     {
2022     int fi;
2023    
2024     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
2025     case 1: fi=0; break;
2026     case 2: fi=1; break;
2027     case 4: fi=2; break;
2028     case 8: fi=3; break;
2029     default: abort();
2030     }
2031    
2032     emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 */
2033     emit_byte(0x84+8*d); /* ModRM: mod=10 (disp32 follows), reg = d, r/m=100 -> SIB byte */
2034     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
2035     emit_long(base); /* 32-bit displacement */
2036     }
2037     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2038    
2039     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) /* Emits MOV r16, [base + baser + index*factor] (load into d); factor must be 1, 2, 4 or 8, anything else aborts. */
2040     {
2041     int fi;
2042    
2043     switch(factor) { /* translate the scale factor into the 2-bit SIB scale field */
2044     case 1: fi=0; break;
2045     case 2: fi=1; break;
2046     case 4: fi=2; break;
2047     case 8: fi=3; break;
2048     default: abort();
2049     }
2050    
2051     emit_byte(0x66); /* operand-size prefix: 16-bit operation */
2052     emit_byte(0x8b); /* 8B /r: MOV r16, r/m16 */
2053     emit_byte(0x84+8*d); /* ModRM: mod=10 (disp32 follows), reg = d, r/m=100 -> SIB byte */
2054     emit_byte(baser+8*index+0x40*fi); /* SIB: scale | index | base */
2055     emit_long(base); /* 32-bit displacement */
2056     }
2057     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2058    
2059     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2060     {
2061     int fi;
2062    
2063     switch(factor) {
2064     case 1: fi=0; break;
2065     case 2: fi=1; break;
2066     case 4: fi=2; break;
2067     case 8: fi=3; break;
2068     default: abort();
2069     }
2070    
2071     emit_byte(0x8a);
2072     emit_byte(0x84+8*d);
2073     emit_byte(baser+8*index+0x40*fi);
2074     emit_long(base);
2075     }
2076     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2077    
2078     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2079     {
2080     int fi;
2081     switch(factor) {
2082     case 1: fi=0; break;
2083     case 2: fi=1; break;
2084     case 4: fi=2; break;
2085     case 8: fi=3; break;
2086     default:
2087     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2088     abort();
2089     }
2090     emit_byte(0x8b);
2091     emit_byte(0x04+8*d);
2092     emit_byte(0x05+8*index+64*fi);
2093     emit_long(base);
2094     }
2095     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2096    
2097     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2098     {
2099     int fi;
2100     switch(factor) {
2101     case 1: fi=0; break;
2102     case 2: fi=1; break;
2103     case 4: fi=2; break;
2104     case 8: fi=3; break;
2105     default:
2106     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2107     abort();
2108     }
2109     if (have_cmov) {
2110     emit_byte(0x0f);
2111     emit_byte(0x40+cond);
2112     emit_byte(0x04+8*d);
2113     emit_byte(0x05+8*index+64*fi);
2114     emit_long(base);
2115     }
2116     else { /* replacement using branch and mov */
2117     int uncc=(cond^1);
2118     emit_byte(0x70+uncc);
2119     emit_byte(7); /* skip next 7 bytes if not cc=true */
2120     emit_byte(0x8b);
2121     emit_byte(0x04+8*d);
2122     emit_byte(0x05+8*index+64*fi);
2123     emit_long(base);
2124     }
2125     }
2126     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2127    
2128     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2129     {
2130     if (have_cmov) {
2131     emit_byte(0x0f);
2132     emit_byte(0x40+cond);
2133     emit_byte(0x05+8*d);
2134     emit_long(mem);
2135     }
2136     else { /* replacement using branch and mov */
2137     int uncc=(cond^1);
2138     emit_byte(0x70+uncc);
2139     emit_byte(6); /* skip next 6 bytes if not cc=true */
2140     emit_byte(0x8b);
2141     emit_byte(0x05+8*d);
2142     emit_long(mem);
2143     }
2144     }
2145     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2146    
2147     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2148     {
2149 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2150 gbeauche 1.1 emit_byte(0x8b);
2151     emit_byte(0x40+8*d+s);
2152     emit_byte(offset);
2153     }
2154     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2155    
2156     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2157     {
2158 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2159 gbeauche 1.1 emit_byte(0x66);
2160     emit_byte(0x8b);
2161     emit_byte(0x40+8*d+s);
2162     emit_byte(offset);
2163     }
2164     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2165    
2166     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2167     {
2168 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2169 gbeauche 1.1 emit_byte(0x8a);
2170     emit_byte(0x40+8*d+s);
2171     emit_byte(offset);
2172     }
2173     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2174    
2175     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2176     {
2177     emit_byte(0x8b);
2178     emit_byte(0x80+8*d+s);
2179     emit_long(offset);
2180     }
2181     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2182    
2183     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2184     {
2185     emit_byte(0x66);
2186     emit_byte(0x8b);
2187     emit_byte(0x80+8*d+s);
2188     emit_long(offset);
2189     }
2190     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2191    
2192     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2193     {
2194     emit_byte(0x8a);
2195     emit_byte(0x80+8*d+s);
2196     emit_long(offset);
2197     }
2198     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2199    
2200     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2201     {
2202 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2203 gbeauche 1.1 emit_byte(0xc7);
2204     emit_byte(0x40+d);
2205     emit_byte(offset);
2206     emit_long(i);
2207     }
2208     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2209    
2210     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2211     {
2212 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2213 gbeauche 1.1 emit_byte(0x66);
2214     emit_byte(0xc7);
2215     emit_byte(0x40+d);
2216     emit_byte(offset);
2217     emit_word(i);
2218     }
2219     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2220    
2221     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2222     {
2223 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2224 gbeauche 1.1 emit_byte(0xc6);
2225     emit_byte(0x40+d);
2226     emit_byte(offset);
2227     emit_byte(i);
2228     }
2229     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2230    
2231     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2232     {
2233 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2234 gbeauche 1.1 emit_byte(0x89);
2235     emit_byte(0x40+8*s+d);
2236     emit_byte(offset);
2237     }
2238     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2239    
2240     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2241     {
2242 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2243 gbeauche 1.1 emit_byte(0x66);
2244     emit_byte(0x89);
2245     emit_byte(0x40+8*s+d);
2246     emit_byte(offset);
2247     }
2248     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2249    
2250     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2251     {
2252 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2253 gbeauche 1.1 emit_byte(0x88);
2254     emit_byte(0x40+8*s+d);
2255     emit_byte(offset);
2256     }
2257     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2258    
2259     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2260     {
2261     if (optimize_imm8 && isbyte(offset)) {
2262     emit_byte(0x8d);
2263     emit_byte(0x40+8*d+s);
2264     emit_byte(offset);
2265     }
2266     else {
2267     emit_byte(0x8d);
2268     emit_byte(0x80+8*d+s);
2269     emit_long(offset);
2270     }
2271     }
2272     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2273    
2274     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2275     {
2276     int fi;
2277    
2278     switch(factor) {
2279     case 1: fi=0; break;
2280     case 2: fi=1; break;
2281     case 4: fi=2; break;
2282     case 8: fi=3; break;
2283     default: abort();
2284     }
2285    
2286     if (optimize_imm8 && isbyte(offset)) {
2287     emit_byte(0x8d);
2288     emit_byte(0x44+8*d);
2289     emit_byte(0x40*fi+8*index+s);
2290     emit_byte(offset);
2291     }
2292     else {
2293     emit_byte(0x8d);
2294     emit_byte(0x84+8*d);
2295     emit_byte(0x40*fi+8*index+s);
2296     emit_long(offset);
2297     }
2298     }
2299     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2300    
2301     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2302     {
2303     int isebp=(s==5)?0x40:0;
2304     int fi;
2305    
2306     switch(factor) {
2307     case 1: fi=0; break;
2308     case 2: fi=1; break;
2309     case 4: fi=2; break;
2310     case 8: fi=3; break;
2311     default: abort();
2312     }
2313    
2314     emit_byte(0x8d);
2315     emit_byte(0x04+8*d+isebp);
2316     emit_byte(0x40*fi+8*index+s);
2317     if (isebp)
2318     emit_byte(0);
2319     }
2320     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2321    
2322     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2323     {
2324     if (optimize_imm8 && isbyte(offset)) {
2325     emit_byte(0x89);
2326     emit_byte(0x40+8*s+d);
2327     emit_byte(offset);
2328     }
2329     else {
2330     emit_byte(0x89);
2331     emit_byte(0x80+8*s+d);
2332     emit_long(offset);
2333     }
2334     }
2335     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2336    
2337     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2338     {
2339     emit_byte(0x66);
2340     emit_byte(0x89);
2341     emit_byte(0x80+8*s+d);
2342     emit_long(offset);
2343     }
2344     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2345    
2346     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2347     {
2348     if (optimize_imm8 && isbyte(offset)) {
2349     emit_byte(0x88);
2350     emit_byte(0x40+8*s+d);
2351     emit_byte(offset);
2352     }
2353     else {
2354     emit_byte(0x88);
2355     emit_byte(0x80+8*s+d);
2356     emit_long(offset);
2357     }
2358     }
2359     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2360    
2361     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2362     {
2363     emit_byte(0x0f);
2364     emit_byte(0xc8+r);
2365     }
2366     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2367    
2368     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2369     {
2370     emit_byte(0x66);
2371     emit_byte(0xc1);
2372     emit_byte(0xc0+r);
2373     emit_byte(0x08);
2374     }
2375     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2376    
2377     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2378     {
2379     emit_byte(0x89);
2380     emit_byte(0xc0+8*s+d);
2381     }
2382     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2383    
2384     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2385     {
2386     emit_byte(0x89);
2387     emit_byte(0x05+8*s);
2388     emit_long(d);
2389     }
2390     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2391    
2392     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2393     {
2394     emit_byte(0x66);
2395     emit_byte(0x89);
2396     emit_byte(0x05+8*s);
2397     emit_long(d);
2398     }
2399     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2400    
2401     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2402     {
2403     emit_byte(0x66);
2404     emit_byte(0x8b);
2405     emit_byte(0x05+8*d);
2406     emit_long(s);
2407     }
2408     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2409    
2410     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2411     {
2412     emit_byte(0x88);
2413     emit_byte(0x05+8*s);
2414     emit_long(d);
2415     }
2416     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2417    
2418     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2419     {
2420     emit_byte(0x8a);
2421     emit_byte(0x05+8*d);
2422     emit_long(s);
2423     }
2424     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2425    
2426     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2427     {
2428     emit_byte(0xb8+d);
2429     emit_long(s);
2430     }
2431     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2432    
2433     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2434     {
2435     emit_byte(0x66);
2436     emit_byte(0xb8+d);
2437     emit_word(s);
2438     }
2439     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2440    
2441     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2442     {
2443     emit_byte(0xb0+d);
2444     emit_byte(s);
2445     }
2446     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2447    
2448     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2449     {
2450     emit_byte(0x81);
2451     emit_byte(0x15);
2452     emit_long(d);
2453     emit_long(s);
2454     }
2455     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2456    
2457     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2458     {
2459     if (optimize_imm8 && isbyte(s)) {
2460     emit_byte(0x83);
2461     emit_byte(0x05);
2462     emit_long(d);
2463     emit_byte(s);
2464     }
2465     else {
2466     emit_byte(0x81);
2467     emit_byte(0x05);
2468     emit_long(d);
2469     emit_long(s);
2470     }
2471     }
2472     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2473    
2474     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2475     {
2476     emit_byte(0x66);
2477     emit_byte(0x81);
2478     emit_byte(0x05);
2479     emit_long(d);
2480     emit_word(s);
2481     }
2482     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2483    
2484     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2485     {
2486     emit_byte(0x80);
2487     emit_byte(0x05);
2488     emit_long(d);
2489     emit_byte(s);
2490     }
2491     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2492    
2493     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2494     {
2495 gbeauche 1.2 if (optimize_accum && isaccum(d))
2496     emit_byte(0xa9);
2497     else {
2498 gbeauche 1.1 emit_byte(0xf7);
2499     emit_byte(0xc0+d);
2500 gbeauche 1.2 }
2501 gbeauche 1.1 emit_long(i);
2502     }
2503     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2504    
2505     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2506     {
2507     emit_byte(0x85);
2508     emit_byte(0xc0+8*s+d);
2509     }
2510     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2511    
2512     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2513     {
2514     emit_byte(0x66);
2515     emit_byte(0x85);
2516     emit_byte(0xc0+8*s+d);
2517     }
2518     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2519    
2520     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2521     {
2522     emit_byte(0x84);
2523     emit_byte(0xc0+8*s+d);
2524     }
2525     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2526    
2527 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2528     {
2529     emit_byte(0x81);
2530     emit_byte(0xf0+d);
2531     emit_long(i);
2532     }
2533     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2534    
2535 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2536     {
2537     if (optimize_imm8 && isbyte(i)) {
2538 gbeauche 1.2 emit_byte(0x83);
2539     emit_byte(0xe0+d);
2540     emit_byte(i);
2541 gbeauche 1.1 }
2542     else {
2543 gbeauche 1.2 if (optimize_accum && isaccum(d))
2544     emit_byte(0x25);
2545     else {
2546     emit_byte(0x81);
2547     emit_byte(0xe0+d);
2548     }
2549     emit_long(i);
2550 gbeauche 1.1 }
2551     }
2552     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2553    
2554     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2555     {
2556 gbeauche 1.2 emit_byte(0x66);
2557     if (optimize_imm8 && isbyte(i)) {
2558     emit_byte(0x83);
2559     emit_byte(0xe0+d);
2560     emit_byte(i);
2561     }
2562     else {
2563     if (optimize_accum && isaccum(d))
2564     emit_byte(0x25);
2565     else {
2566     emit_byte(0x81);
2567     emit_byte(0xe0+d);
2568     }
2569     emit_word(i);
2570     }
2571 gbeauche 1.1 }
2572     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2573    
2574     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2575     {
2576     emit_byte(0x21);
2577     emit_byte(0xc0+8*s+d);
2578     }
2579     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2580    
2581     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2582     {
2583     emit_byte(0x66);
2584     emit_byte(0x21);
2585     emit_byte(0xc0+8*s+d);
2586     }
2587     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2588    
2589     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2590     {
2591     emit_byte(0x20);
2592     emit_byte(0xc0+8*s+d);
2593     }
2594     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2595    
2596     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2597     {
2598     if (optimize_imm8 && isbyte(i)) {
2599     emit_byte(0x83);
2600     emit_byte(0xc8+d);
2601     emit_byte(i);
2602     }
2603     else {
2604 gbeauche 1.2 if (optimize_accum && isaccum(d))
2605     emit_byte(0x0d);
2606     else {
2607 gbeauche 1.1 emit_byte(0x81);
2608     emit_byte(0xc8+d);
2609 gbeauche 1.2 }
2610 gbeauche 1.1 emit_long(i);
2611     }
2612     }
2613     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2614    
2615     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2616     {
2617     emit_byte(0x09);
2618     emit_byte(0xc0+8*s+d);
2619     }
2620     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2621    
2622     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2623     {
2624     emit_byte(0x66);
2625     emit_byte(0x09);
2626     emit_byte(0xc0+8*s+d);
2627     }
2628     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2629    
2630     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2631     {
2632     emit_byte(0x08);
2633     emit_byte(0xc0+8*s+d);
2634     }
2635     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2636    
2637     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2638     {
2639     emit_byte(0x11);
2640     emit_byte(0xc0+8*s+d);
2641     }
2642     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2643    
2644     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2645     {
2646     emit_byte(0x66);
2647     emit_byte(0x11);
2648     emit_byte(0xc0+8*s+d);
2649     }
2650     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2651    
2652     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2653     {
2654     emit_byte(0x10);
2655     emit_byte(0xc0+8*s+d);
2656     }
2657     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2658    
2659     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2660     {
2661     emit_byte(0x01);
2662     emit_byte(0xc0+8*s+d);
2663     }
2664     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2665    
2666     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2667     {
2668     emit_byte(0x66);
2669     emit_byte(0x01);
2670     emit_byte(0xc0+8*s+d);
2671     }
2672     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2673    
2674     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2675     {
2676     emit_byte(0x00);
2677     emit_byte(0xc0+8*s+d);
2678     }
2679     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2680    
2681     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2682     {
2683     if (isbyte(i)) {
2684     emit_byte(0x83);
2685     emit_byte(0xe8+d);
2686     emit_byte(i);
2687     }
2688     else {
2689 gbeauche 1.2 if (optimize_accum && isaccum(d))
2690     emit_byte(0x2d);
2691     else {
2692 gbeauche 1.1 emit_byte(0x81);
2693     emit_byte(0xe8+d);
2694 gbeauche 1.2 }
2695 gbeauche 1.1 emit_long(i);
2696     }
2697     }
2698     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2699    
2700     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2701     {
2702 gbeauche 1.2 if (optimize_accum && isaccum(d))
2703     emit_byte(0x2c);
2704     else {
2705 gbeauche 1.1 emit_byte(0x80);
2706     emit_byte(0xe8+d);
2707 gbeauche 1.2 }
2708 gbeauche 1.1 emit_byte(i);
2709     }
2710     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2711    
2712     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2713     {
2714     if (isbyte(i)) {
2715     emit_byte(0x83);
2716     emit_byte(0xc0+d);
2717     emit_byte(i);
2718     }
2719     else {
2720 gbeauche 1.2 if (optimize_accum && isaccum(d))
2721     emit_byte(0x05);
2722     else {
2723 gbeauche 1.1 emit_byte(0x81);
2724     emit_byte(0xc0+d);
2725 gbeauche 1.2 }
2726 gbeauche 1.1 emit_long(i);
2727     }
2728     }
2729     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2730    
2731     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2732     {
2733 gbeauche 1.2 emit_byte(0x66);
2734 gbeauche 1.1 if (isbyte(i)) {
2735     emit_byte(0x83);
2736     emit_byte(0xc0+d);
2737     emit_byte(i);
2738     }
2739     else {
2740 gbeauche 1.2 if (optimize_accum && isaccum(d))
2741     emit_byte(0x05);
2742     else {
2743 gbeauche 1.1 emit_byte(0x81);
2744     emit_byte(0xc0+d);
2745 gbeauche 1.2 }
2746 gbeauche 1.1 emit_word(i);
2747     }
2748     }
2749     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2750    
2751     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2752     {
2753 gbeauche 1.2 if (optimize_accum && isaccum(d))
2754     emit_byte(0x04);
2755     else {
2756     emit_byte(0x80);
2757     emit_byte(0xc0+d);
2758     }
2759 gbeauche 1.1 emit_byte(i);
2760     }
2761     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2762    
2763     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2764     {
2765     emit_byte(0x19);
2766     emit_byte(0xc0+8*s+d);
2767     }
2768     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2769    
2770     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2771     {
2772     emit_byte(0x66);
2773     emit_byte(0x19);
2774     emit_byte(0xc0+8*s+d);
2775     }
2776     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2777    
2778     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2779     {
2780     emit_byte(0x18);
2781     emit_byte(0xc0+8*s+d);
2782     }
2783     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2784    
2785     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2786     {
2787     emit_byte(0x29);
2788     emit_byte(0xc0+8*s+d);
2789     }
2790     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2791    
2792     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2793     {
2794     emit_byte(0x66);
2795     emit_byte(0x29);
2796     emit_byte(0xc0+8*s+d);
2797     }
2798     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2799    
2800     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2801     {
2802     emit_byte(0x28);
2803     emit_byte(0xc0+8*s+d);
2804     }
2805     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2806    
2807     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2808     {
2809     emit_byte(0x39);
2810     emit_byte(0xc0+8*s+d);
2811     }
2812     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2813    
2814     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2815     {
2816     if (optimize_imm8 && isbyte(i)) {
2817     emit_byte(0x83);
2818     emit_byte(0xf8+r);
2819     emit_byte(i);
2820     }
2821     else {
2822 gbeauche 1.2 if (optimize_accum && isaccum(r))
2823     emit_byte(0x3d);
2824     else {
2825 gbeauche 1.1 emit_byte(0x81);
2826     emit_byte(0xf8+r);
2827 gbeauche 1.2 }
2828 gbeauche 1.1 emit_long(i);
2829     }
2830     }
2831     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2832    
2833     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2834     {
2835     emit_byte(0x66);
2836     emit_byte(0x39);
2837     emit_byte(0xc0+8*s+d);
2838     }
2839     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2840    
2841 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2842     {
2843     emit_byte(0x80);
2844     emit_byte(0x3d);
2845     emit_long(d);
2846     emit_byte(s);
2847     }
2848     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2849    
2850 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2851     {
2852 gbeauche 1.2 if (optimize_accum && isaccum(d))
2853     emit_byte(0x3c);
2854     else {
2855 gbeauche 1.1 emit_byte(0x80);
2856     emit_byte(0xf8+d);
2857 gbeauche 1.2 }
2858 gbeauche 1.1 emit_byte(i);
2859     }
2860     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2861    
2862     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2863     {
2864     emit_byte(0x38);
2865     emit_byte(0xc0+8*s+d);
2866     }
2867     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2868    
2869     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2870     {
2871     int fi;
2872    
2873     switch(factor) {
2874     case 1: fi=0; break;
2875     case 2: fi=1; break;
2876     case 4: fi=2; break;
2877     case 8: fi=3; break;
2878     default: abort();
2879     }
2880     emit_byte(0x39);
2881     emit_byte(0x04+8*d);
2882     emit_byte(5+8*index+0x40*fi);
2883     emit_long(offset);
2884     }
2885     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2886    
2887     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2888     {
2889     emit_byte(0x31);
2890     emit_byte(0xc0+8*s+d);
2891     }
2892     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2893    
2894     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2895     {
2896     emit_byte(0x66);
2897     emit_byte(0x31);
2898     emit_byte(0xc0+8*s+d);
2899     }
2900     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2901    
2902     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2903     {
2904     emit_byte(0x30);
2905     emit_byte(0xc0+8*s+d);
2906     }
2907     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2908    
2909     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2910     {
2911     if (optimize_imm8 && isbyte(s)) {
2912     emit_byte(0x83);
2913     emit_byte(0x2d);
2914     emit_long(d);
2915     emit_byte(s);
2916     }
2917     else {
2918     emit_byte(0x81);
2919     emit_byte(0x2d);
2920     emit_long(d);
2921     emit_long(s);
2922     }
2923     }
2924     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2925    
2926     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2927     {
2928     if (optimize_imm8 && isbyte(s)) {
2929     emit_byte(0x83);
2930     emit_byte(0x3d);
2931     emit_long(d);
2932     emit_byte(s);
2933     }
2934     else {
2935     emit_byte(0x81);
2936     emit_byte(0x3d);
2937     emit_long(d);
2938     emit_long(s);
2939     }
2940     }
2941     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2942    
2943     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2944     {
2945     emit_byte(0x87);
2946     emit_byte(0xc0+8*r1+r2);
2947     }
2948     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2949    
2950     /*************************************************************************
2951     * FIXME: mem access modes probably wrong *
2952     *************************************************************************/
2953    
2954     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2955     {
2956     emit_byte(0x9c);
2957     }
2958     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2959    
2960     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2961     {
2962     emit_byte(0x9d);
2963     }
2964     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2965 gbeauche 1.13
2966     #endif
2967 gbeauche 1.1
2968     /*************************************************************************
2969     * Unoptimizable stuff --- jump *
2970     *************************************************************************/
2971    
2972     static __inline__ void raw_call_r(R4 r)
2973     {
2974 gbeauche 1.20 #if USE_NEW_RTASM
2975     CALLsr(r);
2976     #else
2977 gbeauche 1.1 emit_byte(0xff);
2978     emit_byte(0xd0+r);
2979 gbeauche 1.20 #endif
2980 gbeauche 1.5 }
2981    
2982     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2983     {
2984 gbeauche 1.20 #if USE_NEW_RTASM
2985     CALLsm(base, X86_NOREG, r, m);
2986     #else
2987 gbeauche 1.5 int mu;
2988     switch(m) {
2989     case 1: mu=0; break;
2990     case 2: mu=1; break;
2991     case 4: mu=2; break;
2992     case 8: mu=3; break;
2993     default: abort();
2994     }
2995     emit_byte(0xff);
2996     emit_byte(0x14);
2997     emit_byte(0x05+8*r+0x40*mu);
2998     emit_long(base);
2999 gbeauche 1.20 #endif
3000 gbeauche 1.1 }
3001    
3002     static __inline__ void raw_jmp_r(R4 r)
3003     {
3004 gbeauche 1.20 #if USE_NEW_RTASM
3005     JMPsr(r);
3006     #else
3007 gbeauche 1.1 emit_byte(0xff);
3008     emit_byte(0xe0+r);
3009 gbeauche 1.20 #endif
3010 gbeauche 1.1 }
3011    
3012     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3013     {
3014 gbeauche 1.20 #if USE_NEW_RTASM
3015     JMPsm(base, X86_NOREG, r, m);
3016     #else
3017 gbeauche 1.1 int mu;
3018     switch(m) {
3019     case 1: mu=0; break;
3020     case 2: mu=1; break;
3021     case 4: mu=2; break;
3022     case 8: mu=3; break;
3023     default: abort();
3024     }
3025     emit_byte(0xff);
3026     emit_byte(0x24);
3027     emit_byte(0x05+8*r+0x40*mu);
3028     emit_long(base);
3029 gbeauche 1.20 #endif
3030 gbeauche 1.1 }
3031    
3032     static __inline__ void raw_jmp_m(uae_u32 base)
3033     {
3034     emit_byte(0xff);
3035     emit_byte(0x25);
3036     emit_long(base);
3037     }
3038    
3039    
3040     static __inline__ void raw_call(uae_u32 t)
3041     {
3042 gbeauche 1.20 #if USE_NEW_RTASM
3043     CALLm(t);
3044     #else
3045 gbeauche 1.1 emit_byte(0xe8);
3046     emit_long(t-(uae_u32)target-4);
3047 gbeauche 1.20 #endif
3048 gbeauche 1.1 }
3049    
3050     static __inline__ void raw_jmp(uae_u32 t)
3051     {
3052 gbeauche 1.20 #if USE_NEW_RTASM
3053     JMPm(t);
3054     #else
3055 gbeauche 1.1 emit_byte(0xe9);
3056     emit_long(t-(uae_u32)target-4);
3057 gbeauche 1.20 #endif
3058 gbeauche 1.1 }
3059    
3060     static __inline__ void raw_jl(uae_u32 t)
3061     {
3062     emit_byte(0x0f);
3063     emit_byte(0x8c);
3064 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3065 gbeauche 1.1 }
3066    
3067     static __inline__ void raw_jz(uae_u32 t)
3068     {
3069     emit_byte(0x0f);
3070     emit_byte(0x84);
3071 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3072 gbeauche 1.1 }
3073    
3074     static __inline__ void raw_jnz(uae_u32 t)
3075     {
3076     emit_byte(0x0f);
3077     emit_byte(0x85);
3078 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3079 gbeauche 1.1 }
3080    
3081     static __inline__ void raw_jnz_l_oponly(void)
3082     {
3083     emit_byte(0x0f);
3084     emit_byte(0x85);
3085     }
3086    
3087     static __inline__ void raw_jcc_l_oponly(int cc)
3088     {
3089     emit_byte(0x0f);
3090     emit_byte(0x80+cc);
3091     }
3092    
3093     static __inline__ void raw_jnz_b_oponly(void)
3094     {
3095     emit_byte(0x75);
3096     }
3097    
3098     static __inline__ void raw_jz_b_oponly(void)
3099     {
3100     emit_byte(0x74);
3101     }
3102    
3103     static __inline__ void raw_jcc_b_oponly(int cc)
3104     {
3105     emit_byte(0x70+cc);
3106     }
3107    
3108     static __inline__ void raw_jmp_l_oponly(void)
3109     {
3110     emit_byte(0xe9);
3111     }
3112    
3113     static __inline__ void raw_jmp_b_oponly(void)
3114     {
3115     emit_byte(0xeb);
3116     }
3117    
3118     static __inline__ void raw_ret(void)
3119     {
3120     emit_byte(0xc3);
3121     }
3122    
3123     static __inline__ void raw_nop(void)
3124     {
3125     emit_byte(0x90);
3126     }
3127    
3128 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3129     {
3130     /* Source: GNU Binutils 2.12.90.0.15 */
3131     /* Various efficient no-op patterns for aligning code labels.
3132     Note: Don't try to assemble the instructions in the comments.
3133     0L and 0w are not legal. */
3134     static const uae_u8 f32_1[] =
3135     {0x90}; /* nop */
3136     static const uae_u8 f32_2[] =
3137     {0x89,0xf6}; /* movl %esi,%esi */
3138     static const uae_u8 f32_3[] =
3139     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3140     static const uae_u8 f32_4[] =
3141     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3142     static const uae_u8 f32_5[] =
3143     {0x90, /* nop */
3144     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3145     static const uae_u8 f32_6[] =
3146     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3147     static const uae_u8 f32_7[] =
3148     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3149     static const uae_u8 f32_8[] =
3150     {0x90, /* nop */
3151     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3152     static const uae_u8 f32_9[] =
3153     {0x89,0xf6, /* movl %esi,%esi */
3154     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3155     static const uae_u8 f32_10[] =
3156     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3157     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3158     static const uae_u8 f32_11[] =
3159     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3160     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3161     static const uae_u8 f32_12[] =
3162     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3163     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3164     static const uae_u8 f32_13[] =
3165     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3166     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3167     static const uae_u8 f32_14[] =
3168     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3169     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170     static const uae_u8 f32_15[] =
3171     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3172     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3173     static const uae_u8 f32_16[] =
3174     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3175     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3176     static const uae_u8 *const f32_patt[] = {
3177     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3178     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3179     };
3180 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3181 gbeauche 1.8
3182 gbeauche 1.21 #if defined(__x86_64__)
3183     /* The recommended way to pad 64bit code is to use NOPs preceded by
3184     maximally four 0x66 prefixes. Balance the size of nops. */
3185     if (nbytes == 0)
3186     return;
3187    
3188     int i;
3189     int nnops = (nbytes + 3) / 4;
3190     int len = nbytes / nnops;
3191     int remains = nbytes - nnops * len;
3192    
3193     for (i = 0; i < remains; i++) {
3194     emit_block(prefixes, len);
3195     raw_nop();
3196     }
3197     for (; i < nnops; i++) {
3198     emit_block(prefixes, len - 1);
3199     raw_nop();
3200     }
3201     #else
3202 gbeauche 1.8 int nloops = nbytes / 16;
3203     while (nloops-- > 0)
3204     emit_block(f32_16, sizeof(f32_16));
3205    
3206     nbytes %= 16;
3207     if (nbytes)
3208     emit_block(f32_patt[nbytes - 1], nbytes);
3209 gbeauche 1.21 #endif
3210 gbeauche 1.8 }
3211    
3212 gbeauche 1.1
3213     /*************************************************************************
3214     * Flag handling, to and fro UAE flag register *
3215     *************************************************************************/
3216    
3217     #ifdef SAHF_SETO_PROFITABLE
3218    
3219     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3220    
3221     static __inline__ void raw_flags_to_reg(int r)
3222     {
3223     raw_lahf(0); /* Most flags in AH */
3224     //raw_setcc(r,0); /* V flag in AL */
3225 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3226 gbeauche 1.1
3227     #if 1 /* Let's avoid those nasty partial register stalls */
3228 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3229     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3230 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3231     live.state[FLAGTMP].status=INMEM;
3232     live.state[FLAGTMP].realreg=-1;
3233     /* We just "evicted" FLAGTMP. */
3234     if (live.nat[r].nholds!=1) {
3235     /* Huh? */
3236     abort();
3237     }
3238     live.nat[r].nholds=0;
3239     #endif
3240     }
3241    
3242     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3243     static __inline__ void raw_reg_to_flags(int r)
3244     {
3245     raw_cmp_b_ri(r,-127); /* set V */
3246     raw_sahf(0);
3247     }
3248    
3249 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3250     static __inline__ void raw_flags_set_zero(int s, int tmp)
3251     {
3252     raw_mov_l_rr(tmp,s);
3253     raw_lahf(s); /* flags into ah */
3254     raw_and_l_ri(s,0xffffbfff);
3255     raw_and_l_ri(tmp,0x00004000);
3256     raw_xor_l_ri(tmp,0x00004000);
3257     raw_or_l(s,tmp);
3258     raw_sahf(s);
3259     }
3260    
3261 gbeauche 1.1 #else
3262    
3263     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3264     static __inline__ void raw_flags_to_reg(int r)
3265     {
3266     raw_pushfl();
3267     raw_pop_l_r(r);
3268 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3269 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3270     live.state[FLAGTMP].status=INMEM;
3271     live.state[FLAGTMP].realreg=-1;
3272     /* We just "evicted" FLAGTMP. */
3273     if (live.nat[r].nholds!=1) {
3274     /* Huh? */
3275     abort();
3276     }
3277     live.nat[r].nholds=0;
3278     }
3279    
3280     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3281     static __inline__ void raw_reg_to_flags(int r)
3282     {
3283     raw_push_l_r(r);
3284     raw_popfl();
3285     }
3286    
3287 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3288     static __inline__ void raw_flags_set_zero(int s, int tmp)
3289     {
3290     raw_mov_l_rr(tmp,s);
3291     raw_pushfl();
3292     raw_pop_l_r(s);
3293     raw_and_l_ri(s,0xffffffbf);
3294     raw_and_l_ri(tmp,0x00000040);
3295     raw_xor_l_ri(tmp,0x00000040);
3296     raw_or_l(s,tmp);
3297     raw_push_l_r(s);
3298     raw_popfl();
3299     }
3300 gbeauche 1.1 #endif
3301    
3302     /* Apparently, there are enough instructions between flag store and
3303     flag reload to avoid the partial memory stall */
3304     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3305     {
3306     #if 1
3307 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3308 gbeauche 1.1 #else
3309 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3310     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3311 gbeauche 1.1 #endif
3312     }
3313    
3314     /* FLAGX is byte sized, and we *do* write it at that size */
3315     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3316     {
3317     if (live.nat[target].canbyte)
3318 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3319 gbeauche 1.1 else if (live.nat[target].canword)
3320 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3321 gbeauche 1.1 else
3322 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3323 gbeauche 1.1 }
3324    
3325     static __inline__ void raw_inc_sp(int off)
3326     {
3327 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3328 gbeauche 1.1 }
3329    
3330     /*************************************************************************
3331     * Handling mistaken direct memory access *
3332     *************************************************************************/
3333    
3334     // gb-- I don't need that part for JIT Basilisk II
3335     #if defined(NATMEM_OFFSET) && 0
3336     #include <asm/sigcontext.h>
3337     #include <signal.h>
3338    
3339     #define SIG_READ 1
3340     #define SIG_WRITE 2
3341    
3342     static int in_handler=0;
3343     static uae_u8 veccode[256];
3344    
3345     static void vec(int x, struct sigcontext sc)
3346     {
3347     uae_u8* i=(uae_u8*)sc.eip;
3348     uae_u32 addr=sc.cr2;
3349     int r=-1;
3350     int size=4;
3351     int dir=-1;
3352     int len=0;
3353     int j;
3354    
3355     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3356     if (!canbang)
3357     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3358     if (in_handler)
3359     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3360    
3361     if (canbang && i>=compiled_code && i<=current_compile_p) {
3362     if (*i==0x66) {
3363     i++;
3364     size=2;
3365     len++;
3366     }
3367    
3368     switch(i[0]) {
3369     case 0x8a:
3370     if ((i[1]&0xc0)==0x80) {
3371     r=(i[1]>>3)&7;
3372     dir=SIG_READ;
3373     size=1;
3374     len+=6;
3375     break;
3376     }
3377     break;
3378     case 0x88:
3379     if ((i[1]&0xc0)==0x80) {
3380     r=(i[1]>>3)&7;
3381     dir=SIG_WRITE;
3382     size=1;
3383     len+=6;
3384     break;
3385     }
3386     break;
3387     case 0x8b:
3388     if ((i[1]&0xc0)==0x80) {
3389     r=(i[1]>>3)&7;
3390     dir=SIG_READ;
3391     len+=6;
3392     break;
3393     }
3394     if ((i[1]&0xc0)==0x40) {
3395     r=(i[1]>>3)&7;
3396     dir=SIG_READ;
3397     len+=3;
3398     break;
3399     }
3400     break;
3401     case 0x89:
3402     if ((i[1]&0xc0)==0x80) {
3403     r=(i[1]>>3)&7;
3404     dir=SIG_WRITE;
3405     len+=6;
3406     break;
3407     }
3408     if ((i[1]&0xc0)==0x40) {
3409     r=(i[1]>>3)&7;
3410     dir=SIG_WRITE;
3411     len+=3;
3412     break;
3413     }
3414     break;
3415     }
3416     }
3417    
3418     if (r!=-1) {
3419     void* pr=NULL;
3420     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3421    
3422     switch(r) {
3423     case 0: pr=&(sc.eax); break;
3424     case 1: pr=&(sc.ecx); break;
3425     case 2: pr=&(sc.edx); break;
3426     case 3: pr=&(sc.ebx); break;
3427     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3428     case 5: pr=(size>1)?
3429     (void*)(&(sc.ebp)):
3430     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3431     case 6: pr=(size>1)?
3432     (void*)(&(sc.esi)):
3433     (void*)(((uae_u8*)&(sc.edx))+1); break;
3434     case 7: pr=(size>1)?
3435     (void*)(&(sc.edi)):
3436     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3437     default: abort();
3438     }
3439     if (pr) {
3440     blockinfo* bi;
3441    
3442     if (currprefs.comp_oldsegv) {
3443     addr-=NATMEM_OFFSET;
3444    
3445     if ((addr>=0x10000000 && addr<0x40000000) ||
3446     (addr>=0x50000000)) {
3447     write_log("Suspicious address in %x SEGV handler.\n",addr);
3448     }
3449     if (dir==SIG_READ) {
3450     switch(size) {
3451     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3452     case 2: *((uae_u16*)pr)=get_word(addr); break;
3453     case 4: *((uae_u32*)pr)=get_long(addr); break;
3454     default: abort();
3455     }
3456     }
3457     else { /* write */
3458     switch(size) {
3459     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3460     case 2: put_word(addr,*((uae_u16*)pr)); break;
3461     case 4: put_long(addr,*((uae_u32*)pr)); break;
3462     default: abort();
3463     }
3464     }
3465     write_log("Handled one access!\n");
3466     fflush(stdout);
3467     segvcount++;
3468     sc.eip+=len;
3469     }
3470     else {
3471     void* tmp=target;
3472     int i;
3473     uae_u8 vecbuf[5];
3474    
3475     addr-=NATMEM_OFFSET;
3476    
3477     if ((addr>=0x10000000 && addr<0x40000000) ||
3478     (addr>=0x50000000)) {
3479     write_log("Suspicious address in %x SEGV handler.\n",addr);
3480     }
3481    
3482     target=(uae_u8*)sc.eip;
3483     for (i=0;i<5;i++)
3484     vecbuf[i]=target[i];
3485     emit_byte(0xe9);
3486 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3487 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3488    
3489     write_log("Handled one access!\n");
3490     fflush(stdout);
3491     segvcount++;
3492    
3493     target=veccode;
3494    
3495     if (dir==SIG_READ) {
3496     switch(size) {
3497     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3498     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3499     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3500     default: abort();
3501     }
3502     }
3503     else { /* write */
3504     switch(size) {
3505     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3506     case 2: put_word(addr,*((uae_u16*)pr)); break;
3507     case 4: put_long(addr,*((uae_u32*)pr)); break;
3508     default: abort();
3509     }
3510     }
3511     for (i=0;i<5;i++)
3512     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3513 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3514 gbeauche 1.1 emit_byte(0xe9);
3515 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3516 gbeauche 1.1 in_handler=1;
3517     target=tmp;
3518     }
3519     bi=active;
3520     while (bi) {
3521     if (bi->handler &&
3522     (uae_u8*)bi->direct_handler<=i &&
3523     (uae_u8*)bi->nexthandler>i) {
3524     write_log("deleted trigger (%p<%p<%p) %p\n",
3525     bi->handler,
3526     i,
3527     bi->nexthandler,
3528     bi->pc_p);
3529     invalidate_block(bi);
3530     raise_in_cl_list(bi);
3531     set_special(0);
3532     return;
3533     }
3534     bi=bi->next;
3535     }
3536     /* Not found in the active list. Might be a rom routine that
3537     is in the dormant list */
3538     bi=dormant;
3539     while (bi) {
3540     if (bi->handler &&
3541     (uae_u8*)bi->direct_handler<=i &&
3542     (uae_u8*)bi->nexthandler>i) {
3543     write_log("deleted trigger (%p<%p<%p) %p\n",
3544     bi->handler,
3545     i,
3546     bi->nexthandler,
3547     bi->pc_p);
3548     invalidate_block(bi);
3549     raise_in_cl_list(bi);
3550     set_special(0);
3551     return;
3552     }
3553     bi=bi->next;
3554     }
3555     write_log("Huh? Could not find trigger!\n");
3556     return;
3557     }
3558     }
3559     write_log("Can't handle access!\n");
3560     for (j=0;j<10;j++) {
3561     write_log("instruction byte %2d is %02x\n",j,i[j]);
3562     }
3563     write_log("Please send the above info (starting at \"fault address\") to\n"
3564     "bmeyer@csse.monash.edu.au\n"
3565     "This shouldn't happen ;-)\n");
3566     fflush(stdout);
3567     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3568     }
3569     #endif
3570    
3571    
3572     /*************************************************************************
3573     * Checking for CPU features *
3574     *************************************************************************/
3575    
3576 gbeauche 1.3 struct cpuinfo_x86 {
3577     uae_u8 x86; // CPU family
3578     uae_u8 x86_vendor; // CPU vendor
3579     uae_u8 x86_processor; // CPU canonical processor type
3580     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3581     uae_u32 x86_hwcap;
3582     uae_u8 x86_model;
3583     uae_u8 x86_mask;
3584     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3585     char x86_vendor_id[16];
3586     };
3587     struct cpuinfo_x86 cpuinfo;
3588    
/* CPU vendor codes stored in cpuinfo_x86::x86_vendor. */
enum {
    X86_VENDOR_INTEL,               /* 0 */
    X86_VENDOR_CYRIX,               /* 1 */
    X86_VENDOR_AMD,                 /* 2 */
    X86_VENDOR_UMC,                 /* 3 */
    X86_VENDOR_NEXGEN,              /* 4 */
    X86_VENDOR_CENTAUR,             /* 5 */
    X86_VENDOR_RISE,                /* 6 */
    X86_VENDOR_TRANSMETA,           /* 7 */
    X86_VENDOR_NSC,                 /* 8 */
    X86_VENDOR_UNKNOWN = 0xff       /* vendor string not recognised */
};
3601    
/* Canonical processor types.  The values index the per-processor
   tables, so X86_PROCESSOR_max must stay last. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_K8         = 7,
    X86_PROCESSOR_max        = 8    /* number of processor types */
};
3613    
3614     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3615     "80386",
3616     "80486",
3617     "Pentium",
3618     "PentiumPro",
3619     "K6",
3620     "Athlon",
3621 gbeauche 1.16 "Pentium4",
3622     "K8"
3623 gbeauche 1.3 };
3624    
3625     static struct ptt {
3626     const int align_loop;
3627     const int align_loop_max_skip;
3628     const int align_jump;
3629     const int align_jump_max_skip;
3630     const int align_func;
3631     }
3632     x86_alignments[X86_PROCESSOR_max] = {
3633     { 4, 3, 4, 3, 4 },
3634     { 16, 15, 16, 15, 16 },
3635     { 16, 7, 16, 7, 16 },
3636     { 16, 15, 16, 7, 16 },
3637     { 32, 7, 32, 7, 32 },
3638 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3639 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3640     { 16, 7, 16, 7, 16 }
3641 gbeauche 1.3 };
3642 gbeauche 1.1
3643 gbeauche 1.3 static void
3644     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3645 gbeauche 1.1 {
3646 gbeauche 1.3 char *v = c->x86_vendor_id;
3647    
3648     if (!strcmp(v, "GenuineIntel"))
3649     c->x86_vendor = X86_VENDOR_INTEL;
3650     else if (!strcmp(v, "AuthenticAMD"))
3651     c->x86_vendor = X86_VENDOR_AMD;
3652     else if (!strcmp(v, "CyrixInstead"))
3653     c->x86_vendor = X86_VENDOR_CYRIX;
3654     else if (!strcmp(v, "Geode by NSC"))
3655     c->x86_vendor = X86_VENDOR_NSC;
3656     else if (!strcmp(v, "UMC UMC UMC "))
3657     c->x86_vendor = X86_VENDOR_UMC;
3658     else if (!strcmp(v, "CentaurHauls"))
3659     c->x86_vendor = X86_VENDOR_CENTAUR;
3660     else if (!strcmp(v, "NexGenDriven"))
3661     c->x86_vendor = X86_VENDOR_NEXGEN;
3662     else if (!strcmp(v, "RiseRiseRise"))
3663     c->x86_vendor = X86_VENDOR_RISE;
3664     else if (!strcmp(v, "GenuineTMx86") ||
3665     !strcmp(v, "TransmetaCPU"))
3666     c->x86_vendor = X86_VENDOR_TRANSMETA;
3667     else
3668     c->x86_vendor = X86_VENDOR_UNKNOWN;
3669     }
3670 gbeauche 1.1
3671 gbeauche 1.3 static void
3672     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3673     {
3674     static uae_u8 cpuid_space[256];
3675 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3676 gbeauche 1.3 uae_u8* tmp=get_target();
3677 gbeauche 1.1
3678 gbeauche 1.20 s_op = op;
3679 gbeauche 1.3 set_target(cpuid_space);
3680     raw_push_l_r(0); /* eax */
3681     raw_push_l_r(1); /* ecx */
3682     raw_push_l_r(2); /* edx */
3683     raw_push_l_r(3); /* ebx */
3684 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3685 gbeauche 1.3 raw_cpuid(0);
3686 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3687     raw_mov_l_mr((uintptr)&s_ebx,3);
3688     raw_mov_l_mr((uintptr)&s_ecx,1);
3689     raw_mov_l_mr((uintptr)&s_edx,2);
3690 gbeauche 1.3 raw_pop_l_r(3);
3691     raw_pop_l_r(2);
3692     raw_pop_l_r(1);
3693     raw_pop_l_r(0);
3694     raw_ret();
3695     set_target(tmp);
3696 gbeauche 1.1
3697 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3698 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3699     if (ebx != NULL) *ebx = s_ebx;
3700     if (ecx != NULL) *ecx = s_ecx;
3701     if (edx != NULL) *edx = s_edx;
3702 gbeauche 1.1 }
3703    
3704 gbeauche 1.3 static void
3705     raw_init_cpu(void)
3706 gbeauche 1.1 {
3707 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3708    
3709     /* Defaults */
3710 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3711 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3712     c->cpuid_level = -1; /* CPUID not detected */
3713     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3714     c->x86_vendor_id[0] = '\0'; /* Unset */
3715     c->x86_hwcap = 0;
3716    
3717     /* Get vendor name */
3718     c->x86_vendor_id[12] = '\0';
3719     cpuid(0x00000000,
3720     (uae_u32 *)&c->cpuid_level,
3721     (uae_u32 *)&c->x86_vendor_id[0],
3722     (uae_u32 *)&c->x86_vendor_id[8],
3723     (uae_u32 *)&c->x86_vendor_id[4]);
3724     x86_get_cpu_vendor(c);
3725    
3726     /* Intel-defined flags: level 0x00000001 */
3727     c->x86_brand_id = 0;
3728     if ( c->cpuid_level >= 0x00000001 ) {
3729     uae_u32 tfms, brand_id;
3730     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3731     c->x86 = (tfms >> 8) & 15;
3732     c->x86_model = (tfms >> 4) & 15;
3733     c->x86_brand_id = brand_id & 0xff;
3734     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3735     (c->x86 == 0xf)) {
3736     /* AMD Extended Family and Model Values */
3737     c->x86 += (tfms >> 20) & 0xff;
3738     c->x86_model += (tfms >> 12) & 0xf0;
3739     }
3740     c->x86_mask = tfms & 15;
3741     } else {
3742     /* Have CPUID level 0 only - unheard of */
3743     c->x86 = 4;
3744     }
3745    
3746 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3747     uae_u32 xlvl;
3748     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3749     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3750     if ( xlvl >= 0x80000001 ) {
3751     uae_u32 features;
3752     cpuid(0x80000001, NULL, NULL, NULL, &features);
3753     if (features & (1 << 29)) {
3754     /* Assume x86-64 if long mode is supported */
3755     c->x86_processor = X86_PROCESSOR_K8;
3756     }
3757     }
3758     }
3759    
3760 gbeauche 1.3 /* Canonicalize processor ID */
3761     switch (c->x86) {
3762     case 3:
3763     c->x86_processor = X86_PROCESSOR_I386;
3764     break;
3765     case 4:
3766     c->x86_processor = X86_PROCESSOR_I486;
3767     break;
3768     case 5:
3769     if (c->x86_vendor == X86_VENDOR_AMD)
3770     c->x86_processor = X86_PROCESSOR_K6;
3771     else
3772     c->x86_processor = X86_PROCESSOR_PENTIUM;
3773     break;
3774     case 6:
3775     if (c->x86_vendor == X86_VENDOR_AMD)
3776     c->x86_processor = X86_PROCESSOR_ATHLON;
3777     else
3778     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3779     break;
3780     case 15:
3781     if (c->x86_vendor == X86_VENDOR_INTEL) {
3782 gbeauche 1.16 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3783 gbeauche 1.3 if (c->x86_brand_id >= 8)
3784     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3785     }
3786 gbeauche 1.16 if (c->x86_vendor == X86_VENDOR_AMD) {
3787     /* Assume an Athlon processor if family == 15 and it was not
3788     detected as an x86-64 so far */
3789     if (c->x86_processor == X86_PROCESSOR_max)
3790     c->x86_processor = X86_PROCESSOR_ATHLON;
3791     }
3792 gbeauche 1.3 break;
3793     }
3794     if (c->x86_processor == X86_PROCESSOR_max) {
3795     fprintf(stderr, "Error: unknown processor type\n");
3796     fprintf(stderr, " Family : %d\n", c->x86);
3797     fprintf(stderr, " Model : %d\n", c->x86_model);
3798     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3799 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3800 gbeauche 1.3 if (c->x86_brand_id)
3801     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3802     abort();
3803     }
3804    
3805     /* Have CMOV support? */
3806 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3807 gbeauche 1.3
3808     /* Can the host CPU suffer from partial register stalls? */
3809     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3810     #if 1
3811     /* It appears that partial register writes are a bad idea even on
3812 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3813     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3814 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3815     have_rat_stall = true;
3816 gbeauche 1.1 #endif
3817 gbeauche 1.3
3818     /* Alignments */
3819     if (tune_alignment) {
3820     align_loops = x86_alignments[c->x86_processor].align_loop;
3821     align_jumps = x86_alignments[c->x86_processor].align_jump;
3822     }
3823    
3824     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3825     c->cpuid_level, c->x86_vendor_id,
3826     x86_processor_string_table[c->x86_processor]);
3827 gbeauche 1.1 }
3828    
3829 gbeauche 1.10 static bool target_check_bsf(void)
3830     {
3831     bool mismatch = false;
3832     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3833     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3834     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3835     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3836     for (int value = -1; value <= 1; value++) {
3837     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3838     int tmp = value;
3839     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3840 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3841 gbeauche 1.10 int OF = (flags >> 11) & 1;
3842     int SF = (flags >> 7) & 1;
3843     int ZF = (flags >> 6) & 1;
3844     int CF = flags & 1;
3845     tmp = (value == 0);
3846     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3847     mismatch = true;
3848     }
3849     }}}}
3850     if (mismatch)
3851     write_log("Target CPU defines all flags on BSF instruction\n");
3852     return !mismatch;
3853     }
3854    
3855 gbeauche 1.1
3856     /*************************************************************************
3857     * FPU stuff *
3858     *************************************************************************/
3859    
3860    
3861     static __inline__ void raw_fp_init(void)
3862     {
3863     int i;
3864    
3865     for (i=0;i<N_FREGS;i++)
3866     live.spos[i]=-2;
3867     live.tos=-1; /* Stack is empty */
3868     }
3869    
3870     static __inline__ void raw_fp_cleanup_drop(void)
3871     {
3872     #if 0
3873     /* using FINIT instead of popping all the entries.
3874     Seems to have side effects --- there is display corruption in
3875     Quake when this is used */
3876     if (live.tos>1) {
3877     emit_byte(0x9b);
3878     emit_byte(0xdb);
3879     emit_byte(0xe3);
3880     live.tos=-1;
3881     }
3882     #endif
3883     while (live.tos>=1) {
3884     emit_byte(0xde);
3885     emit_byte(0xd9);
3886     live.tos-=2;
3887     }
3888     while (live.tos>=0) {
3889     emit_byte(0xdd);
3890     emit_byte(0xd8);
3891     live.tos--;
3892     }
3893     raw_fp_init();
3894     }
3895    
3896     static __inline__ void make_tos(int r)
3897     {
3898     int p,q;
3899    
3900     if (live.spos[r]<0) { /* Register not yet on stack */
3901     emit_byte(0xd9);
3902     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3903     live.tos++;
3904     live.spos[r]=live.tos;
3905     live.onstack[live.tos]=r;
3906     return;
3907     }
3908     /* Register is on stack */
3909     if (live.tos==live.spos[r])
3910     return;
3911     p=live.spos[r];
3912     q=live.onstack[live.tos];
3913    
3914     emit_byte(0xd9);
3915     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3916     live.onstack[live.tos]=r;
3917     live.spos[r]=live.tos;
3918     live.onstack[p]=q;
3919     live.spos[q]=p;
3920     }
3921    
3922     static __inline__ void make_tos2(int r, int r2)
3923     {
3924     int q;
3925    
3926     make_tos(r2); /* Put the reg that's supposed to end up in position2
3927     on top */
3928    
3929     if (live.spos[r]<0) { /* Register not yet on stack */
3930     make_tos(r); /* This will extend the stack */
3931     return;
3932     }
3933     /* Register is on stack */
3934     emit_byte(0xd9);
3935     emit_byte(0xc9); /* Move r2 into position 2 */
3936    
3937     q=live.onstack[live.tos-1];
3938     live.onstack[live.tos]=q;
3939     live.spos[q]=live.tos;
3940     live.onstack[live.tos-1]=r2;
3941     live.spos[r2]=live.tos-1;
3942    
3943     make_tos(r); /* And r into 1 */
3944     }
3945    
3946     static __inline__ int stackpos(int r)
3947     {
3948     if (live.spos[r]<0)
3949     abort();
3950     if (live.tos<live.spos[r]) {
3951     printf("Looking for spos for fnreg %d\n",r);
3952     abort();
3953     }
3954     return live.tos-live.spos[r];
3955     }
3956    
3957     static __inline__ void usereg(int r)
3958     {
3959     if (live.spos[r]<0)
3960     make_tos(r);
3961     }
3962    
3963     /* This is called with one FP value in a reg *above* tos, which it will
3964     pop off the stack if necessary */
3965     static __inline__ void tos_make(int r)
3966     {
3967     if (live.spos[r]<0) {
3968     live.tos++;
3969     live.spos[r]=live.tos;
3970     live.onstack[live.tos]=r;
3971     return;
3972     }
3973     emit_byte(0xdd);
3974     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3975     and pop it*/
3976     }
3977 gbeauche 1.23
3978     /* FP helper functions */
3979     #if USE_NEW_RTASM
3980     #define DEFINE_OP(NAME, GEN) \
3981     static inline void raw_##NAME(uint32 m) \
3982     { \
3983     GEN(m, X86_NOREG, X86_NOREG, 1); \
3984     }
3985     DEFINE_OP(fstl, FSTLm);
3986     DEFINE_OP(fstpl, FSTPLm);
3987     DEFINE_OP(fldl, FLDLm);
3988     DEFINE_OP(fildl, FILDLm);
3989     DEFINE_OP(fistl, FISTLm);
3990     DEFINE_OP(flds, FLDSm);
3991     DEFINE_OP(fsts, FSTSm);
3992     DEFINE_OP(fstpt, FSTPTm);
3993     DEFINE_OP(fldt, FLDTm);
3994     #else
3995     #define DEFINE_OP(NAME, OP1, OP2) \
3996     static inline void raw_##NAME(uint32 m) \
3997     { \
3998     emit_byte(OP1); \
3999     emit_byte(OP2); \
4000     emit_long(m); \
4001     }
4002     DEFINE_OP(fstl, 0xdd, 0x15);
4003     DEFINE_OP(fstpl, 0xdd, 0x1d);
4004     DEFINE_OP(fldl, 0xdd, 0x05);
4005     DEFINE_OP(fildl, 0xdb, 0x05);
4006     DEFINE_OP(fistl, 0xdb, 0x15);
4007     DEFINE_OP(flds, 0xd9, 0x05);
4008     DEFINE_OP(fsts, 0xd9, 0x15);
4009     DEFINE_OP(fstpt, 0xdb, 0x3d);
4010     DEFINE_OP(fldt, 0xdb, 0x2d);
4011     #endif
4012     #undef DEFINE_OP
4013    
4014 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4015     {
4016     make_tos(r);
4017 gbeauche 1.23 raw_fstl(m);
4018 gbeauche 1.1 }
4019     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4020    
4021     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4022     {
4023     make_tos(r);
4024 gbeauche 1.23 raw_fstpl(m);
4025 gbeauche 1.1 live.onstack[live.tos]=-1;
4026     live.tos--;
4027     live.spos[r]=-2;
4028     }
4029     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4030    
4031     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4032     {
4033 gbeauche 1.23 raw_fldl(m);
4034 gbeauche 1.1 tos_make(r);
4035     }
4036     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4037    
4038     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4039     {
4040 gbeauche 1.23 raw_fildl(m);
4041 gbeauche 1.1 tos_make(r);
4042     }
4043     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4044    
4045     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4046     {
4047     make_tos(r);
4048 gbeauche 1.23 raw_fistl(m);
4049 gbeauche 1.1 }
4050     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4051    
4052     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4053     {
4054 gbeauche 1.23 raw_flds(m);
4055 gbeauche 1.1 tos_make(r);
4056     }
4057     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4058    
4059     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4060     {
4061     make_tos(r);
4062 gbeauche 1.23 raw_fsts(m);
4063 gbeauche 1.1 }
4064     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4065    
4066     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4067     {
4068     int rs;
4069    
4070     /* Stupid x87 can't write a long double to mem without popping the
4071     stack! */
4072     usereg(r);
4073     rs=stackpos(r);
4074     emit_byte(0xd9); /* Get a copy to the top of stack */
4075     emit_byte(0xc0+rs);
4076    
4077 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4078 gbeauche 1.1 }
4079     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4080    
4081     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4082     {
4083     int rs;
4084    
4085     make_tos(r);
4086 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4087 gbeauche 1.1 live.onstack[live.tos]=-1;
4088     live.tos--;
4089     live.spos[r]=-2;
4090     }
4091     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4092    
4093     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4094     {
4095 gbeauche 1.23 raw_fldt(m);
4096 gbeauche 1.1 tos_make(r);
4097     }
4098     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4099    
4100     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4101     {
4102     emit_byte(0xd9);
4103     emit_byte(0xeb);
4104     tos_make(r);
4105     }
4106     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4107    
4108     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4109     {
4110     emit_byte(0xd9);
4111     emit_byte(0xec);
4112     tos_make(r);
4113     }
4114     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4115    
4116     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4117     {
4118     emit_byte(0xd9);
4119     emit_byte(0xea);
4120     tos_make(r);
4121     }
4122     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4123    
4124     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4125     {
4126     emit_byte(0xd9);
4127     emit_byte(0xed);
4128     tos_make(r);
4129     }
4130     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4131    
4132     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4133     {
4134     emit_byte(0xd9);
4135     emit_byte(0xe8);
4136     tos_make(r);
4137     }
4138     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4139    
4140     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4141     {
4142     emit_byte(0xd9);
4143     emit_byte(0xee);
4144     tos_make(r);
4145     }
4146     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4147    
4148     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4149     {
4150     int ds;
4151    
4152     usereg(s);
4153     ds=stackpos(s);
4154     if (ds==0 && live.spos[d]>=0) {
4155     /* source is on top of stack, and we already have the dest */
4156     int dd=stackpos(d);
4157     emit_byte(0xdd);
4158     emit_byte(0xd0+dd);
4159     }
4160     else {
4161     emit_byte(0xd9);
4162     emit_byte(0xc0+ds); /* duplicate source on tos */
4163     tos_make(d); /* store to destination, pop if necessary */
4164     }
4165     }
4166     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4167    
4168     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4169     {
4170     emit_byte(0xd9);
4171     emit_byte(0xa8+index);
4172     emit_long(base);
4173     }
4174     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4175    
4176    
4177     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4178     {
4179     int ds;
4180    
4181     if (d!=s) {
4182     usereg(s);
4183     ds=stackpos(s);
4184     emit_byte(0xd9);
4185     emit_byte(0xc0+ds); /* duplicate source */
4186     emit_byte(0xd9);
4187     emit_byte(0xfa); /* take square root */
4188     tos_make(d); /* store to destination */
4189     }
4190     else {
4191     make_tos(d);
4192     emit_byte(0xd9);
4193     emit_byte(0xfa); /* take square root */
4194     }
4195     }
4196     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4197    
4198     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4199     {
4200     int ds;
4201    
4202     if (d!=s) {
4203     usereg(s);
4204     ds=stackpos(s);
4205     emit_byte(0xd9);
4206     emit_byte(0xc0+ds); /* duplicate source */
4207     emit_byte(0xd9);
4208     emit_byte(0xe1); /* take fabs */
4209     tos_make(d); /* store to destination */
4210     }
4211     else {
4212     make_tos(d);
4213     emit_byte(0xd9);
4214     emit_byte(0xe1); /* take fabs */
4215     }
4216     }
4217     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4218    
4219     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4220     {
4221     int ds;
4222    
4223     if (d!=s) {
4224     usereg(s);
4225     ds=stackpos(s);
4226     emit_byte(0xd9);
4227     emit_byte(0xc0+ds); /* duplicate source */
4228     emit_byte(0xd9);
4229     emit_byte(0xfc); /* take frndint */
4230     tos_make(d); /* store to destination */
4231     }
4232     else {
4233     make_tos(d);
4234     emit_byte(0xd9);
4235     emit_byte(0xfc); /* take frndint */
4236     }
4237     }
4238     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4239    
4240     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4241     {
4242     int ds;
4243    
4244     if (d!=s) {
4245     usereg(s);
4246     ds=stackpos(s);
4247     emit_byte(0xd9);
4248     emit_byte(0xc0+ds); /* duplicate source */
4249     emit_byte(0xd9);
4250     emit_byte(0xff); /* take cos */
4251     tos_make(d); /* store to destination */
4252     }
4253     else {
4254     make_tos(d);
4255     emit_byte(0xd9);
4256     emit_byte(0xff); /* take cos */
4257     }
4258     }
4259     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4260    
4261     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4262     {
4263     int ds;
4264    
4265     if (d!=s) {
4266     usereg(s);
4267     ds=stackpos(s);
4268     emit_byte(0xd9);
4269     emit_byte(0xc0+ds); /* duplicate source */
4270     emit_byte(0xd9);
4271     emit_byte(0xfe); /* take sin */
4272     tos_make(d); /* store to destination */
4273     }
4274     else {
4275     make_tos(d);
4276     emit_byte(0xd9);
4277     emit_byte(0xfe); /* take sin */
4278     }
4279     }
4280     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4281    
4282     double one=1;
4283     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4284     {
4285     int ds;
4286    
4287     usereg(s);
4288     ds=stackpos(s);
4289     emit_byte(0xd9);
4290     emit_byte(0xc0+ds); /* duplicate source */
4291    
4292     emit_byte(0xd9);
4293     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4294     emit_byte(0xd9);
4295     emit_byte(0xfc); /* rndint */
4296     emit_byte(0xd9);
4297     emit_byte(0xc9); /* swap top two elements */
4298     emit_byte(0xd8);
4299     emit_byte(0xe1); /* subtract rounded from original */
4300     emit_byte(0xd9);
4301     emit_byte(0xf0); /* f2xm1 */
4302     emit_byte(0xdc);
4303     emit_byte(0x05);
4304 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4305 gbeauche 1.1 emit_byte(0xd9);
4306     emit_byte(0xfd); /* and scale it */
4307     emit_byte(0xdd);
4308     emit_byte(0xd9); /* take he rounded value off */
4309     tos_make(d); /* store to destination */
4310     }
4311     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4312    
4313     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4314     {
4315     int ds;
4316    
4317     usereg(s);
4318     ds=stackpos(s);
4319     emit_byte(0xd9);
4320     emit_byte(0xc0+ds); /* duplicate source */
4321     emit_byte(0xd9);
4322     emit_byte(0xea); /* fldl2e */
4323     emit_byte(0xde);
4324     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4325    
4326     emit_byte(0xd9);
4327     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4328     emit_byte(0xd9);
4329     emit_byte(0xfc); /* rndint */
4330     emit_byte(0xd9);
4331     emit_byte(0xc9); /* swap top two elements */
4332     emit_byte(0xd8);
4333     emit_byte(0xe1); /* subtract rounded from original */
4334     emit_byte(0xd9);
4335     emit_byte(0xf0); /* f2xm1 */
4336     emit_byte(0xdc);
4337     emit_byte(0x05);
4338 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4339 gbeauche 1.1 emit_byte(0xd9);
4340     emit_byte(0xfd); /* and scale it */
4341     emit_byte(0xdd);
4342     emit_byte(0xd9); /* take he rounded value off */
4343     tos_make(d); /* store to destination */
4344     }
4345     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4346    
4347     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4348     {
4349     int ds;
4350    
4351     usereg(s);
4352     ds=stackpos(s);
4353     emit_byte(0xd9);
4354     emit_byte(0xc0+ds); /* duplicate source */
4355     emit_byte(0xd9);
4356     emit_byte(0xe8); /* push '1' */
4357     emit_byte(0xd9);
4358     emit_byte(0xc9); /* swap top two */
4359     emit_byte(0xd9);
4360     emit_byte(0xf1); /* take 1*log2(x) */
4361     tos_make(d); /* store to destination */
4362     }
4363     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4364    
4365    
4366     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4367     {
4368     int ds;
4369    
4370     if (d!=s) {
4371     usereg(s);
4372     ds=stackpos(s);
4373     emit_byte(0xd9);
4374     emit_byte(0xc0+ds); /* duplicate source */
4375     emit_byte(0xd9);
4376     emit_byte(0xe0); /* take fchs */
4377     tos_make(d); /* store to destination */
4378     }
4379     else {
4380     make_tos(d);
4381     emit_byte(0xd9);
4382     emit_byte(0xe0); /* take fchs */
4383     }
4384     }
4385     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4386    
4387     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4388     {
4389     int ds;
4390    
4391     usereg(s);
4392     usereg(d);
4393    
4394     if (live.spos[s]==live.tos) {
4395     /* Source is on top of stack */
4396     ds=stackpos(d);
4397     emit_byte(0xdc);
4398     emit_byte(0xc0+ds); /* add source to dest*/
4399     }
4400     else {
4401     make_tos(d);
4402     ds=stackpos(s);
4403    
4404     emit_byte(0xd8);
4405     emit_byte(0xc0+ds); /* add source to dest*/
4406     }
4407     }
4408     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4409    
4410     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4411     {
4412     int ds;
4413    
4414     usereg(s);
4415     usereg(d);
4416    
4417     if (live.spos[s]==live.tos) {
4418     /* Source is on top of stack */
4419     ds=stackpos(d);
4420     emit_byte(0xdc);
4421     emit_byte(0xe8+ds); /* sub source from dest*/
4422     }
4423     else {
4424     make_tos(d);
4425     ds=stackpos(s);
4426    
4427     emit_byte(0xd8);
4428     emit_byte(0xe0+ds); /* sub src from dest */
4429     }
4430     }
4431     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4432    
4433     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4434     {
4435     int ds;
4436    
4437     usereg(s);
4438     usereg(d);
4439    
4440     make_tos(d);
4441     ds=stackpos(s);
4442    
4443     emit_byte(0xdd);
4444     emit_byte(0xe0+ds); /* cmp dest with source*/
4445     }
4446     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4447    
4448     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4449     {
4450     int ds;
4451    
4452     usereg(s);
4453     usereg(d);
4454    
4455     if (live.spos[s]==live.tos) {
4456     /* Source is on top of stack */
4457     ds=stackpos(d);
4458     emit_byte(0xdc);
4459     emit_byte(0xc8+ds); /* mul dest by source*/
4460     }
4461     else {
4462     make_tos(d);
4463     ds=stackpos(s);
4464    
4465     emit_byte(0xd8);
4466     emit_byte(0xc8+ds); /* mul dest by source*/
4467     }
4468     }
4469     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4470    
4471     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4472     {
4473     int ds;
4474    
4475     usereg(s);
4476     usereg(d);
4477    
4478     if (live.spos[s]==live.tos) {
4479     /* Source is on top of stack */
4480     ds=stackpos(d);
4481     emit_byte(0xdc);
4482     emit_byte(0xf8+ds); /* div dest by source */
4483     }
4484     else {
4485     make_tos(d);
4486     ds=stackpos(s);
4487    
4488     emit_byte(0xd8);
4489     emit_byte(0xf0+ds); /* div dest by source*/
4490     }
4491     }
4492     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4493    
4494     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4495     {
4496     int ds;
4497    
4498     usereg(s);
4499     usereg(d);
4500    
4501     make_tos2(d,s);
4502     ds=stackpos(s);
4503    
4504     if (ds!=1) {
4505     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4506     abort();
4507     }
4508     emit_byte(0xd9);
4509     emit_byte(0xf8); /* take rem from dest by source */
4510     }
4511     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4512    
4513     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4514     {
4515     int ds;
4516    
4517     usereg(s);
4518     usereg(d);
4519    
4520     make_tos2(d,s);
4521     ds=stackpos(s);
4522    
4523     if (ds!=1) {
4524     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4525     abort();
4526     }
4527     emit_byte(0xd9);
4528     emit_byte(0xf5); /* take rem1 from dest by source */
4529     }
4530     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4531    
4532    
4533     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4534     {
4535     make_tos(r);
4536     emit_byte(0xd9); /* ftst */
4537     emit_byte(0xe4);
4538     }
4539     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4540    
4541     /* %eax register is clobbered if target processor doesn't support fucomi */
4542     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4543     #define FFLAG_NREG EAX_INDEX
4544    
4545     static __inline__ void raw_fflags_into_flags(int r)
4546     {
4547     int p;
4548    
4549     usereg(r);
4550     p=stackpos(r);
4551    
4552     emit_byte(0xd9);
4553     emit_byte(0xee); /* Push 0 */
4554     emit_byte(0xd9);
4555     emit_byte(0xc9+p); /* swap top two around */
4556     if (have_cmov) {
4557     // gb-- fucomi is for P6 cores only, not K6-2 then...
4558     emit_byte(0xdb);
4559     emit_byte(0xe9+p); /* fucomi them */
4560     }
4561     else {
4562     emit_byte(0xdd);
4563     emit_byte(0xe1+p); /* fucom them */
4564     emit_byte(0x9b);
4565     emit_byte(0xdf);
4566     emit_byte(0xe0); /* fstsw ax */
4567     raw_sahf(0); /* sahf */
4568     }
4569     emit_byte(0xdd);
4570     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4571     }