ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.34
Committed: 2006-02-26T18:49:55Z (18 years, 7 months ago) by gbeauche
Branch: MAIN
CVS Tags: nigel-build-19
Changes since 1.33: +17 -7 lines
Log Message:
fix FETOX & FTWOTOX translations for x86_64

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Register numbers used by this code generator: EAX..EDI are numbered 0-7. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
/* R8..R15 (numbers 8-15) are only defined for x86-64 builds. */
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.33 /* XXX this has to match X86_Reg8H_Base + 4 */
/* High-byte registers sit 0x10+4 above their parent register number:
   AH=0x14, CH=0x15, DH=0x16, BH=0x17. */
56     #define AH_INDEX (0x10+4+EAX_INDEX)
57     #define CH_INDEX (0x10+4+ECX_INDEX)
58     #define DH_INDEX (0x10+4+EDX_INDEX)
59     #define BH_INDEX (0x10+4+EBX_INDEX)
60 gbeauche 1.1
61     /* The register in which subroutines return an integer return value */
/* Always EAX_INDEX (0): unlike REG_PAR1/REG_PAR2 this is not conditional on the
   compiler or on the word size. */
62 gbeauche 1.20 #define REG_RESULT EAX_INDEX
63 gbeauche 1.1
64     /* The registers subroutines take their first and second argument in */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     /* Handle the _fastcall parameters of ECX and EDX */
67 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
68     #define REG_PAR2 EDX_INDEX
69     #elif defined(__x86_64__)
/* x86-64: register numbers 7 and 6, i.e. RDI then RSI (presumably the AMD64
   ABI argument order - the ABI is named further down in this file). */
70     #define REG_PAR1 EDI_INDEX
71     #define REG_PAR2 ESI_INDEX
72 gbeauche 1.1 #else
/* Any other build: EAX then EDX. NOTE(review): presumably relies on a
   register-parameter calling convention being enabled - confirm in the build. */
73 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
74     #define REG_PAR2 EDX_INDEX
75 gbeauche 1.1 #endif
76    
/* NOTE(review): in the MSVC fastcall branch REG_PC_TMP is EAX_INDEX, the same
   value as REG_PC_PRE, whereas the comment on the default branch asks for a
   different register - confirm that this is intended. */
77 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
78 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
80 gbeauche 1.1 #else
81 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82 gbeauche 1.1 #endif
83    
/* Fixed-register constraints of the target. raw_imul_64_32 and raw_mul_64_32
   further down abort unless they are called with exactly MUL_NREG1 and
   MUL_NREG2. */
84 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
85 gbeauche 1.1 -1 if any reg will do */
86 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
88 gbeauche 1.1
/* Stack alignment (presumably in bytes) and a pointer-sized stack offset.
   NOTE(review): STACK_OFFSET presumably accounts for a pushed return address;
   its users are outside this view - confirm. */
89 gbeauche 1.31 #define STACK_ALIGN 16
90     #define STACK_OFFSET sizeof(void *)
91    
/* Lists of register numbers, each ending in -1 (presumably a terminator).
   always_used contains only 4, i.e. ESP_INDEX. */
92 gbeauche 1.1 uae_s8 always_used[]={4,-1};
93 gbeauche 1.20 #if defined(__x86_64__)
/* x86-64: every register number 0-15 except 4 (ESP_INDEX) is listed for both
   byte and word use. */
94     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96     #else
/* 32-bit: byte use is limited to 0-3 (EAX, ECX, EDX, EBX); word use adds
   5-7 (EBP, ESI, EDI). */
97 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
98     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 gbeauche 1.20 #endif
100 gbeauche 1.1
/* call_saved is indexed by register number; a non-zero entry presumably marks
   a register that survives a call. The USE_OPTIMIZED_CALLS variant has only
   8 entries and is deliberately unbuildable (#error below). */
101 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
102     /* Make sure interpretive core does not use cpuopti */
103     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 gbeauche 1.20 #error FIXME: code not ready
105 gbeauche 1.17 #else
106 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
107     by the caller */
/* Only entry 4 (ESP_INDEX) is set; the table has 16 entries, enough to cover
   R8-R15 as well. */
108 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 gbeauche 1.17 #endif
110 gbeauche 1.1
111     /* This *should* be the same as call_saved. But:
112     - We might not really know which registers are saved, and which aren't,
113     so we need to preserve some, but don't want to rely on everyone else
114     also saving those registers
115     - Special registers (such as the stack pointer) should not be "preserved"
116     by pushing, even though they are "saved" across function calls
117     */
118 gbeauche 1.21 #if defined(__x86_64__)
119 gbeauche 1.32 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
/* Entries set: 3 (EBX_INDEX), 5 (EBP_INDEX) and 11-15 (R11-R15). Entry 4
   (ESP_INDEX) stays 0 even though rsp is listed as callee-saved. */
121     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 gbeauche 1.21 #else
123 gbeauche 1.32 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
/* Entries set: 3 (EBX_INDEX), 5 (EBP_INDEX), 6 (ESI_INDEX), 7 (EDI_INDEX). */
124     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 gbeauche 1.21 #endif
126 gbeauche 1.1
127     /* Whether classes of instructions do or don't clobber the native flags */
/* An empty definition means the class leaves the flags alone; otherwise the
   macro expands to a call to clobber_flags() (defined outside this view). */
128     #define CLOBBER_MOV
129     #define CLOBBER_LEA
130     #define CLOBBER_CMOV
131     #define CLOBBER_POP
132     #define CLOBBER_PUSH
133     #define CLOBBER_SUB clobber_flags()
134     #define CLOBBER_SBB clobber_flags()
135     #define CLOBBER_CMP clobber_flags()
136     #define CLOBBER_ADD clobber_flags()
137     #define CLOBBER_ADC clobber_flags()
138     #define CLOBBER_AND clobber_flags()
139     #define CLOBBER_OR clobber_flags()
140     #define CLOBBER_XOR clobber_flags()
141    
/* Flag-clobber markers for the rotate/shift, test, extension, byte-swap,
   setcc, multiply and bit-test/scan classes: an empty expansion means "flags
   untouched", otherwise clobber_flags() is called. CLOBBER_SW16 clobbers while
   CLOBBER_SW32 does not, which is consistent with raw_bswap_16 being emitted
   through ROLWir and raw_bswap_32 through BSWAPLr further down. */
142     #define CLOBBER_ROL clobber_flags()
143     #define CLOBBER_ROR clobber_flags()
144     #define CLOBBER_SHLL clobber_flags()
145     #define CLOBBER_SHRL clobber_flags()
146     #define CLOBBER_SHRA clobber_flags()
147     #define CLOBBER_TEST clobber_flags()
148     #define CLOBBER_CL16
149     #define CLOBBER_CL8
150 gbeauche 1.20 #define CLOBBER_SE32
151 gbeauche 1.1 #define CLOBBER_SE16
152     #define CLOBBER_SE8
153 gbeauche 1.20 #define CLOBBER_ZE32
154 gbeauche 1.1 #define CLOBBER_ZE16
155     #define CLOBBER_ZE8
156     #define CLOBBER_SW16 clobber_flags()
157     #define CLOBBER_SW32
158     #define CLOBBER_SETCC
159     #define CLOBBER_MUL clobber_flags()
160     #define CLOBBER_BT clobber_flags()
161     #define CLOBBER_BSF clobber_flags()
162    
163 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
/* The new run-time assembler is force-enabled on x86-64 only; on other targets
   USE_NEW_RTASM keeps whatever value (if any) was defined before this point. */
164 gbeauche 1.20 #if defined(__x86_64__)
165     #define USE_NEW_RTASM 1
166     #endif
167    
168     #if USE_NEW_RTASM
169 gbeauche 1.13
/* Settings defined just before codegen_x86.h is included (presumably consumed
   by that header): 64-bit target on x86-64 builds, flat register numbering
   off, ALU and rotate/shift optimisations on. */
170     #if defined(__x86_64__)
171     #define X86_TARGET_64BIT 1
172     #endif
173     #define X86_FLAT_REGISTERS 0
174 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
175     #define X86_OPTIMIZE_ROTSHI 1
176 gbeauche 1.13 #include "codegen_x86.h"
177    
/* Map the x86_* hook names onto this compiler's emit_byte/emit_word/emit_long/
   emit_quad and get_target helpers; failures are routed to jit_fail() together
   with the file, line and function of the expansion site. */
178     #define x86_emit_byte(B) emit_byte(B)
179     #define x86_emit_word(W) emit_word(W)
180     #define x86_emit_long(L) emit_long(L)
181 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
182 gbeauche 1.13 #define x86_get_target() get_target()
183     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
184    
/* Fatal-error handler behind x86_emit_failure(): prints the function, file,
   line and message to stderr, then terminates the process with abort().
   Never returns. */
185     static void jit_fail(const char *msg, const char *file, int line, const char *function)
186     {
187     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
188     function, file, line, msg);
189     abort();
190     }
191    
/* Push register r: forwards to PUSHQr on x86-64 builds and to PUSHLr
   otherwise (presumably the 64-bit / 32-bit x86 PUSH emitters, which are
   defined outside this view). */
192     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
193     {
194 gbeauche 1.20 #if defined(__x86_64__)
195     PUSHQr(r);
196     #else
197 gbeauche 1.13 PUSHLr(r);
198 gbeauche 1.20 #endif
199 gbeauche 1.13 }
200     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
201    
/* Pop into register r: forwards to POPQr on x86-64 builds and to POPLr
   otherwise (presumably the 64-bit / 32-bit x86 POP emitters, which are
   defined outside this view). */
202     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
203     {
204 gbeauche 1.20 #if defined(__x86_64__)
205     POPQr(r);
206     #else
207 gbeauche 1.13 POPLr(r);
208 gbeauche 1.20 #endif
209 gbeauche 1.13 }
210     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
211    
/* Pop into the memory operand d, passed with no base and no index register
   (X86_NOREG) and factor 1 - presumably an absolute address. Uses POPQm on
   x86-64 builds and POPLm otherwise. */
212 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
213     {
214     #if defined(__x86_64__)
215     POPQm(d, X86_NOREG, X86_NOREG, 1);
216     #else
217     POPLm(d, X86_NOREG, X86_NOREG, 1);
218     #endif
219     }
220     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
221    
/* Bit test of register r with immediate bit index i: forwards to
   BTLir(i, r), presumably the 32-bit x86 BT emitter (defined outside this
   view). */
222 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
223     {
224     BTLir(i, r);
225     }
226     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
227    
/* Bit test of register r with the bit index held in register b: forwards to
   BTLrr(b, r), presumably the 32-bit x86 BT emitter. */
228     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
229     {
230     BTLrr(b, r);
231     }
232     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
233    
/* Bit test-and-complement of register r, immediate bit index i: forwards to
   BTCLir(i, r), presumably the 32-bit x86 BTC emitter. */
234     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
235     {
236     BTCLir(i, r);
237     }
238     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
239    
/* Bit test-and-complement of register r, bit index in register b: forwards
   to BTCLrr(b, r), presumably the 32-bit x86 BTC emitter. */
240     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
241     {
242     BTCLrr(b, r);
243     }
244     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
245    
/* Bit test-and-reset of register r, immediate bit index i: forwards to
   BTRLir(i, r), presumably the 32-bit x86 BTR emitter. */
246     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
247     {
248     BTRLir(i, r);
249     }
250     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
251    
/* Bit test-and-reset of register r, bit index in register b: forwards to
   BTRLrr(b, r), presumably the 32-bit x86 BTR emitter. */
252     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
253     {
254     BTRLrr(b, r);
255     }
256     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
257    
/* Bit test-and-set of register r, immediate bit index i: forwards to
   BTSLir(i, r), presumably the 32-bit x86 BTS emitter. */
258     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
259     {
260     BTSLir(i, r);
261     }
262     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
263    
/* Bit test-and-set of register r, bit index in register b: forwards to
   BTSLrr(b, r), presumably the 32-bit x86 BTS emitter. */
264     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
265     {
266     BTSLrr(b, r);
267     }
268     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
269    
/* Subtract immediate i from register d at word size: forwards to
   SUBWir(i, d), presumably the 16-bit x86 SUB emitter. */
270     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
271     {
272     SUBWir(i, d);
273     }
274     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
275    
/* Load register d from the memory operand s, passed with no base/index
   register (X86_NOREG) and factor 1 - presumably an absolute address.
   Forwards to MOVLmr (presumably the 32-bit MOV load emitter). */
276     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
277     {
278     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
279     }
280     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
281    
/* Store immediate s to the memory operand d (no base/index register,
   factor 1 - presumably an absolute address) via MOVLim, presumably the
   32-bit MOV store-immediate emitter. */
282     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
283     {
284     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
285     }
286     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
287    
/* Word-size counterpart of raw_mov_l_mi: stores immediate s to the memory
   operand d (no base/index register, factor 1) via MOVWim. */
288     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
289     {
290     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
291     }
292     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
293    
/* Byte-size counterpart of raw_mov_l_mi: stores immediate s to the memory
   operand d (no base/index register, factor 1) via MOVBim. */
294     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
295     {
296     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
297     }
298     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
299    
/* Rotate left, by immediate i, the byte at memory operand d (no base/index
   register, factor 1 - presumably an absolute address): forwards to ROLBim,
   presumably the 8-bit x86 ROL emitter. */
300     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
301     {
302     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
303     }
304     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
305    
/* Rotate register r left by immediate i at byte size: forwards to
   ROLBir(i, r), presumably the 8-bit x86 ROL emitter. */
306     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
307     {
308     ROLBir(i, r);
309     }
310     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
311    
/* Rotate register r left by immediate i at word size: forwards to
   ROLWir(i, r), presumably the 16-bit x86 ROL emitter. */
312     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
313     {
314     ROLWir(i, r);
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
317    
/* Rotate register r left by immediate i at long size: forwards to
   ROLLir(i, r), presumably the 32-bit x86 ROL emitter. */
318     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
319     {
320     ROLLir(i, r);
321     }
322     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
323    
/* Rotate register d left by the count held in register r, long size:
   forwards to ROLLrr(r, d). NOTE(review): callers presumably pass
   SHIFTCOUNT_NREG (ECX) as r - confirm. */
324     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
325     {
326     ROLLrr(r, d);
327     }
328     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
329    
/* Rotate register d left by the count held in register r, word size:
   forwards to ROLWrr(r, d), presumably the 16-bit x86 ROL emitter. */
330     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
331     {
332     ROLWrr(r, d);
333     }
334     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
335    
/* Rotate register d left by the count held in register r, byte size:
   forwards to ROLBrr(r, d), presumably the 8-bit x86 ROL emitter. */
336     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
337     {
338     ROLBrr(r, d);
339     }
340     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
341    
/* Shift register d left by the count held in register r, long size:
   forwards to SHLLrr(r, d), presumably the 32-bit x86 SHL emitter. */
342     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
343     {
344     SHLLrr(r, d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
347    
/* Shift register d left by the count held in register r, word size:
   forwards to SHLWrr(r, d), presumably the 16-bit x86 SHL emitter. */
348     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
349     {
350     SHLWrr(r, d);
351     }
352     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
353    
/* Shift register d left by the count held in register r, byte size:
   forwards to SHLBrr(r, d), presumably the 8-bit x86 SHL emitter. */
354     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
355     {
356     SHLBrr(r, d);
357     }
358     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
359    
/* Rotate register r right by immediate i at byte size: forwards to
   RORBir(i, r), presumably the 8-bit x86 ROR emitter. */
360     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
361     {
362     RORBir(i, r);
363     }
364     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
365    
/* Rotate register r right by immediate i at word size: forwards to
   RORWir(i, r), presumably the 16-bit x86 ROR emitter. */
366     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
367     {
368     RORWir(i, r);
369     }
370     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371    
/* OR register d with the long at memory operand s (no base/index register,
   factor 1 - presumably an absolute address): forwards to ORLmr, presumably
   the 32-bit x86 OR emitter. */
372     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
373     {
374     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
375     }
376     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
377    
/* Rotate register r right by immediate i at long size: forwards to
   RORLir(i, r), presumably the 32-bit x86 ROR emitter. */
378     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
379     {
380     RORLir(i, r);
381     }
382     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
383    
/* Rotate register d right by the count held in register r, long size:
   forwards to RORLrr(r, d), presumably the 32-bit x86 ROR emitter. */
384     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
385     {
386     RORLrr(r, d);
387     }
388     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
389    
/* Rotate register d right by the count held in register r, word size:
   forwards to RORWrr(r, d), presumably the 16-bit x86 ROR emitter. */
390     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
391     {
392     RORWrr(r, d);
393     }
394     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
395    
/* Rotate register d right by the count held in register r, byte size:
   forwards to RORBrr(r, d), presumably the 8-bit x86 ROR emitter. */
396     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
397     {
398     RORBrr(r, d);
399     }
400     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
401    
/* Logical shift right of register d by the count in register r, long size:
   forwards to SHRLrr(r, d), presumably the 32-bit x86 SHR emitter. */
402     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
403     {
404     SHRLrr(r, d);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
407    
/* Logical shift right of register d by the count in register r, word size:
   forwards to SHRWrr(r, d), presumably the 16-bit x86 SHR emitter. */
408     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
409     {
410     SHRWrr(r, d);
411     }
412     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
413    
/* Logical shift right of register d by the count in register r, byte size:
   forwards to SHRBrr(r, d), presumably the 8-bit x86 SHR emitter. */
414     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
415     {
416     SHRBrr(r, d);
417     }
418     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
419    
/* Arithmetic shift right of register d by the count in register r, long
   size: forwards to SARLrr(r, d), presumably the 32-bit x86 SAR emitter. */
420     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
421     {
422 gbeauche 1.14 SARLrr(r, d);
423 gbeauche 1.13 }
424     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
425    
/* Arithmetic shift right of register d by the count in register r, word
   size: forwards to SARWrr(r, d), presumably the 16-bit x86 SAR emitter. */
426     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
427     {
428 gbeauche 1.14 SARWrr(r, d);
429 gbeauche 1.13 }
430     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
431    
/* Arithmetic shift right of register d by the count in register r, byte
   size: forwards to SARBrr(r, d), presumably the 8-bit x86 SAR emitter. */
432     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
433     {
434 gbeauche 1.14 SARBrr(r, d);
435 gbeauche 1.13 }
436     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
437    
/* Shift register r left by immediate i, long size: forwards to
   SHLLir(i, r), presumably the 32-bit x86 SHL emitter. */
438     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
439     {
440     SHLLir(i, r);
441     }
442     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
443    
/* Shift register r left by immediate i, word size: forwards to
   SHLWir(i, r), presumably the 16-bit x86 SHL emitter. */
444     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
445     {
446     SHLWir(i, r);
447     }
448     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
449    
/* Shift register r left by immediate i, byte size: forwards to
   SHLBir(i, r), presumably the 8-bit x86 SHL emitter. */
450     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
451     {
452     SHLBir(i, r);
453     }
454     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
455    
/* Logical shift right of register r by immediate i, long size: forwards to
   SHRLir(i, r), presumably the 32-bit x86 SHR emitter. */
456     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
457     {
458     SHRLir(i, r);
459     }
460     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
461    
/* Logical shift right of register r by immediate i, word size: forwards to
   SHRWir(i, r), presumably the 16-bit x86 SHR emitter. */
462     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
463     {
464     SHRWir(i, r);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
467    
/* Logical shift right of register r by immediate i, byte size: forwards to
   SHRBir(i, r), presumably the 8-bit x86 SHR emitter. */
468     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
469     {
470     SHRBir(i, r);
471     }
472     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
473    
/* Arithmetic shift right of register r by immediate i, long size: forwards
   to SARLir(i, r), presumably the 32-bit x86 SAR emitter. */
474     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
475     {
476 gbeauche 1.14 SARLir(i, r);
477 gbeauche 1.13 }
478     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
479    
/* Arithmetic shift right of register r by immediate i, word size: forwards
   to SARWir(i, r), presumably the 16-bit x86 SAR emitter. */
480     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
481     {
482 gbeauche 1.14 SARWir(i, r);
483 gbeauche 1.13 }
484     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
485    
/* Arithmetic shift right of register r by immediate i, byte size: forwards
   to SARBir(i, r), presumably the 8-bit x86 SAR emitter. */
486     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
487     {
488 gbeauche 1.14 SARBir(i, r);
489 gbeauche 1.13 }
490     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
491    
/* Emits SAHF(). The dummy_ah parameter is not used in the body; it
   presumably exists so the caller's register allocator places a value in AH
   first - confirm against the callers. */
492     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
493     {
494     SAHF();
495     }
496     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
497    
/* Emits CPUID(). The dummy_eax parameter is not used in the body; it
   presumably only tells the register allocator that EAX is involved. */
498     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
499     {
500     CPUID();
501     }
502     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
503    
/* Emits LAHF(). The dummy_ah parameter is not used in the body; it
   presumably tells the register allocator that AH is written. */
504     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
505     {
506     LAHF();
507     }
508     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
509    
/* Set byte register d according to condition code cc: forwards to
   SETCCir(cc, d), presumably the x86 SETcc emitter. */
510     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
511     {
512     SETCCir(cc, d);
513     }
514     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
515    
/* Memory form of raw_setcc: forwards to SETCCim with destination d, no
   base/index register and factor 1 (presumably an absolute address). */
516     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
517     {
518     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
519     }
520     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
521    
/* Conditional register-to-register move of s into d under condition cc.
   With have_cmov set it forwards to CMOVLrr. Otherwise (on x86-64 builds
   this path logs a message and aborts first) it emits JCCSii with cc^1 and
   the literal 2, followed by MOVLrr(s, d) - presumably a short jump on the
   inverted condition that skips a 2-byte mov. */
522     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
523     {
524 gbeauche 1.15 if (have_cmov)
525     CMOVLrr(cc, s, d);
526     else { /* replacement using branch and mov */
527     #if defined(__x86_64__)
528     write_log("x86-64 implementations are bound to have CMOV!\n");
529     abort();
530     #endif
531     JCCSii(cc^1, 2);
532     MOVLrr(s, d);
533     }
534 gbeauche 1.13 }
535     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
536    
/* Bit scan forward from s into d: forwards to BSFLrr(s, d), presumably the
   32-bit x86 BSF emitter. */
537     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
538     {
539     BSFLrr(s, d);
540     }
541     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
542    
/* Sign-extend the 32-bit value in s into d: forwards to MOVSLQrr(s, d),
   presumably a long-to-quad extension that only exists on x86-64. This whole
   section is compiled only when USE_NEW_RTASM is set. */
543 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
544     {
545     MOVSLQrr(s, d);
546     }
547     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
548    
/* Sign-extend the 16-bit value in s into the 32-bit register d: forwards to
   MOVSWLrr(s, d), presumably the x86 MOVSX word-to-long emitter. */
549 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
550     {
551     MOVSWLrr(s, d);
552     }
553     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
554    
/* Sign-extend the 8-bit value in s into the 32-bit register d: forwards to
   MOVSBLrr(s, d), presumably the x86 MOVSX byte-to-long emitter. */
555     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
556     {
557     MOVSBLrr(s, d);
558     }
559     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
560    
/* Zero-extend the 16-bit value in s into the 32-bit register d: forwards to
   MOVZWLrr(s, d), presumably the x86 MOVZX word-to-long emitter. */
561     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
562     {
563     MOVZWLrr(s, d);
564     }
565     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
566    
/* Zero-extend the 8-bit value in s into the 32-bit register d: forwards to
   MOVZBLrr(s, d), presumably the x86 MOVZX byte-to-long emitter. */
567     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
568     {
569     MOVZBLrr(s, d);
570     }
571     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
572    
/* Signed multiply of d by s with the result left in d: forwards to the
   two-operand IMULLrr(s, d), presumably the 32-bit x86 IMUL emitter. */
573     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
574     {
575 gbeauche 1.14 IMULLrr(s, d);
576 gbeauche 1.13 }
577     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
578    
/* Widening signed multiply. Requires d == MUL_NREG1 (EAX_INDEX) and
   s == MUL_NREG2 (EDX_INDEX); any other pair is logged through write_log()
   and the process aborts. Then forwards to the one-operand IMULLr(s),
   presumably leaving the low half in EAX and the high half in EDX, as the
   MUL_NREG1/MUL_NREG2 comments describe. */
579     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
580     {
581 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
582     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
583 gbeauche 1.13 abort();
584 gbeauche 1.14 }
585     IMULLr(s);
586 gbeauche 1.13 }
587     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
588    
/* Widening unsigned multiply. Requires d == MUL_NREG1 (EAX_INDEX) and
   s == MUL_NREG2 (EDX_INDEX); any other pair is logged through write_log()
   and the process aborts. Then forwards to the one-operand MULLr(s),
   presumably leaving the low half in EAX and the high half in EDX. */
589     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
590     {
591 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
592     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
593 gbeauche 1.13 abort();
594 gbeauche 1.14 }
595     MULLr(s);
596 gbeauche 1.13 }
597     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
598    
/* Not implemented on this target: the body unconditionally calls abort()
   and emits nothing. Both parameters are unused. */
599     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
600     {
601 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
602 gbeauche 1.13 }
603     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
604    
/* Byte-size register copy of s into d: forwards to MOVBrr(s, d) (note the
   source-first argument order of the emitter). */
605     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
606     {
607     MOVBrr(s, d);
608     }
609     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
610    
/* Word-size register copy of s into d: forwards to MOVWrr(s, d) (note the
   source-first argument order of the emitter). */
611     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
612     {
613     MOVWrr(s, d);
614     }
615     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
616    
/* Long load into d from an indexed address with displacement 0: forwards
   to MOVLmr(0, baser, index, factor, d) - presumably the address
   baser + index*factor. */
617     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
618     {
619     MOVLmr(0, baser, index, factor, d);
620     }
621     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
622    
/* Word load into d from an indexed address with displacement 0: forwards
   to MOVWmr(0, baser, index, factor, d) - presumably the address
   baser + index*factor. */
623     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
624     {
625     MOVWmr(0, baser, index, factor, d);
626     }
627     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
628    
/* Byte load into d from an indexed address with displacement 0: forwards
   to MOVBmr(0, baser, index, factor, d) - presumably the address
   baser + index*factor. */
629     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
630     {
631     MOVBmr(0, baser, index, factor, d);
632     }
633     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
634    
/* Long store of register s to an indexed address with displacement 0:
   forwards to MOVLrm(s, 0, baser, index, factor) - presumably the address
   baser + index*factor. */
635     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
636     {
637     MOVLrm(s, 0, baser, index, factor);
638     }
639     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
640    
/* Word store of register s to an indexed address with displacement 0:
   forwards to MOVWrm(s, 0, baser, index, factor) - presumably the address
   baser + index*factor. */
641     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
642     {
643     MOVWrm(s, 0, baser, index, factor);
644     }
645     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
646    
/* Byte store of register s to an indexed address with displacement 0:
   forwards to MOVBrm(s, 0, baser, index, factor) - presumably the address
   baser + index*factor. */
647     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
648     {
649     MOVBrm(s, 0, baser, index, factor);
650     }
651     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
652    
/* Long store of register s to an indexed address with displacement base:
   forwards to MOVLrm(s, base, baser, index, factor) - presumably the
   address base + baser + index*factor. */
653     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
654     {
655     MOVLrm(s, base, baser, index, factor);
656     }
657     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
658    
/* Word store of register s to an indexed address with displacement base:
   forwards to MOVWrm(s, base, baser, index, factor) - presumably the
   address base + baser + index*factor. */
659     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
660     {
661     MOVWrm(s, base, baser, index, factor);
662     }
663     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
664    
/* Byte store of register s to an indexed address with displacement base:
   forwards to MOVBrm(s, base, baser, index, factor) - presumably the
   address base + baser + index*factor. */
665     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
666     {
667     MOVBrm(s, base, baser, index, factor);
668     }
669     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
670    
/* Long load into d from an indexed address with displacement base:
   forwards to MOVLmr(base, baser, index, factor, d) - presumably the
   address base + baser + index*factor. */
671     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
672     {
673     MOVLmr(base, baser, index, factor, d);
674     }
675     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
676    
/* Word load into d from an indexed address with displacement base:
   forwards to MOVWmr(base, baser, index, factor, d) - presumably the
   address base + baser + index*factor. */
677     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
678     {
679     MOVWmr(base, baser, index, factor, d);
680     }
681     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
682    
/* Byte load into d from an indexed address with displacement base:
   forwards to MOVBmr(base, baser, index, factor, d) - presumably the
   address base + baser + index*factor. */
683     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
684     {
685     MOVBmr(base, baser, index, factor, d);
686     }
687     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
688    
/* Long load into d from a table-style address: displacement base, no base
   register (X86_NOREG), index scaled by factor. Forwards to MOVLmr -
   presumably the address base + index*factor. */
689     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
690     {
691     MOVLmr(base, X86_NOREG, index, factor, d);
692     }
693     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
694    
/* Conditional long load into d from displacement base plus scaled index (no
   base register), under condition cond. With have_cmov set it forwards to
   CMOVLmr. Otherwise (on x86-64 builds this path logs a message and aborts
   first) it emits JCCSii with cond^1 and the literal 7, followed by the
   plain MOVLmr load - presumably a short jump on the inverted condition
   over a 7-byte mov. NOTE(review): the literal 7 must equal the encoded
   length of that mov - confirm against the emitter. */
695     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
696     {
697 gbeauche 1.15 if (have_cmov)
698     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
699     else { /* replacement using branch and mov */
700     #if defined(__x86_64__)
701     write_log("x86-64 implementations are bound to have CMOV!\n");
702     abort();
703     #endif
704     JCCSii(cond^1, 7);
705     MOVLmr(base, X86_NOREG, index, factor, d);
706     }
707 gbeauche 1.13 }
708     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
709    
/* Conditional long load into d from the memory operand mem (no base/index
   register, factor 1), under condition cond. With have_cmov set it forwards
   to CMOVLmr. Otherwise (on x86-64 builds this path logs a message and
   aborts first) it emits JCCSii with cond^1 and the literal 6, followed by
   the plain MOVLmr load - presumably a short jump on the inverted condition
   over a 6-byte mov. NOTE(review): the literal 6 must equal the encoded
   length of that mov for every d - confirm against the emitter. */
710     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
711     {
712 gbeauche 1.15 if (have_cmov)
713     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
714     else { /* replacement using branch and mov */
715     #if defined(__x86_64__)
716     write_log("x86-64 implementations are bound to have CMOV!\n");
717     abort();
718     #endif
719     JCCSii(cond^1, 6);
720     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
721     }
722 gbeauche 1.13 }
723     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
724    
/* Long load into d from the address in register s plus displacement offset
   (no index register): forwards to MOVLmr(offset, s, X86_NOREG, 1, d). */
725     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
726     {
727     MOVLmr(offset, s, X86_NOREG, 1, d);
728     }
729     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
730    
/* Word load into d from the address in register s plus displacement offset
   (no index register): forwards to MOVWmr(offset, s, X86_NOREG, 1, d). */
731     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
732     {
733     MOVWmr(offset, s, X86_NOREG, 1, d);
734     }
735     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
736    
/* Byte load into d from the address in register s plus displacement offset
   (no index register): forwards to MOVBmr(offset, s, X86_NOREG, 1, d). */
737     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
738     {
739     MOVBmr(offset, s, X86_NOREG, 1, d);
740     }
741     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
742    
/* Long load into d from the address in register s plus displacement
   offset. The body is identical to raw_mov_l_rR: both forward to
   MOVLmr(offset, s, X86_NOREG, 1, d). */
743     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
744     {
745     MOVLmr(offset, s, X86_NOREG, 1, d);
746     }
747     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
748    
/* Word load into d from the address in register s plus displacement
   offset. The body is identical to raw_mov_w_rR: both forward to
   MOVWmr(offset, s, X86_NOREG, 1, d). */
749     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
750     {
751     MOVWmr(offset, s, X86_NOREG, 1, d);
752     }
753     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
754    
/* Byte load into d from the address in register s plus displacement
   offset. The body is identical to raw_mov_b_rR: both forward to
   MOVBmr(offset, s, X86_NOREG, 1, d). */
755     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
756     {
757     MOVBmr(offset, s, X86_NOREG, 1, d);
758     }
759     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
760    
/* Long store of immediate i to the address in register d plus displacement
   offset (no index register): forwards to
   MOVLim(i, offset, d, X86_NOREG, 1). Here d is used as an address, not as
   a destination register. */
761     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
762     {
763     MOVLim(i, offset, d, X86_NOREG, 1);
764     }
765     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
766    
/* Word store of immediate i to the address in register d plus displacement
   offset (no index register): forwards to
   MOVWim(i, offset, d, X86_NOREG, 1). */
767     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
768     {
769     MOVWim(i, offset, d, X86_NOREG, 1);
770     }
771     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
772    
/* Byte store of immediate i to the address in register d plus displacement
   offset (no index register): forwards to
   MOVBim(i, offset, d, X86_NOREG, 1). */
773     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
774     {
775     MOVBim(i, offset, d, X86_NOREG, 1);
776     }
777     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
778    
/* Long store of register s to the address in register d plus displacement
   offset (no index register): forwards to
   MOVLrm(s, offset, d, X86_NOREG, 1). */
779     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
780     {
781     MOVLrm(s, offset, d, X86_NOREG, 1);
782     }
783     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
784    
/* Word store of register s to the address in register d plus displacement
   offset (no index register): forwards to
   MOVWrm(s, offset, d, X86_NOREG, 1). */
785     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
786     {
787     MOVWrm(s, offset, d, X86_NOREG, 1);
788     }
789     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
790    
/* Byte store of register s to the address in register d plus displacement
   offset (no index register): forwards to
   MOVBrm(s, offset, d, X86_NOREG, 1). */
791     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
792     {
793     MOVBrm(s, offset, d, X86_NOREG, 1);
794     }
795     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
796    
/* Address computation into d from register s plus displacement offset (no
   index register): forwards to LEALmr(offset, s, X86_NOREG, 1, d),
   presumably the x86 LEA emitter. */
797     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
798     {
799     LEALmr(offset, s, X86_NOREG, 1, d);
800     }
801     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
802    
/* Address computation into d with all components: forwards to
   LEALmr(offset, s, index, factor, d) - presumably
   d = offset + s + index*factor. */
803     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
804     {
805     LEALmr(offset, s, index, factor, d);
806     }
807     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
808    
/* Address computation into d with displacement 0: forwards to
   LEALmr(0, s, index, factor, d) - presumably d = s + index*factor. */
809     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
810     {
811     LEALmr(0, s, index, factor, d);
812     }
813     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
814    
/* Long store of register s to the address in register d plus displacement
   offset. The body is identical to raw_mov_l_Rr: both forward to
   MOVLrm(s, offset, d, X86_NOREG, 1). */
815     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
816     {
817     MOVLrm(s, offset, d, X86_NOREG, 1);
818     }
819     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
820    
/* Word store of register s to the address in register d plus displacement
   offset. The body is identical to raw_mov_w_Rr: both forward to
   MOVWrm(s, offset, d, X86_NOREG, 1). */
821     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
822     {
823     MOVWrm(s, offset, d, X86_NOREG, 1);
824     }
825     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
826    
/* Byte store of register s to the address in register d plus displacement
   offset. The body is identical to raw_mov_b_Rr: both forward to
   MOVBrm(s, offset, d, X86_NOREG, 1). */
827     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
828     {
829     MOVBrm(s, offset, d, X86_NOREG, 1);
830     }
831     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
832    
/* Byte-swap the 32-bit register r in place: forwards to BSWAPLr(r),
   presumably the x86 BSWAP emitter. */
833     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
834     {
835     BSWAPLr(r);
836     }
837     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
838    
/* Swap the two bytes of the 16-bit register r. Implemented as a word rotate
   by 8, ROLWir(8, r), rather than with a dedicated byte-swap emitter. */
839     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
840     {
841     ROLWir(8, r);
842     }
843     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
844    
/* Long-size register copy of s into d: forwards to MOVLrr(s, d) (note the
   source-first argument order of the emitter). */
845     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
846     {
847     MOVLrr(s, d);
848     }
849     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
850    
/* Long store of register s to the memory operand d (no base/index register,
   factor 1 - presumably an absolute address): forwards to MOVLrm. */
851     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
852     {
853     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
854     }
855     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
856    
/* Word store of register s to the memory operand d (no base/index register,
   factor 1 - presumably an absolute address): forwards to MOVWrm. */
857     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
858     {
859     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
860     }
861     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
862    
/* Word load into d from the memory operand s (no base/index register,
   factor 1 - presumably an absolute address): forwards to MOVWmr. */
863     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
864     {
865     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
866     }
867     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
868    
/* Byte store of register s to the memory operand d (no base/index register,
   factor 1 - presumably an absolute address): forwards to MOVBrm. */
869     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
870     {
871     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
872     }
873     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
874    
/* Byte load into d from the memory operand s (no base/index register,
   factor 1 - presumably an absolute address): forwards to MOVBmr. */
875     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
876     {
877     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
878     }
879     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
880    
/* Load immediate s into register d at long size: forwards to
   MOVLir(s, d). */
881     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
882     {
883     MOVLir(s, d);
884     }
885     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
886    
/* Load immediate s into register d at word size: forwards to
   MOVWir(s, d). */
887     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
888     {
889     MOVWir(s, d);
890     }
891     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
892    
/* Load immediate s into register d at byte size: forwards to
   MOVBir(s, d). */
893     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
894     {
895     MOVBir(s, d);
896     }
897     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
898    
/* Add-with-carry of immediate s into the long at memory operand d (no
   base/index register, factor 1 - presumably an absolute address): forwards
   to ADCLim, presumably the 32-bit x86 ADC emitter. */
899     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
900     {
901     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
902     }
903     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
904    
/* Add immediate s to the long at memory operand d (no base/index register,
   factor 1 - presumably an absolute address): forwards to ADDLim. The
   address parameter d is typed IMM here, unlike the MEMRW used by
   raw_adc_l_mi. */
905     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
906     {
907     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
908     }
909     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
910    
/* Add immediate s to the word at memory operand d (no base/index register,
   factor 1 - presumably an absolute address): forwards to ADDWim. */
911     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
912     {
913     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
914     }
915     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
916    
/* Add immediate s to the byte at memory operand d (no base/index register,
   factor 1 - presumably an absolute address): forwards to ADDBim. */
917     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
918     {
919     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
920     }
921     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
922    
/* Test register d against immediate i at long size: forwards to
   TESTLir(i, d), presumably the 32-bit x86 TEST emitter. */
923     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
924     {
925     TESTLir(i, d);
926     }
927     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
928    
929     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
930     {
931     TESTLrr(s, d);
932     }
933     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
934    
935     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
936     {
937     TESTWrr(s, d);
938     }
939     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
940    
941     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
942     {
943     TESTBrr(s, d);
944     }
945     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
946    
947 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
948     {
949     XORLir(i, d);
950     }
951     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
952    
953 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
954     {
955     ANDLir(i, d);
956     }
957     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
958    
959     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
960     {
961     ANDWir(i, d);
962     }
963     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
964    
965     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
966     {
967     ANDLrr(s, d);
968     }
969     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
970    
971     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
972     {
973     ANDWrr(s, d);
974     }
975     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
976    
977     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
978     {
979     ANDBrr(s, d);
980     }
981     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
982    
983     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
984     {
985     ORLir(i, d);
986     }
987     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
988    
989     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
990     {
991     ORLrr(s, d);
992     }
993     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
994    
995     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
996     {
997     ORWrr(s, d);
998     }
999     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1000    
1001     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1002     {
1003     ORBrr(s, d);
1004     }
1005     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1006    
1007     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1008     {
1009     ADCLrr(s, d);
1010     }
1011     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1012    
1013     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1014     {
1015     ADCWrr(s, d);
1016     }
1017     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1018    
1019     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1020     {
1021     ADCBrr(s, d);
1022     }
1023     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1024    
1025     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1026     {
1027     ADDLrr(s, d);
1028     }
1029     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1030    
1031     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1032     {
1033     ADDWrr(s, d);
1034     }
1035     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1036    
1037     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1038     {
1039     ADDBrr(s, d);
1040     }
1041     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1042    
1043     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1044     {
1045     SUBLir(i, d);
1046     }
1047     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1048    
1049     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1050     {
1051     SUBBir(i, d);
1052     }
1053     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1054    
1055     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1056     {
1057     ADDLir(i, d);
1058     }
1059     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1060    
1061     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1062     {
1063     ADDWir(i, d);
1064     }
1065     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1066    
1067     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1068     {
1069     ADDBir(i, d);
1070     }
1071     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1072    
1073     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1074     {
1075     SBBLrr(s, d);
1076     }
1077     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1078    
1079     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1080     {
1081     SBBWrr(s, d);
1082     }
1083     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1084    
1085     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1086     {
1087     SBBBrr(s, d);
1088     }
1089     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1090    
1091     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1092     {
1093     SUBLrr(s, d);
1094     }
1095     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1096    
1097     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1098     {
1099     SUBWrr(s, d);
1100     }
1101     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1102    
1103     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1104     {
1105     SUBBrr(s, d);
1106     }
1107     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1108    
1109     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1110     {
1111     CMPLrr(s, d);
1112     }
1113     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1114    
1115     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1116     {
1117     CMPLir(i, r);
1118     }
1119     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1120    
1121     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1122     {
1123     CMPWrr(s, d);
1124     }
1125     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1126    
1127     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1128     {
1129     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1130     }
1131     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1132    
1133     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1134     {
1135     CMPBir(i, d);
1136     }
1137     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1138    
1139     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1140     {
1141     CMPBrr(s, d);
1142     }
1143     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1144    
1145     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1146     {
1147     CMPLmr(offset, X86_NOREG, index, factor, d);
1148     }
1149     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1150    
1151     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1152     {
1153     XORLrr(s, d);
1154     }
1155     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1156    
1157     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1158     {
1159     XORWrr(s, d);
1160     }
1161     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1162    
1163     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1164     {
1165     XORBrr(s, d);
1166     }
1167     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1168    
1169     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1170     {
1171     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1172     }
1173     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1174    
1175     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1176     {
1177     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1178     }
1179     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1180    
1181     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1182     {
1183     XCHGLrr(r2, r1);
1184     }
1185     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1186    
1187     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1188     {
1189 gbeauche 1.18 PUSHF();
1190 gbeauche 1.13 }
1191     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1192    
1193     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1194     {
1195 gbeauche 1.18 POPF();
1196 gbeauche 1.13 }
1197     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1198    
1199 gbeauche 1.34 /* Generate floating-point instructions */
1200     static inline void x86_fadd_m(MEMR s)
1201     {
1202     FADDLm(s,X86_NOREG,X86_NOREG,1);
1203     }
1204    
1205 gbeauche 1.13 #else
1206    
/* Switches selecting shorter instruction encodings in the emitters below. */
const bool optimize_accum      = true;	/* use the short accumulator-only opcode when isaccum() holds */
const bool optimize_imm8       = true;	/* NOTE(review): not referenced in this part of the file; presumably enables imm8 forms -- confirm */
const bool optimize_shift_once = true;	/* use the dedicated shift/rotate-by-1 opcodes (no count byte) */
1210    
1211     /*************************************************************************
1212     * Actual encoding of the instructions on the target CPU *
1213     *************************************************************************/
1214    
1215 gbeauche 1.2 static __inline__ int isaccum(int r)
1216     {
1217     return (r == EAX_INDEX);
1218     }
1219    
1220 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1221     {
1222     return (x>=-128 && x<=127);
1223     }
1224    
1225     static __inline__ int isword(uae_s32 x)
1226     {
1227     return (x>=-32768 && x<=32767);
1228     }
1229    
1230     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1231     {
1232     emit_byte(0x50+r);
1233     }
1234     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1235    
1236     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1237     {
1238     emit_byte(0x58+r);
1239     }
1240     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1241    
1242 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1243     {
1244     emit_byte(0x8f);
1245     emit_byte(0x05);
1246     emit_long(d);
1247     }
1248     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1249    
1250 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1251     {
1252     emit_byte(0x0f);
1253     emit_byte(0xba);
1254     emit_byte(0xe0+r);
1255     emit_byte(i);
1256     }
1257     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1258    
1259     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1260     {
1261     emit_byte(0x0f);
1262     emit_byte(0xa3);
1263     emit_byte(0xc0+8*b+r);
1264     }
1265     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1266    
1267     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1268     {
1269     emit_byte(0x0f);
1270     emit_byte(0xba);
1271     emit_byte(0xf8+r);
1272     emit_byte(i);
1273     }
1274     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1275    
1276     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1277     {
1278     emit_byte(0x0f);
1279     emit_byte(0xbb);
1280     emit_byte(0xc0+8*b+r);
1281     }
1282     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1283    
1284    
1285     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1286     {
1287     emit_byte(0x0f);
1288     emit_byte(0xba);
1289     emit_byte(0xf0+r);
1290     emit_byte(i);
1291     }
1292     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1293    
1294     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1295     {
1296     emit_byte(0x0f);
1297     emit_byte(0xb3);
1298     emit_byte(0xc0+8*b+r);
1299     }
1300     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1301    
1302     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1303     {
1304     emit_byte(0x0f);
1305     emit_byte(0xba);
1306     emit_byte(0xe8+r);
1307     emit_byte(i);
1308     }
1309     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1310    
1311     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1312     {
1313     emit_byte(0x0f);
1314     emit_byte(0xab);
1315     emit_byte(0xc0+8*b+r);
1316     }
1317     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1318    
1319     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1320     {
1321     emit_byte(0x66);
1322     if (isbyte(i)) {
1323     emit_byte(0x83);
1324     emit_byte(0xe8+d);
1325     emit_byte(i);
1326     }
1327     else {
1328 gbeauche 1.2 if (optimize_accum && isaccum(d))
1329     emit_byte(0x2d);
1330     else {
1331 gbeauche 1.1 emit_byte(0x81);
1332     emit_byte(0xe8+d);
1333 gbeauche 1.2 }
1334 gbeauche 1.1 emit_word(i);
1335     }
1336     }
1337     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1338    
1339    
1340     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1341     {
1342     emit_byte(0x8b);
1343     emit_byte(0x05+8*d);
1344     emit_long(s);
1345     }
1346     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1347    
1348     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1349     {
1350     emit_byte(0xc7);
1351     emit_byte(0x05);
1352     emit_long(d);
1353     emit_long(s);
1354     }
1355     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1356    
1357     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1358     {
1359     emit_byte(0x66);
1360     emit_byte(0xc7);
1361     emit_byte(0x05);
1362     emit_long(d);
1363     emit_word(s);
1364     }
1365     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1366    
1367     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1368     {
1369     emit_byte(0xc6);
1370     emit_byte(0x05);
1371     emit_long(d);
1372     emit_byte(s);
1373     }
1374     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1375    
1376     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1377     {
1378     if (optimize_shift_once && (i == 1)) {
1379     emit_byte(0xd0);
1380     emit_byte(0x05);
1381     emit_long(d);
1382     }
1383     else {
1384     emit_byte(0xc0);
1385     emit_byte(0x05);
1386     emit_long(d);
1387     emit_byte(i);
1388     }
1389     }
1390     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1391    
1392     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1393     {
1394     if (optimize_shift_once && (i == 1)) {
1395     emit_byte(0xd0);
1396     emit_byte(0xc0+r);
1397     }
1398     else {
1399     emit_byte(0xc0);
1400     emit_byte(0xc0+r);
1401     emit_byte(i);
1402     }
1403     }
1404     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1405    
1406     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1407     {
1408     emit_byte(0x66);
1409     emit_byte(0xc1);
1410     emit_byte(0xc0+r);
1411     emit_byte(i);
1412     }
1413     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1414    
1415     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1416     {
1417     if (optimize_shift_once && (i == 1)) {
1418     emit_byte(0xd1);
1419     emit_byte(0xc0+r);
1420     }
1421     else {
1422     emit_byte(0xc1);
1423     emit_byte(0xc0+r);
1424     emit_byte(i);
1425     }
1426     }
1427     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1428    
1429     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1430     {
1431     emit_byte(0xd3);
1432     emit_byte(0xc0+d);
1433     }
1434     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1435    
1436     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1437     {
1438     emit_byte(0x66);
1439     emit_byte(0xd3);
1440     emit_byte(0xc0+d);
1441     }
1442     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1443    
1444     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1445     {
1446     emit_byte(0xd2);
1447     emit_byte(0xc0+d);
1448     }
1449     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1450    
1451     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1452     {
1453     emit_byte(0xd3);
1454     emit_byte(0xe0+d);
1455     }
1456     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1457    
1458     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1459     {
1460     emit_byte(0x66);
1461     emit_byte(0xd3);
1462     emit_byte(0xe0+d);
1463     }
1464     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1465    
1466     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1467     {
1468     emit_byte(0xd2);
1469     emit_byte(0xe0+d);
1470     }
1471     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1472    
1473     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1474     {
1475     if (optimize_shift_once && (i == 1)) {
1476     emit_byte(0xd0);
1477     emit_byte(0xc8+r);
1478     }
1479     else {
1480     emit_byte(0xc0);
1481     emit_byte(0xc8+r);
1482     emit_byte(i);
1483     }
1484     }
1485     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1486    
1487     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1488     {
1489     emit_byte(0x66);
1490     emit_byte(0xc1);
1491     emit_byte(0xc8+r);
1492     emit_byte(i);
1493     }
1494     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1495    
1496     // gb-- used for making an fpcr value in compemu_fpp.cpp
1497     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1498     {
1499     emit_byte(0x0b);
1500     emit_byte(0x05+8*d);
1501     emit_long(s);
1502     }
1503     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1504    
1505     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1506     {
1507     if (optimize_shift_once && (i == 1)) {
1508     emit_byte(0xd1);
1509     emit_byte(0xc8+r);
1510     }
1511     else {
1512     emit_byte(0xc1);
1513     emit_byte(0xc8+r);
1514     emit_byte(i);
1515     }
1516     }
1517     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1518    
1519     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1520     {
1521     emit_byte(0xd3);
1522     emit_byte(0xc8+d);
1523     }
1524     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1525    
1526     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1527     {
1528     emit_byte(0x66);
1529     emit_byte(0xd3);
1530     emit_byte(0xc8+d);
1531     }
1532     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1533    
1534     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1535     {
1536     emit_byte(0xd2);
1537     emit_byte(0xc8+d);
1538     }
1539     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1540    
1541     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1542     {
1543     emit_byte(0xd3);
1544     emit_byte(0xe8+d);
1545     }
1546     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1547    
1548     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1549     {
1550     emit_byte(0x66);
1551     emit_byte(0xd3);
1552     emit_byte(0xe8+d);
1553     }
1554     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1555    
1556     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1557     {
1558     emit_byte(0xd2);
1559     emit_byte(0xe8+d);
1560     }
1561     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1562    
1563     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1564     {
1565     emit_byte(0xd3);
1566     emit_byte(0xf8+d);
1567     }
1568     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1569    
1570     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1571     {
1572     emit_byte(0x66);
1573     emit_byte(0xd3);
1574     emit_byte(0xf8+d);
1575     }
1576     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1577    
1578     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1579     {
1580     emit_byte(0xd2);
1581     emit_byte(0xf8+d);
1582     }
1583     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1584    
1585     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1586     {
1587     if (optimize_shift_once && (i == 1)) {
1588     emit_byte(0xd1);
1589     emit_byte(0xe0+r);
1590     }
1591     else {
1592     emit_byte(0xc1);
1593     emit_byte(0xe0+r);
1594     emit_byte(i);
1595     }
1596     }
1597     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1598    
1599     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1600     {
1601     emit_byte(0x66);
1602     emit_byte(0xc1);
1603     emit_byte(0xe0+r);
1604     emit_byte(i);
1605     }
1606     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1607    
1608     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1609     {
1610     if (optimize_shift_once && (i == 1)) {
1611     emit_byte(0xd0);
1612     emit_byte(0xe0+r);
1613     }
1614     else {
1615     emit_byte(0xc0);
1616     emit_byte(0xe0+r);
1617     emit_byte(i);
1618     }
1619     }
1620     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1621    
1622     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1623     {
1624     if (optimize_shift_once && (i == 1)) {
1625     emit_byte(0xd1);
1626     emit_byte(0xe8+r);
1627     }
1628     else {
1629     emit_byte(0xc1);
1630     emit_byte(0xe8+r);
1631     emit_byte(i);
1632     }
1633     }
1634     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1635    
1636     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1637     {
1638     emit_byte(0x66);
1639     emit_byte(0xc1);
1640     emit_byte(0xe8+r);
1641     emit_byte(i);
1642     }
1643     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1644    
1645     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1646     {
1647     if (optimize_shift_once && (i == 1)) {
1648     emit_byte(0xd0);
1649     emit_byte(0xe8+r);
1650     }
1651     else {
1652     emit_byte(0xc0);
1653     emit_byte(0xe8+r);
1654     emit_byte(i);
1655     }
1656     }
1657     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1658    
1659     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1660     {
1661     if (optimize_shift_once && (i == 1)) {
1662     emit_byte(0xd1);
1663     emit_byte(0xf8+r);
1664     }
1665     else {
1666     emit_byte(0xc1);
1667     emit_byte(0xf8+r);
1668     emit_byte(i);
1669     }
1670     }
1671     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1672    
1673     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1674     {
1675     emit_byte(0x66);
1676     emit_byte(0xc1);
1677     emit_byte(0xf8+r);
1678     emit_byte(i);
1679     }
1680     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1681    
1682     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1683     {
1684     if (optimize_shift_once && (i == 1)) {
1685     emit_byte(0xd0);
1686     emit_byte(0xf8+r);
1687     }
1688     else {
1689     emit_byte(0xc0);
1690     emit_byte(0xf8+r);
1691     emit_byte(i);
1692     }
1693     }
1694     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1695    
1696     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1697     {
1698     emit_byte(0x9e);
1699     }
1700     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1701    
1702     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1703     {
1704     emit_byte(0x0f);
1705     emit_byte(0xa2);
1706     }
1707     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1708    
1709     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1710     {
1711     emit_byte(0x9f);
1712     }
1713     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1714    
1715     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1716     {
1717     emit_byte(0x0f);
1718     emit_byte(0x90+cc);
1719     emit_byte(0xc0+d);
1720     }
1721     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1722    
1723     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1724     {
1725     emit_byte(0x0f);
1726     emit_byte(0x90+cc);
1727     emit_byte(0x05);
1728     emit_long(d);
1729     }
1730     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1731    
1732     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1733     {
1734     if (have_cmov) {
1735     emit_byte(0x0f);
1736     emit_byte(0x40+cc);
1737     emit_byte(0xc0+8*d+s);
1738     }
1739     else { /* replacement using branch and mov */
1740     int uncc=(cc^1);
1741     emit_byte(0x70+uncc);
1742     emit_byte(2); /* skip next 2 bytes if not cc=true */
1743     emit_byte(0x89);
1744     emit_byte(0xc0+8*s+d);
1745     }
1746     }
1747     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1748    
1749     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1750     {
1751     emit_byte(0x0f);
1752     emit_byte(0xbc);
1753     emit_byte(0xc0+8*d+s);
1754     }
1755     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1756    
1757     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1758     {
1759     emit_byte(0x0f);
1760     emit_byte(0xbf);
1761     emit_byte(0xc0+8*d+s);
1762     }
1763     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1764    
1765     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1766     {
1767     emit_byte(0x0f);
1768     emit_byte(0xbe);
1769     emit_byte(0xc0+8*d+s);
1770     }
1771     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1772    
1773     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1774     {
1775     emit_byte(0x0f);
1776     emit_byte(0xb7);
1777     emit_byte(0xc0+8*d+s);
1778     }
1779     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1780    
1781     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1782     {
1783     emit_byte(0x0f);
1784     emit_byte(0xb6);
1785     emit_byte(0xc0+8*d+s);
1786     }
1787     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1788    
1789     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1790     {
1791     emit_byte(0x0f);
1792     emit_byte(0xaf);
1793     emit_byte(0xc0+8*d+s);
1794     }
1795     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1796    
1797     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1798     {
1799     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1800     abort();
1801     emit_byte(0xf7);
1802     emit_byte(0xea);
1803     }
1804     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1805    
1806     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1807     {
1808     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1809     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1810     abort();
1811     }
1812     emit_byte(0xf7);
1813     emit_byte(0xe2);
1814     }
1815     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1816    
1817     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1818     {
1819     abort(); /* %^$&%^$%#^ x86! */
1820     emit_byte(0x0f);
1821     emit_byte(0xaf);
1822     emit_byte(0xc0+8*d+s);
1823     }
1824     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1825    
1826     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1827     {
1828     emit_byte(0x88);
1829     emit_byte(0xc0+8*s+d);
1830     }
1831     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1832    
1833     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1834     {
1835     emit_byte(0x66);
1836     emit_byte(0x89);
1837     emit_byte(0xc0+8*s+d);
1838     }
1839     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1840    
1841     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1842     {
1843     int isebp=(baser==5)?0x40:0;
1844     int fi;
1845    
1846     switch(factor) {
1847     case 1: fi=0; break;
1848     case 2: fi=1; break;
1849     case 4: fi=2; break;
1850     case 8: fi=3; break;
1851     default: abort();
1852     }
1853    
1854    
1855     emit_byte(0x8b);
1856     emit_byte(0x04+8*d+isebp);
1857     emit_byte(baser+8*index+0x40*fi);
1858     if (isebp)
1859     emit_byte(0x00);
1860     }
1861     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1862    
1863     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1864     {
1865     int fi;
1866     int isebp;
1867    
1868     switch(factor) {
1869     case 1: fi=0; break;
1870     case 2: fi=1; break;
1871     case 4: fi=2; break;
1872     case 8: fi=3; break;
1873     default: abort();
1874     }
1875     isebp=(baser==5)?0x40:0;
1876    
1877     emit_byte(0x66);
1878     emit_byte(0x8b);
1879     emit_byte(0x04+8*d+isebp);
1880     emit_byte(baser+8*index+0x40*fi);
1881     if (isebp)
1882     emit_byte(0x00);
1883     }
1884     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1885    
1886     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1887     {
1888     int fi;
1889     int isebp;
1890    
1891     switch(factor) {
1892     case 1: fi=0; break;
1893     case 2: fi=1; break;
1894     case 4: fi=2; break;
1895     case 8: fi=3; break;
1896     default: abort();
1897     }
1898     isebp=(baser==5)?0x40:0;
1899    
1900     emit_byte(0x8a);
1901     emit_byte(0x04+8*d+isebp);
1902     emit_byte(baser+8*index+0x40*fi);
1903     if (isebp)
1904     emit_byte(0x00);
1905     }
1906     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1907    
1908     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1909     {
1910     int fi;
1911     int isebp;
1912    
1913     switch(factor) {
1914     case 1: fi=0; break;
1915     case 2: fi=1; break;
1916     case 4: fi=2; break;
1917     case 8: fi=3; break;
1918     default: abort();
1919     }
1920    
1921    
1922     isebp=(baser==5)?0x40:0;
1923    
1924     emit_byte(0x89);
1925     emit_byte(0x04+8*s+isebp);
1926     emit_byte(baser+8*index+0x40*fi);
1927     if (isebp)
1928     emit_byte(0x00);
1929     }
1930     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1931    
1932     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1933     {
1934     int fi;
1935     int isebp;
1936    
1937     switch(factor) {
1938     case 1: fi=0; break;
1939     case 2: fi=1; break;
1940     case 4: fi=2; break;
1941     case 8: fi=3; break;
1942     default: abort();
1943     }
1944     isebp=(baser==5)?0x40:0;
1945    
1946     emit_byte(0x66);
1947     emit_byte(0x89);
1948     emit_byte(0x04+8*s+isebp);
1949     emit_byte(baser+8*index+0x40*fi);
1950     if (isebp)
1951     emit_byte(0x00);
1952     }
1953     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1954    
1955     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1956     {
1957     int fi;
1958     int isebp;
1959    
1960     switch(factor) {
1961     case 1: fi=0; break;
1962     case 2: fi=1; break;
1963     case 4: fi=2; break;
1964     case 8: fi=3; break;
1965     default: abort();
1966     }
1967     isebp=(baser==5)?0x40:0;
1968    
1969     emit_byte(0x88);
1970     emit_byte(0x04+8*s+isebp);
1971     emit_byte(baser+8*index+0x40*fi);
1972     if (isebp)
1973     emit_byte(0x00);
1974     }
1975     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1976    
1977     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1978     {
1979     int fi;
1980    
1981     switch(factor) {
1982     case 1: fi=0; break;
1983     case 2: fi=1; break;
1984     case 4: fi=2; break;
1985     case 8: fi=3; break;
1986     default: abort();
1987     }
1988    
1989     emit_byte(0x89);
1990     emit_byte(0x84+8*s);
1991     emit_byte(baser+8*index+0x40*fi);
1992     emit_long(base);
1993     }
1994     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1995    
1996     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1997     {
1998     int fi;
1999    
2000     switch(factor) {
2001     case 1: fi=0; break;
2002     case 2: fi=1; break;
2003     case 4: fi=2; break;
2004     case 8: fi=3; break;
2005     default: abort();
2006     }
2007    
2008     emit_byte(0x66);
2009     emit_byte(0x89);
2010     emit_byte(0x84+8*s);
2011     emit_byte(baser+8*index+0x40*fi);
2012     emit_long(base);
2013     }
2014     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2015    
2016     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2017     {
2018     int fi;
2019    
2020     switch(factor) {
2021     case 1: fi=0; break;
2022     case 2: fi=1; break;
2023     case 4: fi=2; break;
2024     case 8: fi=3; break;
2025     default: abort();
2026     }
2027    
2028     emit_byte(0x88);
2029     emit_byte(0x84+8*s);
2030     emit_byte(baser+8*index+0x40*fi);
2031     emit_long(base);
2032     }
2033     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2034    
2035     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2036     {
2037     int fi;
2038    
2039     switch(factor) {
2040     case 1: fi=0; break;
2041     case 2: fi=1; break;
2042     case 4: fi=2; break;
2043     case 8: fi=3; break;
2044     default: abort();
2045     }
2046    
2047     emit_byte(0x8b);
2048     emit_byte(0x84+8*d);
2049     emit_byte(baser+8*index+0x40*fi);
2050     emit_long(base);
2051     }
2052     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2053    
2054     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2055     {
2056     int fi;
2057    
2058     switch(factor) {
2059     case 1: fi=0; break;
2060     case 2: fi=1; break;
2061     case 4: fi=2; break;
2062     case 8: fi=3; break;
2063     default: abort();
2064     }
2065    
2066     emit_byte(0x66);
2067     emit_byte(0x8b);
2068     emit_byte(0x84+8*d);
2069     emit_byte(baser+8*index+0x40*fi);
2070     emit_long(base);
2071     }
2072     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2073    
2074     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2075     {
2076     int fi;
2077    
2078     switch(factor) {
2079     case 1: fi=0; break;
2080     case 2: fi=1; break;
2081     case 4: fi=2; break;
2082     case 8: fi=3; break;
2083     default: abort();
2084     }
2085    
2086     emit_byte(0x8a);
2087     emit_byte(0x84+8*d);
2088     emit_byte(baser+8*index+0x40*fi);
2089     emit_long(base);
2090     }
2091     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2092    
2093     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2094     {
2095     int fi;
2096     switch(factor) {
2097     case 1: fi=0; break;
2098     case 2: fi=1; break;
2099     case 4: fi=2; break;
2100     case 8: fi=3; break;
2101     default:
2102     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2103     abort();
2104     }
2105     emit_byte(0x8b);
2106     emit_byte(0x04+8*d);
2107     emit_byte(0x05+8*index+64*fi);
2108     emit_long(base);
2109     }
2110     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2111    
2112     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2113     {
2114     int fi;
2115     switch(factor) {
2116     case 1: fi=0; break;
2117     case 2: fi=1; break;
2118     case 4: fi=2; break;
2119     case 8: fi=3; break;
2120     default:
2121     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2122     abort();
2123     }
2124     if (have_cmov) {
2125     emit_byte(0x0f);
2126     emit_byte(0x40+cond);
2127     emit_byte(0x04+8*d);
2128     emit_byte(0x05+8*index+64*fi);
2129     emit_long(base);
2130     }
2131     else { /* replacement using branch and mov */
2132     int uncc=(cond^1);
2133     emit_byte(0x70+uncc);
2134     emit_byte(7); /* skip next 7 bytes if not cc=true */
2135     emit_byte(0x8b);
2136     emit_byte(0x04+8*d);
2137     emit_byte(0x05+8*index+64*fi);
2138     emit_long(base);
2139     }
2140     }
2141     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2142    
2143     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2144     {
2145     if (have_cmov) {
2146     emit_byte(0x0f);
2147     emit_byte(0x40+cond);
2148     emit_byte(0x05+8*d);
2149     emit_long(mem);
2150     }
2151     else { /* replacement using branch and mov */
2152     int uncc=(cond^1);
2153     emit_byte(0x70+uncc);
2154     emit_byte(6); /* skip next 6 bytes if not cc=true */
2155     emit_byte(0x8b);
2156     emit_byte(0x05+8*d);
2157     emit_long(mem);
2158     }
2159     }
2160     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2161    
2162     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2163     {
2164 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2165 gbeauche 1.1 emit_byte(0x8b);
2166     emit_byte(0x40+8*d+s);
2167     emit_byte(offset);
2168     }
2169     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2170    
2171     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2172     {
2173 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2174 gbeauche 1.1 emit_byte(0x66);
2175     emit_byte(0x8b);
2176     emit_byte(0x40+8*d+s);
2177     emit_byte(offset);
2178     }
2179     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2180    
2181     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2182     {
2183 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2184 gbeauche 1.1 emit_byte(0x8a);
2185     emit_byte(0x40+8*d+s);
2186     emit_byte(offset);
2187     }
2188     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2189    
2190     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2191     {
2192     emit_byte(0x8b);
2193     emit_byte(0x80+8*d+s);
2194     emit_long(offset);
2195     }
2196     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2197    
2198     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2199     {
2200     emit_byte(0x66);
2201     emit_byte(0x8b);
2202     emit_byte(0x80+8*d+s);
2203     emit_long(offset);
2204     }
2205     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2206    
2207     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2208     {
2209     emit_byte(0x8a);
2210     emit_byte(0x80+8*d+s);
2211     emit_long(offset);
2212     }
2213     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2214    
2215     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2216     {
2217 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2218 gbeauche 1.1 emit_byte(0xc7);
2219     emit_byte(0x40+d);
2220     emit_byte(offset);
2221     emit_long(i);
2222     }
2223     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2224    
2225     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2226     {
2227 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2228 gbeauche 1.1 emit_byte(0x66);
2229     emit_byte(0xc7);
2230     emit_byte(0x40+d);
2231     emit_byte(offset);
2232     emit_word(i);
2233     }
2234     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2235    
2236     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2237     {
2238 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2239 gbeauche 1.1 emit_byte(0xc6);
2240     emit_byte(0x40+d);
2241     emit_byte(offset);
2242     emit_byte(i);
2243     }
2244     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2245    
2246     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2247     {
2248 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2249 gbeauche 1.1 emit_byte(0x89);
2250     emit_byte(0x40+8*s+d);
2251     emit_byte(offset);
2252     }
2253     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2254    
2255     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2256     {
2257 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2258 gbeauche 1.1 emit_byte(0x66);
2259     emit_byte(0x89);
2260     emit_byte(0x40+8*s+d);
2261     emit_byte(offset);
2262     }
2263     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2264    
2265     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2266     {
2267 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2268 gbeauche 1.1 emit_byte(0x88);
2269     emit_byte(0x40+8*s+d);
2270     emit_byte(offset);
2271     }
2272     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2273    
2274     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2275     {
2276     if (optimize_imm8 && isbyte(offset)) {
2277     emit_byte(0x8d);
2278     emit_byte(0x40+8*d+s);
2279     emit_byte(offset);
2280     }
2281     else {
2282     emit_byte(0x8d);
2283     emit_byte(0x80+8*d+s);
2284     emit_long(offset);
2285     }
2286     }
2287     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2288    
2289     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2290     {
2291     int fi;
2292    
2293     switch(factor) {
2294     case 1: fi=0; break;
2295     case 2: fi=1; break;
2296     case 4: fi=2; break;
2297     case 8: fi=3; break;
2298     default: abort();
2299     }
2300    
2301     if (optimize_imm8 && isbyte(offset)) {
2302     emit_byte(0x8d);
2303     emit_byte(0x44+8*d);
2304     emit_byte(0x40*fi+8*index+s);
2305     emit_byte(offset);
2306     }
2307     else {
2308     emit_byte(0x8d);
2309     emit_byte(0x84+8*d);
2310     emit_byte(0x40*fi+8*index+s);
2311     emit_long(offset);
2312     }
2313     }
2314     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2315    
2316     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2317     {
2318     int isebp=(s==5)?0x40:0;
2319     int fi;
2320    
2321     switch(factor) {
2322     case 1: fi=0; break;
2323     case 2: fi=1; break;
2324     case 4: fi=2; break;
2325     case 8: fi=3; break;
2326     default: abort();
2327     }
2328    
2329     emit_byte(0x8d);
2330     emit_byte(0x04+8*d+isebp);
2331     emit_byte(0x40*fi+8*index+s);
2332     if (isebp)
2333     emit_byte(0);
2334     }
2335     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2336    
2337     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2338     {
2339     if (optimize_imm8 && isbyte(offset)) {
2340     emit_byte(0x89);
2341     emit_byte(0x40+8*s+d);
2342     emit_byte(offset);
2343     }
2344     else {
2345     emit_byte(0x89);
2346     emit_byte(0x80+8*s+d);
2347     emit_long(offset);
2348     }
2349     }
2350     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2351    
2352     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2353     {
2354     emit_byte(0x66);
2355     emit_byte(0x89);
2356     emit_byte(0x80+8*s+d);
2357     emit_long(offset);
2358     }
2359     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2360    
2361     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2362     {
2363     if (optimize_imm8 && isbyte(offset)) {
2364     emit_byte(0x88);
2365     emit_byte(0x40+8*s+d);
2366     emit_byte(offset);
2367     }
2368     else {
2369     emit_byte(0x88);
2370     emit_byte(0x80+8*s+d);
2371     emit_long(offset);
2372     }
2373     }
2374     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2375    
2376     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2377     {
2378     emit_byte(0x0f);
2379     emit_byte(0xc8+r);
2380     }
2381     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2382    
2383     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2384     {
2385     emit_byte(0x66);
2386     emit_byte(0xc1);
2387     emit_byte(0xc0+r);
2388     emit_byte(0x08);
2389     }
2390     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2391    
2392     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2393     {
2394     emit_byte(0x89);
2395     emit_byte(0xc0+8*s+d);
2396     }
2397     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2398    
2399     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2400     {
2401     emit_byte(0x89);
2402     emit_byte(0x05+8*s);
2403     emit_long(d);
2404     }
2405     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2406    
2407     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2408     {
2409     emit_byte(0x66);
2410     emit_byte(0x89);
2411     emit_byte(0x05+8*s);
2412     emit_long(d);
2413     }
2414     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2415    
2416     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2417     {
2418     emit_byte(0x66);
2419     emit_byte(0x8b);
2420     emit_byte(0x05+8*d);
2421     emit_long(s);
2422     }
2423     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2424    
2425     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2426     {
2427     emit_byte(0x88);
2428 gbeauche 1.33 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2429 gbeauche 1.1 emit_long(d);
2430     }
2431     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2432    
2433     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2434     {
2435     emit_byte(0x8a);
2436     emit_byte(0x05+8*d);
2437     emit_long(s);
2438     }
2439     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2440    
2441     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2442     {
2443     emit_byte(0xb8+d);
2444     emit_long(s);
2445     }
2446     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2447    
2448     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2449     {
2450     emit_byte(0x66);
2451     emit_byte(0xb8+d);
2452     emit_word(s);
2453     }
2454     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2455    
2456     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2457     {
2458     emit_byte(0xb0+d);
2459     emit_byte(s);
2460     }
2461     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2462    
2463     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2464     {
2465     emit_byte(0x81);
2466     emit_byte(0x15);
2467     emit_long(d);
2468     emit_long(s);
2469     }
2470     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2471    
2472     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2473     {
2474     if (optimize_imm8 && isbyte(s)) {
2475     emit_byte(0x83);
2476     emit_byte(0x05);
2477     emit_long(d);
2478     emit_byte(s);
2479     }
2480     else {
2481     emit_byte(0x81);
2482     emit_byte(0x05);
2483     emit_long(d);
2484     emit_long(s);
2485     }
2486     }
2487     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2488    
2489     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2490     {
2491     emit_byte(0x66);
2492     emit_byte(0x81);
2493     emit_byte(0x05);
2494     emit_long(d);
2495     emit_word(s);
2496     }
2497     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2498    
2499     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2500     {
2501     emit_byte(0x80);
2502     emit_byte(0x05);
2503     emit_long(d);
2504     emit_byte(s);
2505     }
2506     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2507    
2508     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2509     {
2510 gbeauche 1.2 if (optimize_accum && isaccum(d))
2511     emit_byte(0xa9);
2512     else {
2513 gbeauche 1.1 emit_byte(0xf7);
2514     emit_byte(0xc0+d);
2515 gbeauche 1.2 }
2516 gbeauche 1.1 emit_long(i);
2517     }
2518     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2519    
2520     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2521     {
2522     emit_byte(0x85);
2523     emit_byte(0xc0+8*s+d);
2524     }
2525     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2526    
2527     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2528     {
2529     emit_byte(0x66);
2530     emit_byte(0x85);
2531     emit_byte(0xc0+8*s+d);
2532     }
2533     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2534    
2535     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2536     {
2537     emit_byte(0x84);
2538     emit_byte(0xc0+8*s+d);
2539     }
2540     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2541    
2542 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2543     {
2544     emit_byte(0x81);
2545     emit_byte(0xf0+d);
2546     emit_long(i);
2547     }
2548     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2549    
2550 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2551     {
2552     if (optimize_imm8 && isbyte(i)) {
2553 gbeauche 1.2 emit_byte(0x83);
2554     emit_byte(0xe0+d);
2555     emit_byte(i);
2556 gbeauche 1.1 }
2557     else {
2558 gbeauche 1.2 if (optimize_accum && isaccum(d))
2559     emit_byte(0x25);
2560     else {
2561     emit_byte(0x81);
2562     emit_byte(0xe0+d);
2563     }
2564     emit_long(i);
2565 gbeauche 1.1 }
2566     }
2567     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2568    
2569     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2570     {
2571 gbeauche 1.2 emit_byte(0x66);
2572     if (optimize_imm8 && isbyte(i)) {
2573     emit_byte(0x83);
2574     emit_byte(0xe0+d);
2575     emit_byte(i);
2576     }
2577     else {
2578     if (optimize_accum && isaccum(d))
2579     emit_byte(0x25);
2580     else {
2581     emit_byte(0x81);
2582     emit_byte(0xe0+d);
2583     }
2584     emit_word(i);
2585     }
2586 gbeauche 1.1 }
2587     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2588    
2589     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2590     {
2591     emit_byte(0x21);
2592     emit_byte(0xc0+8*s+d);
2593     }
2594     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2595    
2596     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2597     {
2598     emit_byte(0x66);
2599     emit_byte(0x21);
2600     emit_byte(0xc0+8*s+d);
2601     }
2602     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2603    
2604     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2605     {
2606     emit_byte(0x20);
2607     emit_byte(0xc0+8*s+d);
2608     }
2609     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2610    
2611     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2612     {
2613     if (optimize_imm8 && isbyte(i)) {
2614     emit_byte(0x83);
2615     emit_byte(0xc8+d);
2616     emit_byte(i);
2617     }
2618     else {
2619 gbeauche 1.2 if (optimize_accum && isaccum(d))
2620     emit_byte(0x0d);
2621     else {
2622 gbeauche 1.1 emit_byte(0x81);
2623     emit_byte(0xc8+d);
2624 gbeauche 1.2 }
2625 gbeauche 1.1 emit_long(i);
2626     }
2627     }
2628     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2629    
2630     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2631     {
2632     emit_byte(0x09);
2633     emit_byte(0xc0+8*s+d);
2634     }
2635     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2636    
2637     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2638     {
2639     emit_byte(0x66);
2640     emit_byte(0x09);
2641     emit_byte(0xc0+8*s+d);
2642     }
2643     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2644    
2645     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2646     {
2647     emit_byte(0x08);
2648     emit_byte(0xc0+8*s+d);
2649     }
2650     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2651    
2652     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2653     {
2654     emit_byte(0x11);
2655     emit_byte(0xc0+8*s+d);
2656     }
2657     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2658    
2659     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2660     {
2661     emit_byte(0x66);
2662     emit_byte(0x11);
2663     emit_byte(0xc0+8*s+d);
2664     }
2665     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2666    
2667     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2668     {
2669     emit_byte(0x10);
2670     emit_byte(0xc0+8*s+d);
2671     }
2672     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2673    
2674     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2675     {
2676     emit_byte(0x01);
2677     emit_byte(0xc0+8*s+d);
2678     }
2679     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2680    
2681     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2682     {
2683     emit_byte(0x66);
2684     emit_byte(0x01);
2685     emit_byte(0xc0+8*s+d);
2686     }
2687     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2688    
2689     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2690     {
2691     emit_byte(0x00);
2692     emit_byte(0xc0+8*s+d);
2693     }
2694     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2695    
2696     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2697     {
2698     if (isbyte(i)) {
2699     emit_byte(0x83);
2700     emit_byte(0xe8+d);
2701     emit_byte(i);
2702     }
2703     else {
2704 gbeauche 1.2 if (optimize_accum && isaccum(d))
2705     emit_byte(0x2d);
2706     else {
2707 gbeauche 1.1 emit_byte(0x81);
2708     emit_byte(0xe8+d);
2709 gbeauche 1.2 }
2710 gbeauche 1.1 emit_long(i);
2711     }
2712     }
2713     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2714    
2715     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2716     {
2717 gbeauche 1.2 if (optimize_accum && isaccum(d))
2718     emit_byte(0x2c);
2719     else {
2720 gbeauche 1.1 emit_byte(0x80);
2721     emit_byte(0xe8+d);
2722 gbeauche 1.2 }
2723 gbeauche 1.1 emit_byte(i);
2724     }
2725     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2726    
2727     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2728     {
2729     if (isbyte(i)) {
2730     emit_byte(0x83);
2731     emit_byte(0xc0+d);
2732     emit_byte(i);
2733     }
2734     else {
2735 gbeauche 1.2 if (optimize_accum && isaccum(d))
2736     emit_byte(0x05);
2737     else {
2738 gbeauche 1.1 emit_byte(0x81);
2739     emit_byte(0xc0+d);
2740 gbeauche 1.2 }
2741 gbeauche 1.1 emit_long(i);
2742     }
2743     }
2744     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2745    
2746     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2747     {
2748 gbeauche 1.2 emit_byte(0x66);
2749 gbeauche 1.1 if (isbyte(i)) {
2750     emit_byte(0x83);
2751     emit_byte(0xc0+d);
2752     emit_byte(i);
2753     }
2754     else {
2755 gbeauche 1.2 if (optimize_accum && isaccum(d))
2756     emit_byte(0x05);
2757     else {
2758 gbeauche 1.1 emit_byte(0x81);
2759     emit_byte(0xc0+d);
2760 gbeauche 1.2 }
2761 gbeauche 1.1 emit_word(i);
2762     }
2763     }
2764     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2765    
2766     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2767     {
2768 gbeauche 1.2 if (optimize_accum && isaccum(d))
2769     emit_byte(0x04);
2770     else {
2771     emit_byte(0x80);
2772     emit_byte(0xc0+d);
2773     }
2774 gbeauche 1.1 emit_byte(i);
2775     }
2776     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2777    
2778     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2779     {
2780     emit_byte(0x19);
2781     emit_byte(0xc0+8*s+d);
2782     }
2783     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2784    
2785     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2786     {
2787     emit_byte(0x66);
2788     emit_byte(0x19);
2789     emit_byte(0xc0+8*s+d);
2790     }
2791     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2792    
2793     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2794     {
2795     emit_byte(0x18);
2796     emit_byte(0xc0+8*s+d);
2797     }
2798     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2799    
2800     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2801     {
2802     emit_byte(0x29);
2803     emit_byte(0xc0+8*s+d);
2804     }
2805     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2806    
2807     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2808     {
2809     emit_byte(0x66);
2810     emit_byte(0x29);
2811     emit_byte(0xc0+8*s+d);
2812     }
2813     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2814    
2815     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2816     {
2817     emit_byte(0x28);
2818     emit_byte(0xc0+8*s+d);
2819     }
2820     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2821    
2822     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2823     {
2824     emit_byte(0x39);
2825     emit_byte(0xc0+8*s+d);
2826     }
2827     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2828    
2829     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2830     {
2831     if (optimize_imm8 && isbyte(i)) {
2832     emit_byte(0x83);
2833     emit_byte(0xf8+r);
2834     emit_byte(i);
2835     }
2836     else {
2837 gbeauche 1.2 if (optimize_accum && isaccum(r))
2838     emit_byte(0x3d);
2839     else {
2840 gbeauche 1.1 emit_byte(0x81);
2841     emit_byte(0xf8+r);
2842 gbeauche 1.2 }
2843 gbeauche 1.1 emit_long(i);
2844     }
2845     }
2846     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2847    
2848     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2849     {
2850     emit_byte(0x66);
2851     emit_byte(0x39);
2852     emit_byte(0xc0+8*s+d);
2853     }
2854     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2855    
2856 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2857     {
2858     emit_byte(0x80);
2859     emit_byte(0x3d);
2860     emit_long(d);
2861     emit_byte(s);
2862     }
2863     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2864    
2865 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2866     {
2867 gbeauche 1.2 if (optimize_accum && isaccum(d))
2868     emit_byte(0x3c);
2869     else {
2870 gbeauche 1.1 emit_byte(0x80);
2871     emit_byte(0xf8+d);
2872 gbeauche 1.2 }
2873 gbeauche 1.1 emit_byte(i);
2874     }
2875     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2876    
2877     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2878     {
2879     emit_byte(0x38);
2880     emit_byte(0xc0+8*s+d);
2881     }
2882     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2883    
2884     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2885     {
2886     int fi;
2887    
2888     switch(factor) {
2889     case 1: fi=0; break;
2890     case 2: fi=1; break;
2891     case 4: fi=2; break;
2892     case 8: fi=3; break;
2893     default: abort();
2894     }
2895     emit_byte(0x39);
2896     emit_byte(0x04+8*d);
2897     emit_byte(5+8*index+0x40*fi);
2898     emit_long(offset);
2899     }
2900     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2901    
2902     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2903     {
2904     emit_byte(0x31);
2905     emit_byte(0xc0+8*s+d);
2906     }
2907     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2908    
2909     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2910     {
2911     emit_byte(0x66);
2912     emit_byte(0x31);
2913     emit_byte(0xc0+8*s+d);
2914     }
2915     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2916    
2917     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2918     {
2919     emit_byte(0x30);
2920     emit_byte(0xc0+8*s+d);
2921     }
2922     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2923    
2924     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2925     {
2926     if (optimize_imm8 && isbyte(s)) {
2927     emit_byte(0x83);
2928     emit_byte(0x2d);
2929     emit_long(d);
2930     emit_byte(s);
2931     }
2932     else {
2933     emit_byte(0x81);
2934     emit_byte(0x2d);
2935     emit_long(d);
2936     emit_long(s);
2937     }
2938     }
2939     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2940    
2941     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2942     {
2943     if (optimize_imm8 && isbyte(s)) {
2944     emit_byte(0x83);
2945     emit_byte(0x3d);
2946     emit_long(d);
2947     emit_byte(s);
2948     }
2949     else {
2950     emit_byte(0x81);
2951     emit_byte(0x3d);
2952     emit_long(d);
2953     emit_long(s);
2954     }
2955     }
2956     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2957    
2958     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2959     {
2960     emit_byte(0x87);
2961     emit_byte(0xc0+8*r1+r2);
2962     }
2963     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2964    
2965     /*************************************************************************
2966     * FIXME: mem access modes probably wrong *
2967     *************************************************************************/
2968    
2969     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2970     {
2971     emit_byte(0x9c);
2972     }
2973     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2974    
2975     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2976     {
2977     emit_byte(0x9d);
2978     }
2979     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2980 gbeauche 1.13
2981 gbeauche 1.34 /* Generate floating-point instructions */
2982     static inline void x86_fadd_m(MEMR s)
2983     {
2984     emit_byte(0xdc);
2985     emit_byte(0x05);
2986     emit_long(s);
2987     }
2988    
2989 gbeauche 1.13 #endif
2990 gbeauche 1.1
2991     /*************************************************************************
2992     * Unoptimizable stuff --- jump *
2993     *************************************************************************/
2994    
2995     static __inline__ void raw_call_r(R4 r)
2996     {
2997 gbeauche 1.20 #if USE_NEW_RTASM
2998     CALLsr(r);
2999     #else
3000 gbeauche 1.1 emit_byte(0xff);
3001     emit_byte(0xd0+r);
3002 gbeauche 1.20 #endif
3003 gbeauche 1.5 }
3004    
3005     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3006     {
3007 gbeauche 1.20 #if USE_NEW_RTASM
3008     CALLsm(base, X86_NOREG, r, m);
3009     #else
3010 gbeauche 1.5 int mu;
3011     switch(m) {
3012     case 1: mu=0; break;
3013     case 2: mu=1; break;
3014     case 4: mu=2; break;
3015     case 8: mu=3; break;
3016     default: abort();
3017     }
3018     emit_byte(0xff);
3019     emit_byte(0x14);
3020     emit_byte(0x05+8*r+0x40*mu);
3021     emit_long(base);
3022 gbeauche 1.20 #endif
3023 gbeauche 1.1 }
3024    
3025     static __inline__ void raw_jmp_r(R4 r)
3026     {
3027 gbeauche 1.20 #if USE_NEW_RTASM
3028     JMPsr(r);
3029     #else
3030 gbeauche 1.1 emit_byte(0xff);
3031     emit_byte(0xe0+r);
3032 gbeauche 1.20 #endif
3033 gbeauche 1.1 }
3034    
3035     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3036     {
3037 gbeauche 1.20 #if USE_NEW_RTASM
3038     JMPsm(base, X86_NOREG, r, m);
3039     #else
3040 gbeauche 1.1 int mu;
3041     switch(m) {
3042     case 1: mu=0; break;
3043     case 2: mu=1; break;
3044     case 4: mu=2; break;
3045     case 8: mu=3; break;
3046     default: abort();
3047     }
3048     emit_byte(0xff);
3049     emit_byte(0x24);
3050     emit_byte(0x05+8*r+0x40*mu);
3051     emit_long(base);
3052 gbeauche 1.20 #endif
3053 gbeauche 1.1 }
3054    
3055     static __inline__ void raw_jmp_m(uae_u32 base)
3056     {
3057     emit_byte(0xff);
3058     emit_byte(0x25);
3059     emit_long(base);
3060     }
3061    
3062    
3063     static __inline__ void raw_call(uae_u32 t)
3064     {
3065 gbeauche 1.20 #if USE_NEW_RTASM
3066     CALLm(t);
3067     #else
3068 gbeauche 1.1 emit_byte(0xe8);
3069     emit_long(t-(uae_u32)target-4);
3070 gbeauche 1.20 #endif
3071 gbeauche 1.1 }
3072    
3073     static __inline__ void raw_jmp(uae_u32 t)
3074     {
3075 gbeauche 1.20 #if USE_NEW_RTASM
3076     JMPm(t);
3077     #else
3078 gbeauche 1.1 emit_byte(0xe9);
3079     emit_long(t-(uae_u32)target-4);
3080 gbeauche 1.20 #endif
3081 gbeauche 1.1 }
3082    
3083     static __inline__ void raw_jl(uae_u32 t)
3084     {
3085     emit_byte(0x0f);
3086     emit_byte(0x8c);
3087 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3088 gbeauche 1.1 }
3089    
3090     static __inline__ void raw_jz(uae_u32 t)
3091     {
3092     emit_byte(0x0f);
3093     emit_byte(0x84);
3094 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3095 gbeauche 1.1 }
3096    
3097     static __inline__ void raw_jnz(uae_u32 t)
3098     {
3099     emit_byte(0x0f);
3100     emit_byte(0x85);
3101 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3102 gbeauche 1.1 }
3103    
3104     static __inline__ void raw_jnz_l_oponly(void)
3105     {
3106     emit_byte(0x0f);
3107     emit_byte(0x85);
3108     }
3109    
3110     static __inline__ void raw_jcc_l_oponly(int cc)
3111     {
3112     emit_byte(0x0f);
3113     emit_byte(0x80+cc);
3114     }
3115    
3116     static __inline__ void raw_jnz_b_oponly(void)
3117     {
3118     emit_byte(0x75);
3119     }
3120    
3121     static __inline__ void raw_jz_b_oponly(void)
3122     {
3123     emit_byte(0x74);
3124     }
3125    
3126     static __inline__ void raw_jcc_b_oponly(int cc)
3127     {
3128     emit_byte(0x70+cc);
3129     }
3130    
3131     static __inline__ void raw_jmp_l_oponly(void)
3132     {
3133     emit_byte(0xe9);
3134     }
3135    
3136     static __inline__ void raw_jmp_b_oponly(void)
3137     {
3138     emit_byte(0xeb);
3139     }
3140    
3141     static __inline__ void raw_ret(void)
3142     {
3143     emit_byte(0xc3);
3144     }
3145    
3146     static __inline__ void raw_nop(void)
3147     {
3148     emit_byte(0x90);
3149     }
3150    
3151 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3152     {
3153     /* Source: GNU Binutils 2.12.90.0.15 */
3154     /* Various efficient no-op patterns for aligning code labels.
3155     Note: Don't try to assemble the instructions in the comments.
3156     0L and 0w are not legal. */
3157     static const uae_u8 f32_1[] =
3158     {0x90}; /* nop */
3159     static const uae_u8 f32_2[] =
3160     {0x89,0xf6}; /* movl %esi,%esi */
3161     static const uae_u8 f32_3[] =
3162     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3163     static const uae_u8 f32_4[] =
3164     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3165     static const uae_u8 f32_5[] =
3166     {0x90, /* nop */
3167     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3168     static const uae_u8 f32_6[] =
3169     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3170     static const uae_u8 f32_7[] =
3171     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3172     static const uae_u8 f32_8[] =
3173     {0x90, /* nop */
3174     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3175     static const uae_u8 f32_9[] =
3176     {0x89,0xf6, /* movl %esi,%esi */
3177     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3178     static const uae_u8 f32_10[] =
3179     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3180     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3181     static const uae_u8 f32_11[] =
3182     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3183     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3184     static const uae_u8 f32_12[] =
3185     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3186     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3187     static const uae_u8 f32_13[] =
3188     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3189     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3190     static const uae_u8 f32_14[] =
3191     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3192     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3193     static const uae_u8 f32_15[] =
3194     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3195     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3196     static const uae_u8 f32_16[] =
3197     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3198     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3199     static const uae_u8 *const f32_patt[] = {
3200     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3201     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3202     };
3203 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3204 gbeauche 1.8
3205 gbeauche 1.21 #if defined(__x86_64__)
3206     /* The recommended way to pad 64bit code is to use NOPs preceded by
3207     maximally four 0x66 prefixes. Balance the size of nops. */
3208     if (nbytes == 0)
3209     return;
3210    
3211     int i;
3212     int nnops = (nbytes + 3) / 4;
3213     int len = nbytes / nnops;
3214     int remains = nbytes - nnops * len;
3215    
3216     for (i = 0; i < remains; i++) {
3217     emit_block(prefixes, len);
3218     raw_nop();
3219     }
3220     for (; i < nnops; i++) {
3221     emit_block(prefixes, len - 1);
3222     raw_nop();
3223     }
3224     #else
3225 gbeauche 1.8 int nloops = nbytes / 16;
3226     while (nloops-- > 0)
3227     emit_block(f32_16, sizeof(f32_16));
3228    
3229     nbytes %= 16;
3230     if (nbytes)
3231     emit_block(f32_patt[nbytes - 1], nbytes);
3232 gbeauche 1.21 #endif
3233 gbeauche 1.8 }
3234    
3235 gbeauche 1.1
3236     /*************************************************************************
3237     * Flag handling, to and fro UAE flag register *
3238     *************************************************************************/
3239    
3240     #ifdef SAHF_SETO_PROFITABLE
3241    
3242     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3243     static __inline__ void raw_flags_to_reg(int r)
3244     {
3245     raw_lahf(0); /* Most flags in AH */
3246     //raw_setcc(r,0); /* V flag in AL */
3247 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3248 gbeauche 1.1
3249     #if 1 /* Let's avoid those nasty partial register stalls */
3250 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3251 gbeauche 1.33 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3252 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3253     live.state[FLAGTMP].status=INMEM;
3254     live.state[FLAGTMP].realreg=-1;
3255     /* We just "evicted" FLAGTMP. */
3256     if (live.nat[r].nholds!=1) {
3257     /* Huh? */
3258     abort();
3259     }
3260     live.nat[r].nholds=0;
3261     #endif
3262     }
3263    
3264     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3265     static __inline__ void raw_reg_to_flags(int r)
3266     {
3267     raw_cmp_b_ri(r,-127); /* set V */
3268     raw_sahf(0);
3269     }
3270    
3271 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3272     static __inline__ void raw_flags_set_zero(int s, int tmp)
3273     {
3274     raw_mov_l_rr(tmp,s);
3275     raw_lahf(s); /* flags into ah */
3276     raw_and_l_ri(s,0xffffbfff);
3277     raw_and_l_ri(tmp,0x00004000);
3278     raw_xor_l_ri(tmp,0x00004000);
3279     raw_or_l(s,tmp);
3280     raw_sahf(s);
3281     }
3282    
3283 gbeauche 1.1 #else
3284    
3285     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3286     static __inline__ void raw_flags_to_reg(int r)
3287     {
3288     raw_pushfl();
3289     raw_pop_l_r(r);
3290 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3291 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3292     live.state[FLAGTMP].status=INMEM;
3293     live.state[FLAGTMP].realreg=-1;
3294     /* We just "evicted" FLAGTMP. */
3295     if (live.nat[r].nholds!=1) {
3296     /* Huh? */
3297     abort();
3298     }
3299     live.nat[r].nholds=0;
3300     }
3301    
3302     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3303     static __inline__ void raw_reg_to_flags(int r)
3304     {
3305     raw_push_l_r(r);
3306     raw_popfl();
3307     }
3308    
3309 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3310     static __inline__ void raw_flags_set_zero(int s, int tmp)
3311     {
3312     raw_mov_l_rr(tmp,s);
3313     raw_pushfl();
3314     raw_pop_l_r(s);
3315     raw_and_l_ri(s,0xffffffbf);
3316     raw_and_l_ri(tmp,0x00000040);
3317     raw_xor_l_ri(tmp,0x00000040);
3318     raw_or_l(s,tmp);
3319     raw_push_l_r(s);
3320     raw_popfl();
3321     }
3322 gbeauche 1.1 #endif
3323    
3324     /* Apparently, there are enough instructions between flag store and
3325     flag reload to avoid the partial memory stall */
3326     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3327     {
3328     #if 1
3329 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3330 gbeauche 1.1 #else
3331 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3332     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3333 gbeauche 1.1 #endif
3334     }
3335    
3336     /* FLAGX is byte sized, and we *do* write it at that size */
3337     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3338     {
3339     if (live.nat[target].canbyte)
3340 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3341 gbeauche 1.1 else if (live.nat[target].canword)
3342 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3343 gbeauche 1.1 else
3344 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3345 gbeauche 1.1 }
3346    
3347 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3348     {
3349     if (off) raw_sub_l_ri(ESP_INDEX,off);
3350     }
3351    
3352 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3353     {
3354 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3355 gbeauche 1.1 }
3356    
3357     /*************************************************************************
3358     * Handling mistaken direct memory access *
3359     *************************************************************************/
3360    
3361     // gb-- I don't need that part for JIT Basilisk II
3362     #if defined(NATMEM_OFFSET) && 0
3363     #include <asm/sigcontext.h>
3364     #include <signal.h>
3365    
3366     #define SIG_READ 1
3367     #define SIG_WRITE 2
3368    
3369     static int in_handler=0;
3370     static uae_u8 veccode[256];
3371    
3372     static void vec(int x, struct sigcontext sc)
3373     {
3374     uae_u8* i=(uae_u8*)sc.eip;
3375     uae_u32 addr=sc.cr2;
3376     int r=-1;
3377     int size=4;
3378     int dir=-1;
3379     int len=0;
3380     int j;
3381    
3382     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3383     if (!canbang)
3384     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3385     if (in_handler)
3386     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3387    
3388     if (canbang && i>=compiled_code && i<=current_compile_p) {
3389     if (*i==0x66) {
3390     i++;
3391     size=2;
3392     len++;
3393     }
3394    
3395     switch(i[0]) {
3396     case 0x8a:
3397     if ((i[1]&0xc0)==0x80) {
3398     r=(i[1]>>3)&7;
3399     dir=SIG_READ;
3400     size=1;
3401     len+=6;
3402     break;
3403     }
3404     break;
3405     case 0x88:
3406     if ((i[1]&0xc0)==0x80) {
3407     r=(i[1]>>3)&7;
3408     dir=SIG_WRITE;
3409     size=1;
3410     len+=6;
3411     break;
3412     }
3413     break;
3414     case 0x8b:
3415     if ((i[1]&0xc0)==0x80) {
3416     r=(i[1]>>3)&7;
3417     dir=SIG_READ;
3418     len+=6;
3419     break;
3420     }
3421     if ((i[1]&0xc0)==0x40) {
3422     r=(i[1]>>3)&7;
3423     dir=SIG_READ;
3424     len+=3;
3425     break;
3426     }
3427     break;
3428     case 0x89:
3429     if ((i[1]&0xc0)==0x80) {
3430     r=(i[1]>>3)&7;
3431     dir=SIG_WRITE;
3432     len+=6;
3433     break;
3434     }
3435     if ((i[1]&0xc0)==0x40) {
3436     r=(i[1]>>3)&7;
3437     dir=SIG_WRITE;
3438     len+=3;
3439     break;
3440     }
3441     break;
3442     }
3443     }
3444    
3445     if (r!=-1) {
3446     void* pr=NULL;
3447     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3448    
3449     switch(r) {
3450     case 0: pr=&(sc.eax); break;
3451     case 1: pr=&(sc.ecx); break;
3452     case 2: pr=&(sc.edx); break;
3453     case 3: pr=&(sc.ebx); break;
3454     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3455     case 5: pr=(size>1)?
3456     (void*)(&(sc.ebp)):
3457     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3458     case 6: pr=(size>1)?
3459     (void*)(&(sc.esi)):
3460     (void*)(((uae_u8*)&(sc.edx))+1); break;
3461     case 7: pr=(size>1)?
3462     (void*)(&(sc.edi)):
3463     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3464     default: abort();
3465     }
3466     if (pr) {
3467     blockinfo* bi;
3468    
3469     if (currprefs.comp_oldsegv) {
3470     addr-=NATMEM_OFFSET;
3471    
3472     if ((addr>=0x10000000 && addr<0x40000000) ||
3473     (addr>=0x50000000)) {
3474     write_log("Suspicious address in %x SEGV handler.\n",addr);
3475     }
3476     if (dir==SIG_READ) {
3477     switch(size) {
3478     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3479     case 2: *((uae_u16*)pr)=get_word(addr); break;
3480     case 4: *((uae_u32*)pr)=get_long(addr); break;
3481     default: abort();
3482     }
3483     }
3484     else { /* write */
3485     switch(size) {
3486     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3487     case 2: put_word(addr,*((uae_u16*)pr)); break;
3488     case 4: put_long(addr,*((uae_u32*)pr)); break;
3489     default: abort();
3490     }
3491     }
3492     write_log("Handled one access!\n");
3493     fflush(stdout);
3494     segvcount++;
3495     sc.eip+=len;
3496     }
3497     else {
3498     void* tmp=target;
3499     int i;
3500     uae_u8 vecbuf[5];
3501    
3502     addr-=NATMEM_OFFSET;
3503    
3504     if ((addr>=0x10000000 && addr<0x40000000) ||
3505     (addr>=0x50000000)) {
3506     write_log("Suspicious address in %x SEGV handler.\n",addr);
3507     }
3508    
3509     target=(uae_u8*)sc.eip;
3510     for (i=0;i<5;i++)
3511     vecbuf[i]=target[i];
3512     emit_byte(0xe9);
3513 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3514 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3515    
3516     write_log("Handled one access!\n");
3517     fflush(stdout);
3518     segvcount++;
3519    
3520     target=veccode;
3521    
3522     if (dir==SIG_READ) {
3523     switch(size) {
3524     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3525     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3526     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3527     default: abort();
3528     }
3529     }
3530     else { /* write */
3531     switch(size) {
3532     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3533     case 2: put_word(addr,*((uae_u16*)pr)); break;
3534     case 4: put_long(addr,*((uae_u32*)pr)); break;
3535     default: abort();
3536     }
3537     }
3538     for (i=0;i<5;i++)
3539     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3540 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3541 gbeauche 1.1 emit_byte(0xe9);
3542 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3543 gbeauche 1.1 in_handler=1;
3544     target=tmp;
3545     }
3546     bi=active;
3547     while (bi) {
3548     if (bi->handler &&
3549     (uae_u8*)bi->direct_handler<=i &&
3550     (uae_u8*)bi->nexthandler>i) {
3551     write_log("deleted trigger (%p<%p<%p) %p\n",
3552     bi->handler,
3553     i,
3554     bi->nexthandler,
3555     bi->pc_p);
3556     invalidate_block(bi);
3557     raise_in_cl_list(bi);
3558     set_special(0);
3559     return;
3560     }
3561     bi=bi->next;
3562     }
3563     /* Not found in the active list. Might be a rom routine that
3564     is in the dormant list */
3565     bi=dormant;
3566     while (bi) {
3567     if (bi->handler &&
3568     (uae_u8*)bi->direct_handler<=i &&
3569     (uae_u8*)bi->nexthandler>i) {
3570     write_log("deleted trigger (%p<%p<%p) %p\n",
3571     bi->handler,
3572     i,
3573     bi->nexthandler,
3574     bi->pc_p);
3575     invalidate_block(bi);
3576     raise_in_cl_list(bi);
3577     set_special(0);
3578     return;
3579     }
3580     bi=bi->next;
3581     }
3582     write_log("Huh? Could not find trigger!\n");
3583     return;
3584     }
3585     }
3586     write_log("Can't handle access!\n");
3587     for (j=0;j<10;j++) {
3588     write_log("instruction byte %2d is %02x\n",j,i[j]);
3589     }
3590     write_log("Please send the above info (starting at \"fault address\") to\n"
3591     "bmeyer@csse.monash.edu.au\n"
3592     "This shouldn't happen ;-)\n");
3593     fflush(stdout);
3594     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3595     }
3596     #endif
3597    
3598    
3599     /*************************************************************************
3600     * Checking for CPU features *
3601     *************************************************************************/
3602    
3603 gbeauche 1.3 struct cpuinfo_x86 {
3604     uae_u8 x86; // CPU family
3605     uae_u8 x86_vendor; // CPU vendor
3606     uae_u8 x86_processor; // CPU canonical processor type
3607     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3608     uae_u32 x86_hwcap;
3609     uae_u8 x86_model;
3610     uae_u8 x86_mask;
3611     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3612     char x86_vendor_id[16];
3613     };
3614     struct cpuinfo_x86 cpuinfo;
3615    
/* Vendor codes stored in cpuinfo_x86::x86_vendor. */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
3628    
/* Canonical processor types.  The values index the per-processor tables
   x86_processor_string_table[] and x86_alignments[], so the order matters. */
enum {
    X86_PROCESSOR_I386,         /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_X86_64,
    X86_PROCESSOR_max           /* number of entries; also "not yet determined" */
};
3640    
3641     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3642     "80386",
3643     "80486",
3644     "Pentium",
3645     "PentiumPro",
3646     "K6",
3647     "Athlon",
3648 gbeauche 1.16 "Pentium4",
3649 gbeauche 1.28 "x86-64"
3650 gbeauche 1.3 };
3651    
3652     static struct ptt {
3653     const int align_loop;
3654     const int align_loop_max_skip;
3655     const int align_jump;
3656     const int align_jump_max_skip;
3657     const int align_func;
3658     }
3659     x86_alignments[X86_PROCESSOR_max] = {
3660     { 4, 3, 4, 3, 4 },
3661     { 16, 15, 16, 15, 16 },
3662     { 16, 7, 16, 7, 16 },
3663     { 16, 15, 16, 7, 16 },
3664     { 32, 7, 32, 7, 32 },
3665 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3666 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3667     { 16, 7, 16, 7, 16 }
3668 gbeauche 1.3 };
3669 gbeauche 1.1
3670 gbeauche 1.3 static void
3671     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3672 gbeauche 1.1 {
3673 gbeauche 1.3 char *v = c->x86_vendor_id;
3674    
3675     if (!strcmp(v, "GenuineIntel"))
3676     c->x86_vendor = X86_VENDOR_INTEL;
3677     else if (!strcmp(v, "AuthenticAMD"))
3678     c->x86_vendor = X86_VENDOR_AMD;
3679     else if (!strcmp(v, "CyrixInstead"))
3680     c->x86_vendor = X86_VENDOR_CYRIX;
3681     else if (!strcmp(v, "Geode by NSC"))
3682     c->x86_vendor = X86_VENDOR_NSC;
3683     else if (!strcmp(v, "UMC UMC UMC "))
3684     c->x86_vendor = X86_VENDOR_UMC;
3685     else if (!strcmp(v, "CentaurHauls"))
3686     c->x86_vendor = X86_VENDOR_CENTAUR;
3687     else if (!strcmp(v, "NexGenDriven"))
3688     c->x86_vendor = X86_VENDOR_NEXGEN;
3689     else if (!strcmp(v, "RiseRiseRise"))
3690     c->x86_vendor = X86_VENDOR_RISE;
3691     else if (!strcmp(v, "GenuineTMx86") ||
3692     !strcmp(v, "TransmetaCPU"))
3693     c->x86_vendor = X86_VENDOR_TRANSMETA;
3694     else
3695     c->x86_vendor = X86_VENDOR_UNKNOWN;
3696     }
3697 gbeauche 1.1
3698 gbeauche 1.3 static void
3699     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3700     {
3701 gbeauche 1.27 const int CPUID_SPACE = 4096;
3702     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3703     if (cpuid_space == VM_MAP_FAILED)
3704     abort();
3705     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3706    
3707 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3708 gbeauche 1.3 uae_u8* tmp=get_target();
3709 gbeauche 1.1
3710 gbeauche 1.20 s_op = op;
3711 gbeauche 1.3 set_target(cpuid_space);
3712     raw_push_l_r(0); /* eax */
3713     raw_push_l_r(1); /* ecx */
3714     raw_push_l_r(2); /* edx */
3715     raw_push_l_r(3); /* ebx */
3716 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3717 gbeauche 1.3 raw_cpuid(0);
3718 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3719     raw_mov_l_mr((uintptr)&s_ebx,3);
3720     raw_mov_l_mr((uintptr)&s_ecx,1);
3721     raw_mov_l_mr((uintptr)&s_edx,2);
3722 gbeauche 1.3 raw_pop_l_r(3);
3723     raw_pop_l_r(2);
3724     raw_pop_l_r(1);
3725     raw_pop_l_r(0);
3726     raw_ret();
3727     set_target(tmp);
3728 gbeauche 1.1
3729 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3730 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3731     if (ebx != NULL) *ebx = s_ebx;
3732     if (ecx != NULL) *ecx = s_ecx;
3733     if (edx != NULL) *edx = s_edx;
3734 gbeauche 1.27
3735     vm_release(cpuid_space, CPUID_SPACE);
3736 gbeauche 1.1 }
3737    
3738 gbeauche 1.3 static void
3739     raw_init_cpu(void)
3740 gbeauche 1.1 {
3741 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3742    
3743     /* Defaults */
3744 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3745 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3746     c->cpuid_level = -1; /* CPUID not detected */
3747     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3748     c->x86_vendor_id[0] = '\0'; /* Unset */
3749     c->x86_hwcap = 0;
3750    
3751     /* Get vendor name */
3752     c->x86_vendor_id[12] = '\0';
3753     cpuid(0x00000000,
3754     (uae_u32 *)&c->cpuid_level,
3755     (uae_u32 *)&c->x86_vendor_id[0],
3756     (uae_u32 *)&c->x86_vendor_id[8],
3757     (uae_u32 *)&c->x86_vendor_id[4]);
3758     x86_get_cpu_vendor(c);
3759    
3760     /* Intel-defined flags: level 0x00000001 */
3761     c->x86_brand_id = 0;
3762     if ( c->cpuid_level >= 0x00000001 ) {
3763     uae_u32 tfms, brand_id;
3764     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3765     c->x86 = (tfms >> 8) & 15;
3766 gbeauche 1.29 if (c->x86 == 0xf)
3767     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3768 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3769 gbeauche 1.29 if (c->x86_model == 0xf)
3770     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3771 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3772     c->x86_mask = tfms & 15;
3773     } else {
3774     /* Have CPUID level 0 only - unheard of */
3775     c->x86 = 4;
3776     }
3777    
3778 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3779     uae_u32 xlvl;
3780     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3781     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3782     if ( xlvl >= 0x80000001 ) {
3783 gbeauche 1.28 uae_u32 features, extra_features;
3784     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3785 gbeauche 1.16 if (features & (1 << 29)) {
3786     /* Assume x86-64 if long mode is supported */
3787 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3788 gbeauche 1.16 }
3789 gbeauche 1.28 if (extra_features & (1 << 0))
3790     have_lahf_lm = true;
3791 gbeauche 1.16 }
3792     }
3793    
3794 gbeauche 1.3 /* Canonicalize processor ID */
3795     switch (c->x86) {
3796     case 3:
3797     c->x86_processor = X86_PROCESSOR_I386;
3798     break;
3799     case 4:
3800     c->x86_processor = X86_PROCESSOR_I486;
3801     break;
3802     case 5:
3803     if (c->x86_vendor == X86_VENDOR_AMD)
3804     c->x86_processor = X86_PROCESSOR_K6;
3805     else
3806     c->x86_processor = X86_PROCESSOR_PENTIUM;
3807     break;
3808     case 6:
3809     if (c->x86_vendor == X86_VENDOR_AMD)
3810     c->x86_processor = X86_PROCESSOR_ATHLON;
3811     else
3812     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3813     break;
3814     case 15:
3815 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3816     switch (c->x86_vendor) {
3817     case X86_VENDOR_INTEL:
3818     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3819     break;
3820     case X86_VENDOR_AMD:
3821     /* Assume a 32-bit Athlon processor if not in long mode */
3822     c->x86_processor = X86_PROCESSOR_ATHLON;
3823     break;
3824     }
3825     }
3826     break;
3827 gbeauche 1.3 }
3828     if (c->x86_processor == X86_PROCESSOR_max) {
3829 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3830     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3831 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3832     fprintf(stderr, " Model : %d\n", c->x86_model);
3833     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3834 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3835 gbeauche 1.3 if (c->x86_brand_id)
3836     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3837     }
3838    
3839     /* Have CMOV support? */
3840 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3841 gbeauche 1.3
3842     /* Can the host CPU suffer from partial register stalls? */
3843     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3844     #if 1
3845     /* It appears that partial register writes are a bad idea even on
3846 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3847     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3848 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3849     have_rat_stall = true;
3850 gbeauche 1.1 #endif
3851 gbeauche 1.3
3852     /* Alignments */
3853     if (tune_alignment) {
3854     align_loops = x86_alignments[c->x86_processor].align_loop;
3855     align_jumps = x86_alignments[c->x86_processor].align_jump;
3856     }
3857    
3858     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3859     c->cpuid_level, c->x86_vendor_id,
3860     x86_processor_string_table[c->x86_processor]);
3861 gbeauche 1.1 }
3862    
3863 gbeauche 1.10 static bool target_check_bsf(void)
3864     {
3865     bool mismatch = false;
3866     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3867     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3868     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3869     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3870     for (int value = -1; value <= 1; value++) {
3871 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3872     unsigned long tmp = value;
3873 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3874 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3875 gbeauche 1.10 int OF = (flags >> 11) & 1;
3876     int SF = (flags >> 7) & 1;
3877     int ZF = (flags >> 6) & 1;
3878     int CF = flags & 1;
3879     tmp = (value == 0);
3880     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3881     mismatch = true;
3882     }
3883     }}}}
3884     if (mismatch)
3885     write_log("Target CPU defines all flags on BSF instruction\n");
3886     return !mismatch;
3887     }
3888    
3889 gbeauche 1.1
3890     /*************************************************************************
3891     * FPU stuff *
3892     *************************************************************************/
3893    
3894    
3895     static __inline__ void raw_fp_init(void)
3896     {
3897     int i;
3898    
3899     for (i=0;i<N_FREGS;i++)
3900     live.spos[i]=-2;
3901     live.tos=-1; /* Stack is empty */
3902     }
3903    
3904     static __inline__ void raw_fp_cleanup_drop(void)
3905     {
3906     #if 0
3907     /* using FINIT instead of popping all the entries.
3908     Seems to have side effects --- there is display corruption in
3909     Quake when this is used */
3910     if (live.tos>1) {
3911     emit_byte(0x9b);
3912     emit_byte(0xdb);
3913     emit_byte(0xe3);
3914     live.tos=-1;
3915     }
3916     #endif
3917     while (live.tos>=1) {
3918     emit_byte(0xde);
3919     emit_byte(0xd9);
3920     live.tos-=2;
3921     }
3922     while (live.tos>=0) {
3923     emit_byte(0xdd);
3924     emit_byte(0xd8);
3925     live.tos--;
3926     }
3927     raw_fp_init();
3928     }
3929    
3930     static __inline__ void make_tos(int r)
3931     {
3932     int p,q;
3933    
3934     if (live.spos[r]<0) { /* Register not yet on stack */
3935     emit_byte(0xd9);
3936     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3937     live.tos++;
3938     live.spos[r]=live.tos;
3939     live.onstack[live.tos]=r;
3940     return;
3941     }
3942     /* Register is on stack */
3943     if (live.tos==live.spos[r])
3944     return;
3945     p=live.spos[r];
3946     q=live.onstack[live.tos];
3947    
3948     emit_byte(0xd9);
3949     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3950     live.onstack[live.tos]=r;
3951     live.spos[r]=live.tos;
3952     live.onstack[p]=q;
3953     live.spos[q]=p;
3954     }
3955    
3956     static __inline__ void make_tos2(int r, int r2)
3957     {
3958     int q;
3959    
3960     make_tos(r2); /* Put the reg that's supposed to end up in position2
3961     on top */
3962    
3963     if (live.spos[r]<0) { /* Register not yet on stack */
3964     make_tos(r); /* This will extend the stack */
3965     return;
3966     }
3967     /* Register is on stack */
3968     emit_byte(0xd9);
3969     emit_byte(0xc9); /* Move r2 into position 2 */
3970    
3971     q=live.onstack[live.tos-1];
3972     live.onstack[live.tos]=q;
3973     live.spos[q]=live.tos;
3974     live.onstack[live.tos-1]=r2;
3975     live.spos[r2]=live.tos-1;
3976    
3977     make_tos(r); /* And r into 1 */
3978     }
3979    
3980     static __inline__ int stackpos(int r)
3981     {
3982     if (live.spos[r]<0)
3983     abort();
3984     if (live.tos<live.spos[r]) {
3985     printf("Looking for spos for fnreg %d\n",r);
3986     abort();
3987     }
3988     return live.tos-live.spos[r];
3989     }
3990    
3991     static __inline__ void usereg(int r)
3992     {
3993     if (live.spos[r]<0)
3994     make_tos(r);
3995     }
3996    
3997     /* This is called with one FP value in a reg *above* tos, which it will
3998     pop off the stack if necessary */
3999     static __inline__ void tos_make(int r)
4000     {
4001     if (live.spos[r]<0) {
4002     live.tos++;
4003     live.spos[r]=live.tos;
4004     live.onstack[live.tos]=r;
4005     return;
4006     }
4007     emit_byte(0xdd);
4008     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
4009     and pop it*/
4010     }
4011 gbeauche 1.23
4012     /* FP helper functions */
4013     #if USE_NEW_RTASM
4014     #define DEFINE_OP(NAME, GEN) \
4015     static inline void raw_##NAME(uint32 m) \
4016     { \
4017     GEN(m, X86_NOREG, X86_NOREG, 1); \
4018     }
4019     DEFINE_OP(fstl, FSTLm);
4020     DEFINE_OP(fstpl, FSTPLm);
4021     DEFINE_OP(fldl, FLDLm);
4022     DEFINE_OP(fildl, FILDLm);
4023     DEFINE_OP(fistl, FISTLm);
4024     DEFINE_OP(flds, FLDSm);
4025     DEFINE_OP(fsts, FSTSm);
4026     DEFINE_OP(fstpt, FSTPTm);
4027     DEFINE_OP(fldt, FLDTm);
4028     #else
4029     #define DEFINE_OP(NAME, OP1, OP2) \
4030     static inline void raw_##NAME(uint32 m) \
4031     { \
4032     emit_byte(OP1); \
4033     emit_byte(OP2); \
4034     emit_long(m); \
4035     }
4036     DEFINE_OP(fstl, 0xdd, 0x15);
4037     DEFINE_OP(fstpl, 0xdd, 0x1d);
4038     DEFINE_OP(fldl, 0xdd, 0x05);
4039     DEFINE_OP(fildl, 0xdb, 0x05);
4040     DEFINE_OP(fistl, 0xdb, 0x15);
4041     DEFINE_OP(flds, 0xd9, 0x05);
4042     DEFINE_OP(fsts, 0xd9, 0x15);
4043     DEFINE_OP(fstpt, 0xdb, 0x3d);
4044     DEFINE_OP(fldt, 0xdb, 0x2d);
4045     #endif
4046     #undef DEFINE_OP
4047    
4048 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4049     {
4050     make_tos(r);
4051 gbeauche 1.23 raw_fstl(m);
4052 gbeauche 1.1 }
4053     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4054    
4055     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4056     {
4057     make_tos(r);
4058 gbeauche 1.23 raw_fstpl(m);
4059 gbeauche 1.1 live.onstack[live.tos]=-1;
4060     live.tos--;
4061     live.spos[r]=-2;
4062     }
4063     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4064    
4065     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4066     {
4067 gbeauche 1.23 raw_fldl(m);
4068 gbeauche 1.1 tos_make(r);
4069     }
4070     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4071    
4072     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4073     {
4074 gbeauche 1.23 raw_fildl(m);
4075 gbeauche 1.1 tos_make(r);
4076     }
4077     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4078    
4079     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4080     {
4081     make_tos(r);
4082 gbeauche 1.23 raw_fistl(m);
4083 gbeauche 1.1 }
4084     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4085    
4086     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4087     {
4088 gbeauche 1.23 raw_flds(m);
4089 gbeauche 1.1 tos_make(r);
4090     }
4091     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4092    
4093     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4094     {
4095     make_tos(r);
4096 gbeauche 1.23 raw_fsts(m);
4097 gbeauche 1.1 }
4098     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4099    
4100     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4101     {
4102     int rs;
4103    
4104     /* Stupid x87 can't write a long double to mem without popping the
4105     stack! */
4106     usereg(r);
4107     rs=stackpos(r);
4108     emit_byte(0xd9); /* Get a copy to the top of stack */
4109     emit_byte(0xc0+rs);
4110    
4111 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4112 gbeauche 1.1 }
4113     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4114    
4115     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4116     {
4117     int rs;
4118    
4119     make_tos(r);
4120 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4121 gbeauche 1.1 live.onstack[live.tos]=-1;
4122     live.tos--;
4123     live.spos[r]=-2;
4124     }
4125     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4126    
4127     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4128     {
4129 gbeauche 1.23 raw_fldt(m);
4130 gbeauche 1.1 tos_make(r);
4131     }
4132     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4133    
4134     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4135     {
4136     emit_byte(0xd9);
4137     emit_byte(0xeb);
4138     tos_make(r);
4139     }
4140     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4141    
4142     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4143     {
4144     emit_byte(0xd9);
4145     emit_byte(0xec);
4146     tos_make(r);
4147     }
4148     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4149    
4150     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4151     {
4152     emit_byte(0xd9);
4153     emit_byte(0xea);
4154     tos_make(r);
4155     }
4156     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4157    
4158     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4159     {
4160     emit_byte(0xd9);
4161     emit_byte(0xed);
4162     tos_make(r);
4163     }
4164     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4165    
4166     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4167     {
4168     emit_byte(0xd9);
4169     emit_byte(0xe8);
4170     tos_make(r);
4171     }
4172     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4173    
4174     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4175     {
4176     emit_byte(0xd9);
4177     emit_byte(0xee);
4178     tos_make(r);
4179     }
4180     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4181    
4182     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4183     {
4184     int ds;
4185    
4186     usereg(s);
4187     ds=stackpos(s);
4188     if (ds==0 && live.spos[d]>=0) {
4189     /* source is on top of stack, and we already have the dest */
4190     int dd=stackpos(d);
4191     emit_byte(0xdd);
4192     emit_byte(0xd0+dd);
4193     }
4194     else {
4195     emit_byte(0xd9);
4196     emit_byte(0xc0+ds); /* duplicate source on tos */
4197     tos_make(d); /* store to destination, pop if necessary */
4198     }
4199     }
4200     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4201    
4202     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4203     {
4204     emit_byte(0xd9);
4205     emit_byte(0xa8+index);
4206     emit_long(base);
4207     }
4208     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4209    
4210    
4211     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4212     {
4213     int ds;
4214    
4215     if (d!=s) {
4216     usereg(s);
4217     ds=stackpos(s);
4218     emit_byte(0xd9);
4219     emit_byte(0xc0+ds); /* duplicate source */
4220     emit_byte(0xd9);
4221     emit_byte(0xfa); /* take square root */
4222     tos_make(d); /* store to destination */
4223     }
4224     else {
4225     make_tos(d);
4226     emit_byte(0xd9);
4227     emit_byte(0xfa); /* take square root */
4228     }
4229     }
4230     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4231    
4232     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4233     {
4234     int ds;
4235    
4236     if (d!=s) {
4237     usereg(s);
4238     ds=stackpos(s);
4239     emit_byte(0xd9);
4240     emit_byte(0xc0+ds); /* duplicate source */
4241     emit_byte(0xd9);
4242     emit_byte(0xe1); /* take fabs */
4243     tos_make(d); /* store to destination */
4244     }
4245     else {
4246     make_tos(d);
4247     emit_byte(0xd9);
4248     emit_byte(0xe1); /* take fabs */
4249     }
4250     }
4251     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4252    
4253     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4254     {
4255     int ds;
4256    
4257     if (d!=s) {
4258     usereg(s);
4259     ds=stackpos(s);
4260     emit_byte(0xd9);
4261     emit_byte(0xc0+ds); /* duplicate source */
4262     emit_byte(0xd9);
4263     emit_byte(0xfc); /* take frndint */
4264     tos_make(d); /* store to destination */
4265     }
4266     else {
4267     make_tos(d);
4268     emit_byte(0xd9);
4269     emit_byte(0xfc); /* take frndint */
4270     }
4271     }
4272     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4273    
4274     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4275     {
4276     int ds;
4277    
4278     if (d!=s) {
4279     usereg(s);
4280     ds=stackpos(s);
4281     emit_byte(0xd9);
4282     emit_byte(0xc0+ds); /* duplicate source */
4283     emit_byte(0xd9);
4284     emit_byte(0xff); /* take cos */
4285     tos_make(d); /* store to destination */
4286     }
4287     else {
4288     make_tos(d);
4289     emit_byte(0xd9);
4290     emit_byte(0xff); /* take cos */
4291     }
4292     }
4293     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4294    
4295     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4296     {
4297     int ds;
4298    
4299     if (d!=s) {
4300     usereg(s);
4301     ds=stackpos(s);
4302     emit_byte(0xd9);
4303     emit_byte(0xc0+ds); /* duplicate source */
4304     emit_byte(0xd9);
4305     emit_byte(0xfe); /* take sin */
4306     tos_make(d); /* store to destination */
4307     }
4308     else {
4309     make_tos(d);
4310     emit_byte(0xd9);
4311     emit_byte(0xfe); /* take sin */
4312     }
4313     }
4314     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4315    
4316 gbeauche 1.34 static const double one=1;
4317 gbeauche 1.1 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4318     {
4319     int ds;
4320    
4321     usereg(s);
4322     ds=stackpos(s);
4323     emit_byte(0xd9);
4324     emit_byte(0xc0+ds); /* duplicate source */
4325    
4326     emit_byte(0xd9);
4327     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4328     emit_byte(0xd9);
4329     emit_byte(0xfc); /* rndint */
4330     emit_byte(0xd9);
4331     emit_byte(0xc9); /* swap top two elements */
4332     emit_byte(0xd8);
4333     emit_byte(0xe1); /* subtract rounded from original */
4334     emit_byte(0xd9);
4335     emit_byte(0xf0); /* f2xm1 */
4336 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4337 gbeauche 1.1 emit_byte(0xd9);
4338     emit_byte(0xfd); /* and scale it */
4339     emit_byte(0xdd);
4340     emit_byte(0xd9); /* take he rounded value off */
4341     tos_make(d); /* store to destination */
4342     }
4343     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4344    
4345     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4346     {
4347     int ds;
4348    
4349     usereg(s);
4350     ds=stackpos(s);
4351     emit_byte(0xd9);
4352     emit_byte(0xc0+ds); /* duplicate source */
4353     emit_byte(0xd9);
4354     emit_byte(0xea); /* fldl2e */
4355     emit_byte(0xde);
4356     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4357    
4358     emit_byte(0xd9);
4359     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4360     emit_byte(0xd9);
4361     emit_byte(0xfc); /* rndint */
4362     emit_byte(0xd9);
4363     emit_byte(0xc9); /* swap top two elements */
4364     emit_byte(0xd8);
4365     emit_byte(0xe1); /* subtract rounded from original */
4366     emit_byte(0xd9);
4367     emit_byte(0xf0); /* f2xm1 */
4368 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4369 gbeauche 1.1 emit_byte(0xd9);
4370     emit_byte(0xfd); /* and scale it */
4371     emit_byte(0xdd);
4372     emit_byte(0xd9); /* take he rounded value off */
4373     tos_make(d); /* store to destination */
4374     }
4375     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4376    
4377     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4378     {
4379     int ds;
4380    
4381     usereg(s);
4382     ds=stackpos(s);
4383     emit_byte(0xd9);
4384     emit_byte(0xc0+ds); /* duplicate source */
4385     emit_byte(0xd9);
4386     emit_byte(0xe8); /* push '1' */
4387     emit_byte(0xd9);
4388     emit_byte(0xc9); /* swap top two */
4389     emit_byte(0xd9);
4390     emit_byte(0xf1); /* take 1*log2(x) */
4391     tos_make(d); /* store to destination */
4392     }
4393     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4394    
4395    
4396     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4397     {
4398     int ds;
4399    
4400     if (d!=s) {
4401     usereg(s);
4402     ds=stackpos(s);
4403     emit_byte(0xd9);
4404     emit_byte(0xc0+ds); /* duplicate source */
4405     emit_byte(0xd9);
4406     emit_byte(0xe0); /* take fchs */
4407     tos_make(d); /* store to destination */
4408     }
4409     else {
4410     make_tos(d);
4411     emit_byte(0xd9);
4412     emit_byte(0xe0); /* take fchs */
4413     }
4414     }
4415     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4416    
4417     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4418     {
4419     int ds;
4420    
4421     usereg(s);
4422     usereg(d);
4423    
4424     if (live.spos[s]==live.tos) {
4425     /* Source is on top of stack */
4426     ds=stackpos(d);
4427     emit_byte(0xdc);
4428     emit_byte(0xc0+ds); /* add source to dest*/
4429     }
4430     else {
4431     make_tos(d);
4432     ds=stackpos(s);
4433    
4434     emit_byte(0xd8);
4435     emit_byte(0xc0+ds); /* add source to dest*/
4436     }
4437     }
4438     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4439    
4440     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4441     {
4442     int ds;
4443    
4444     usereg(s);
4445     usereg(d);
4446    
4447     if (live.spos[s]==live.tos) {
4448     /* Source is on top of stack */
4449     ds=stackpos(d);
4450     emit_byte(0xdc);
4451     emit_byte(0xe8+ds); /* sub source from dest*/
4452     }
4453     else {
4454     make_tos(d);
4455     ds=stackpos(s);
4456    
4457     emit_byte(0xd8);
4458     emit_byte(0xe0+ds); /* sub src from dest */
4459     }
4460     }
4461     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4462    
4463     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4464     {
4465     int ds;
4466    
4467     usereg(s);
4468     usereg(d);
4469    
4470     make_tos(d);
4471     ds=stackpos(s);
4472    
4473     emit_byte(0xdd);
4474     emit_byte(0xe0+ds); /* cmp dest with source*/
4475     }
4476     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4477    
4478     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4479     {
4480     int ds;
4481    
4482     usereg(s);
4483     usereg(d);
4484    
4485     if (live.spos[s]==live.tos) {
4486     /* Source is on top of stack */
4487     ds=stackpos(d);
4488     emit_byte(0xdc);
4489     emit_byte(0xc8+ds); /* mul dest by source*/
4490     }
4491     else {
4492     make_tos(d);
4493     ds=stackpos(s);
4494    
4495     emit_byte(0xd8);
4496     emit_byte(0xc8+ds); /* mul dest by source*/
4497     }
4498     }
4499     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4500    
4501     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4502     {
4503     int ds;
4504    
4505     usereg(s);
4506     usereg(d);
4507    
4508     if (live.spos[s]==live.tos) {
4509     /* Source is on top of stack */
4510     ds=stackpos(d);
4511     emit_byte(0xdc);
4512     emit_byte(0xf8+ds); /* div dest by source */
4513     }
4514     else {
4515     make_tos(d);
4516     ds=stackpos(s);
4517    
4518     emit_byte(0xd8);
4519     emit_byte(0xf0+ds); /* div dest by source*/
4520     }
4521     }
4522     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4523    
4524     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4525     {
4526     int ds;
4527    
4528     usereg(s);
4529     usereg(d);
4530    
4531     make_tos2(d,s);
4532     ds=stackpos(s);
4533    
4534     if (ds!=1) {
4535     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4536     abort();
4537     }
4538     emit_byte(0xd9);
4539     emit_byte(0xf8); /* take rem from dest by source */
4540     }
4541     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4542    
4543     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4544     {
4545     int ds;
4546    
4547     usereg(s);
4548     usereg(d);
4549    
4550     make_tos2(d,s);
4551     ds=stackpos(s);
4552    
4553     if (ds!=1) {
4554     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4555     abort();
4556     }
4557     emit_byte(0xd9);
4558     emit_byte(0xf5); /* take rem1 from dest by source */
4559     }
4560     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4561    
4562    
4563     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4564     {
4565     make_tos(r);
4566     emit_byte(0xd9); /* ftst */
4567     emit_byte(0xe4);
4568     }
4569     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4570    
/* %eax register is clobbered if target processor doesn't support fucomi
   (raw_fflags_into_flags then falls back to fucom / fstsw ax / sahf).
   have_cmov is the flag that selects the fucomi path.  The expansion is
   parenthesized so it stays a single operand wherever it is used. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
4574    
4575     static __inline__ void raw_fflags_into_flags(int r)
4576     {
4577     int p;
4578    
4579     usereg(r);
4580     p=stackpos(r);
4581    
4582     emit_byte(0xd9);
4583     emit_byte(0xee); /* Push 0 */
4584     emit_byte(0xd9);
4585     emit_byte(0xc9+p); /* swap top two around */
4586     if (have_cmov) {
4587     // gb-- fucomi is for P6 cores only, not K6-2 then...
4588     emit_byte(0xdb);
4589     emit_byte(0xe9+p); /* fucomi them */
4590     }
4591     else {
4592     emit_byte(0xdd);
4593     emit_byte(0xe1+p); /* fucom them */
4594     emit_byte(0x9b);
4595     emit_byte(0xdf);
4596     emit_byte(0xe0); /* fstsw ax */
4597     raw_sahf(0); /* sahf */
4598     }
4599     emit_byte(0xdd);
4600     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4601     }