ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.31
Committed: 2006-01-15T22:42:51Z (18 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.30: +9 -1 lines
Log Message:
fix stack alignment (theoretically, but it was OK in practice) in generated
functions, move m68k_compile_execute() to compiler/ dir since it's JIT
generic and it now depends on USE_PUSH_POP (as it should)

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Index numbers for the general purpose registers. The register-role
   macros further down in this file (REG_RESULT, REG_PAR1, MUL_NREG1, ...)
   are defined in terms of these names. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
/* R8..R15 only receive an index when compiling for x86-64. */
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
/* Register roles. Each macro below names one of the *_INDEX values; the
   choice depends on the compiler (_MSC_VER without
   USE_NORMAL_CALLING_CONVENTION) and on the target (__x86_64__). */
56     /* The register in which subroutines return an integer return value */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__)
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
/* NOTE(review): in the _MSC_VER branch below REG_PC_TMP is EAX_INDEX, i.e.
   the same register as REG_PC_PRE, which does not match the "not the above"
   remark on the other branch -- confirm this is intended. */
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
/* Stack layout constants for generated functions: STACK_ALIGN is 16 and
   STACK_OFFSET is the size of one pointer. Presumably STACK_OFFSET stands
   for the return address already on the stack at function entry -- TODO
   confirm against the code that uses it. */
84 gbeauche 1.31 #define STACK_ALIGN 16
85     #define STACK_OFFSET sizeof(void *)
86    
/* Register lists, each terminated by -1. always_used holds only index 4
   (ESP_INDEX). can_byte and can_word list register indices; presumably the
   ones usable for 8-bit and 16-bit operands -- TODO confirm. Index 4 is
   absent from both lists on every target. */
87 gbeauche 1.1 uae_s8 always_used[]={4,-1};
88 gbeauche 1.20 #if defined(__x86_64__)
89     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
90     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
91     #else
92 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
93     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
94 gbeauche 1.20 #endif
95 gbeauche 1.1
/* call_saved is a flag table with one entry per register index. The
   USE_OPTIMIZED_CALLS variant is deliberately unbuildable (#error); in the
   variant actually compiled only index 4 (ESP_INDEX) is flagged. */
96 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
97     /* Make sure interpretive core does not use cpuopti */
98     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
99 gbeauche 1.20 #error FIXME: code not ready
100 gbeauche 1.17 #else
101 gbeauche 1.1 /* cpuopti mutate instruction handlers to assume registers are saved
102     by the caller */
103 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
104 gbeauche 1.17 #endif
105 gbeauche 1.1
106     /* This *should* be the same as call_saved. But:
107     - We might not really know which registers are saved, and which aren't,
108     so we need to preserve some, but don't want to rely on everyone else
109     also saving those registers
110     - Special registers (such as the stack pointer) should not be "preserved"
111     by pushing, even though they are "saved" across function calls
112     */
/* need_to_preserve is likewise indexed by register index. The x86-64 table
   flags indices 3, 5 and 11..15; the 32-bit table flags every index except
   4 (ESP_INDEX). */
113 gbeauche 1.21 #if defined(__x86_64__)
114     /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
115 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
116     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
117 gbeauche 1.21 #else
118     static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
119     #endif
120 gbeauche 1.1
/* One CLOBBER_<class> macro per instruction class. A macro either expands
   to nothing or expands to a clobber_flags() call; per the comment below,
   the latter marks classes that clobber the native flags. */
121     /* Whether classes of instructions do or don't clobber the native flags */
122     #define CLOBBER_MOV
123     #define CLOBBER_LEA
124     #define CLOBBER_CMOV
125     #define CLOBBER_POP
126     #define CLOBBER_PUSH
127     #define CLOBBER_SUB clobber_flags()
128     #define CLOBBER_SBB clobber_flags()
129     #define CLOBBER_CMP clobber_flags()
130     #define CLOBBER_ADD clobber_flags()
131     #define CLOBBER_ADC clobber_flags()
132     #define CLOBBER_AND clobber_flags()
133     #define CLOBBER_OR clobber_flags()
134     #define CLOBBER_XOR clobber_flags()
135    
/* Rotates, shifts, TEST, extensions, byte swaps, SETcc, MUL, BT and BSF. */
136     #define CLOBBER_ROL clobber_flags()
137     #define CLOBBER_ROR clobber_flags()
138     #define CLOBBER_SHLL clobber_flags()
139     #define CLOBBER_SHRL clobber_flags()
140     #define CLOBBER_SHRA clobber_flags()
141     #define CLOBBER_TEST clobber_flags()
142     #define CLOBBER_CL16
143     #define CLOBBER_CL8
144 gbeauche 1.20 #define CLOBBER_SE32
145 gbeauche 1.1 #define CLOBBER_SE16
146     #define CLOBBER_SE8
147 gbeauche 1.20 #define CLOBBER_ZE32
148 gbeauche 1.1 #define CLOBBER_ZE16
149     #define CLOBBER_ZE8
150     #define CLOBBER_SW16 clobber_flags()
151     #define CLOBBER_SW32
152     #define CLOBBER_SETCC
153     #define CLOBBER_MUL clobber_flags()
154     #define CLOBBER_BT clobber_flags()
155     #define CLOBBER_BSF clobber_flags()
156    
/* USE_NEW_RTASM is forced to 1 on x86-64. On other targets this file does
   not define it, so the section guarded by "#if USE_NEW_RTASM" is only
   compiled there if the macro is set elsewhere. */
157 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
158 gbeauche 1.20 #if defined(__x86_64__)
159     #define USE_NEW_RTASM 1
160     #endif
161    
162     #if USE_NEW_RTASM
163 gbeauche 1.13
/* Configuration macros set before including codegen_x86.h (presumably read
   by that header -- TODO confirm). */
164     #if defined(__x86_64__)
165     #define X86_TARGET_64BIT 1
166     #endif
167     #define X86_FLAT_REGISTERS 0
168 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
169     #define X86_OPTIMIZE_ROTSHI 1
170 gbeauche 1.13 #include "codegen_x86.h"
171    
/* x86_emit_* and x86_get_target forward to this file's emit_* / get_target
   helpers. x86_emit_failure forwards to jit_fail together with the current
   file, line and function. */
172     #define x86_emit_byte(B) emit_byte(B)
173     #define x86_emit_word(W) emit_word(W)
174     #define x86_emit_long(L) emit_long(L)
175 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
176 gbeauche 1.13 #define x86_get_target() get_target()
177     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
178    
/*
 * Report a fatal code-generation failure and terminate.
 *
 * msg      - description of the failure
 * file     - source file name of the failing site
 * line     - source line of the failing site
 * function - name of the function containing the failing site
 *
 * Writes a single diagnostic line to stderr, then calls abort(); it never
 * returns to the caller.
 */
static void jit_fail(const char *msg, const char *file, int line, const char *function)
{
	fprintf(stderr,
	        "JIT failure in function %s from file %s at line %d: %s\n",
	        function,
	        file,
	        line,
	        msg);
	abort();
}
185    
186     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
187     {
188 gbeauche 1.20 #if defined(__x86_64__)
189     PUSHQr(r);
190     #else
191 gbeauche 1.13 PUSHLr(r);
192 gbeauche 1.20 #endif
193 gbeauche 1.13 }
194     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
195    
196     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
197     {
198 gbeauche 1.20 #if defined(__x86_64__)
199     POPQr(r);
200     #else
201 gbeauche 1.13 POPLr(r);
202 gbeauche 1.20 #endif
203 gbeauche 1.13 }
204     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
205    
206 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
207     {
208     #if defined(__x86_64__)
209     POPQm(d, X86_NOREG, X86_NOREG, 1);
210     #else
211     POPLm(d, X86_NOREG, X86_NOREG, 1);
212     #endif
213     }
214     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
215    
216 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
217     {
218     BTLir(i, r);
219     }
220     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
221    
222     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
223     {
224     BTLrr(b, r);
225     }
226     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
227    
228     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
229     {
230     BTCLir(i, r);
231     }
232     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
233    
234     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
235     {
236     BTCLrr(b, r);
237     }
238     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
239    
240     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
241     {
242     BTRLir(i, r);
243     }
244     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
245    
246     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
247     {
248     BTRLrr(b, r);
249     }
250     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
251    
252     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
253     {
254     BTSLir(i, r);
255     }
256     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
257    
258     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
259     {
260     BTSLrr(b, r);
261     }
262     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
263    
264     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
265     {
266     SUBWir(i, d);
267     }
268     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
269    
270     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
271     {
272     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
273     }
274     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
275    
276     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
277     {
278     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
279     }
280     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
281    
282     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
283     {
284     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
285     }
286     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
287    
288     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
289     {
290     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
291     }
292     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
293    
294     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
295     {
296     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
297     }
298     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
299    
300     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
301     {
302     ROLBir(i, r);
303     }
304     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
305    
306     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
307     {
308     ROLWir(i, r);
309     }
310     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
311    
312     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
313     {
314     ROLLir(i, r);
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
317    
318     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
319     {
320     ROLLrr(r, d);
321     }
322     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
323    
324     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
325     {
326     ROLWrr(r, d);
327     }
328     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
329    
330     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
331     {
332     ROLBrr(r, d);
333     }
334     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
335    
336     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
337     {
338     SHLLrr(r, d);
339     }
340     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
341    
342     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
343     {
344     SHLWrr(r, d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
347    
348     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
349     {
350     SHLBrr(r, d);
351     }
352     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
353    
354     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
355     {
356     RORBir(i, r);
357     }
358     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
359    
360     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
361     {
362     RORWir(i, r);
363     }
364     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
365    
366     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
367     {
368     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
369     }
370     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
371    
372     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
373     {
374     RORLir(i, r);
375     }
376     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
377    
378     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
379     {
380     RORLrr(r, d);
381     }
382     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
383    
384     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
385     {
386     RORWrr(r, d);
387     }
388     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
389    
390     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
391     {
392     RORBrr(r, d);
393     }
394     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
395    
396     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
397     {
398     SHRLrr(r, d);
399     }
400     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
401    
402     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
403     {
404     SHRWrr(r, d);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
407    
408     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
409     {
410     SHRBrr(r, d);
411     }
412     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
413    
414     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
415     {
416 gbeauche 1.14 SARLrr(r, d);
417 gbeauche 1.13 }
418     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
419    
420     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
421     {
422 gbeauche 1.14 SARWrr(r, d);
423 gbeauche 1.13 }
424     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
425    
426     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
427     {
428 gbeauche 1.14 SARBrr(r, d);
429 gbeauche 1.13 }
430     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
431    
432     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
433     {
434     SHLLir(i, r);
435     }
436     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
437    
438     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
439     {
440     SHLWir(i, r);
441     }
442     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
443    
444     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
445     {
446     SHLBir(i, r);
447     }
448     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
449    
450     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
451     {
452     SHRLir(i, r);
453     }
454     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
455    
456     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
457     {
458     SHRWir(i, r);
459     }
460     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
461    
462     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
463     {
464     SHRBir(i, r);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
469     {
470 gbeauche 1.14 SARLir(i, r);
471 gbeauche 1.13 }
472     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
473    
474     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
475     {
476 gbeauche 1.14 SARWir(i, r);
477 gbeauche 1.13 }
478     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
479    
480     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
481     {
482 gbeauche 1.14 SARBir(i, r);
483 gbeauche 1.13 }
484     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
485    
486     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
487     {
488     SAHF();
489     }
490     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
491    
492     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
493     {
494     CPUID();
495     }
496     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
497    
498     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
499     {
500     LAHF();
501     }
502     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
503    
504     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
505     {
506     SETCCir(cc, d);
507     }
508     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
509    
510     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
511     {
512     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
513     }
514     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
515    
516     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
517     {
518 gbeauche 1.15 if (have_cmov)
519     CMOVLrr(cc, s, d);
520     else { /* replacement using branch and mov */
521     #if defined(__x86_64__)
522     write_log("x86-64 implementations are bound to have CMOV!\n");
523     abort();
524     #endif
525     JCCSii(cc^1, 2);
526     MOVLrr(s, d);
527     }
528 gbeauche 1.13 }
529     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
530    
531     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
532     {
533     BSFLrr(s, d);
534     }
535     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
536    
537 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
538     {
539     MOVSLQrr(s, d);
540     }
541     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
542    
543 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
544     {
545     MOVSWLrr(s, d);
546     }
547     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
548    
549     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
550     {
551     MOVSBLrr(s, d);
552     }
553     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
554    
555     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
556     {
557     MOVZWLrr(s, d);
558     }
559     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
560    
561     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
562     {
563     MOVZBLrr(s, d);
564     }
565     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
566    
567     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
568     {
569 gbeauche 1.14 IMULLrr(s, d);
570 gbeauche 1.13 }
571     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
572    
573     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
574     {
575 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
576     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
577 gbeauche 1.13 abort();
578 gbeauche 1.14 }
579     IMULLr(s);
580 gbeauche 1.13 }
581     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
582    
583     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
584     {
585 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
586     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
587 gbeauche 1.13 abort();
588 gbeauche 1.14 }
589     MULLr(s);
590 gbeauche 1.13 }
591     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
592    
593     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
594     {
595 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
596 gbeauche 1.13 }
597     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
598    
599     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
600     {
601     MOVBrr(s, d);
602     }
603     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
604    
605     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
606     {
607     MOVWrr(s, d);
608     }
609     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
610    
611     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
612     {
613     MOVLmr(0, baser, index, factor, d);
614     }
615     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
616    
617     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
618     {
619     MOVWmr(0, baser, index, factor, d);
620     }
621     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
622    
623     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
624     {
625     MOVBmr(0, baser, index, factor, d);
626     }
627     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
628    
629     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
630     {
631     MOVLrm(s, 0, baser, index, factor);
632     }
633     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
634    
635     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
636     {
637     MOVWrm(s, 0, baser, index, factor);
638     }
639     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
640    
641     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
642     {
643     MOVBrm(s, 0, baser, index, factor);
644     }
645     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
646    
647     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
648     {
649     MOVLrm(s, base, baser, index, factor);
650     }
651     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
652    
653     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
654     {
655     MOVWrm(s, base, baser, index, factor);
656     }
657     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
658    
659     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
660     {
661     MOVBrm(s, base, baser, index, factor);
662     }
663     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
664    
665     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
666     {
667     MOVLmr(base, baser, index, factor, d);
668     }
669     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
670    
671     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
672     {
673     MOVWmr(base, baser, index, factor, d);
674     }
675     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
676    
677     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
678     {
679     MOVBmr(base, baser, index, factor, d);
680     }
681     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
682    
683     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
684     {
685     MOVLmr(base, X86_NOREG, index, factor, d);
686     }
687     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
688    
689     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
690     {
691 gbeauche 1.15 if (have_cmov)
692     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
693     else { /* replacement using branch and mov */
694     #if defined(__x86_64__)
695     write_log("x86-64 implementations are bound to have CMOV!\n");
696     abort();
697     #endif
698     JCCSii(cond^1, 7);
699     MOVLmr(base, X86_NOREG, index, factor, d);
700     }
701 gbeauche 1.13 }
702     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
703    
704     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
705     {
706 gbeauche 1.15 if (have_cmov)
707     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
708     else { /* replacement using branch and mov */
709     #if defined(__x86_64__)
710     write_log("x86-64 implementations are bound to have CMOV!\n");
711     abort();
712     #endif
713     JCCSii(cond^1, 6);
714     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
715     }
716 gbeauche 1.13 }
717     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
718    
719     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
720     {
721     MOVLmr(offset, s, X86_NOREG, 1, d);
722     }
723     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
724    
725     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
726     {
727     MOVWmr(offset, s, X86_NOREG, 1, d);
728     }
729     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
730    
731     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
732     {
733     MOVBmr(offset, s, X86_NOREG, 1, d);
734     }
735     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
736    
737     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
738     {
739     MOVLmr(offset, s, X86_NOREG, 1, d);
740     }
741     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
742    
743     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
744     {
745     MOVWmr(offset, s, X86_NOREG, 1, d);
746     }
747     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
748    
749     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
750     {
751     MOVBmr(offset, s, X86_NOREG, 1, d);
752     }
753     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
754    
755     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
756     {
757     MOVLim(i, offset, d, X86_NOREG, 1);
758     }
759     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
760    
761     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
762     {
763     MOVWim(i, offset, d, X86_NOREG, 1);
764     }
765     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
766    
767     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
768     {
769     MOVBim(i, offset, d, X86_NOREG, 1);
770     }
771     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
772    
773     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
774     {
775     MOVLrm(s, offset, d, X86_NOREG, 1);
776     }
777     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
778    
779     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
780     {
781     MOVWrm(s, offset, d, X86_NOREG, 1);
782     }
783     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
784    
785     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
786     {
787     MOVBrm(s, offset, d, X86_NOREG, 1);
788     }
789     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
790    
791     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
792     {
793     LEALmr(offset, s, X86_NOREG, 1, d);
794     }
795     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
796    
797     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
798     {
799     LEALmr(offset, s, index, factor, d);
800     }
801     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
802    
803     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
804     {
805     LEALmr(0, s, index, factor, d);
806     }
807     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
808    
809     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
810     {
811     MOVLrm(s, offset, d, X86_NOREG, 1);
812     }
813     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
814    
815     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
816     {
817     MOVWrm(s, offset, d, X86_NOREG, 1);
818     }
819     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
820    
821     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
822     {
823     MOVBrm(s, offset, d, X86_NOREG, 1);
824     }
825     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
826    
827     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
828     {
829     BSWAPLr(r);
830     }
831     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
832    
833     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
834     {
835     ROLWir(8, r);
836     }
837     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
838    
839     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
840     {
841     MOVLrr(s, d);
842     }
843     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
844    
845     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
846     {
847     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
848     }
849     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
850    
851     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
852     {
853     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
854     }
855     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
856    
857     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
858     {
859     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
860     }
861     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
862    
863     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
864     {
865     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
866     }
867     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
868    
869     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
870     {
871     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
872     }
873     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
874    
875     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
876     {
877     MOVLir(s, d);
878     }
879     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
880    
881     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
882     {
883     MOVWir(s, d);
884     }
885     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
886    
887     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
888     {
889     MOVBir(s, d);
890     }
891     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
892    
893     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
894     {
895     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
896     }
897     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
898    
899     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
900     {
901     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
902     }
903     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
904    
905     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
906     {
907     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
908     }
909     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
910    
911     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
912     {
913     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
914     }
915     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
916    
917     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
918     {
919     TESTLir(i, d);
920     }
921     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
922    
923     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
924     {
925     TESTLrr(s, d);
926     }
927     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
928    
929     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
930     {
931     TESTWrr(s, d);
932     }
933     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
934    
935     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
936     {
937     TESTBrr(s, d);
938     }
939     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
940    
941 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
942     {
943     XORLir(i, d);
944     }
945     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
946    
947 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
948     {
949     ANDLir(i, d);
950     }
951     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
952    
953     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
954     {
955     ANDWir(i, d);
956     }
957     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
958    
959     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
960     {
961     ANDLrr(s, d);
962     }
963     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
964    
965     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
966     {
967     ANDWrr(s, d);
968     }
969     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
970    
971     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
972     {
973     ANDBrr(s, d);
974     }
975     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
976    
977     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
978     {
979     ORLir(i, d);
980     }
981     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
982    
983     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
984     {
985     ORLrr(s, d);
986     }
987     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
988    
989     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
990     {
991     ORWrr(s, d);
992     }
993     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
994    
995     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
996     {
997     ORBrr(s, d);
998     }
999     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1000    
1001     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1002     {
1003     ADCLrr(s, d);
1004     }
1005     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1006    
1007     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1008     {
1009     ADCWrr(s, d);
1010     }
1011     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1012    
1013     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1014     {
1015     ADCBrr(s, d);
1016     }
1017     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1018    
1019     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1020     {
1021     ADDLrr(s, d);
1022     }
1023     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1024    
1025     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1026     {
1027     ADDWrr(s, d);
1028     }
1029     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1030    
1031     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1032     {
1033     ADDBrr(s, d);
1034     }
1035     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1036    
1037     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1038     {
1039     SUBLir(i, d);
1040     }
1041     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1042    
1043     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1044     {
1045     SUBBir(i, d);
1046     }
1047     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1048    
1049     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1050     {
1051     ADDLir(i, d);
1052     }
1053     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1054    
1055     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1056     {
1057     ADDWir(i, d);
1058     }
1059     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1060    
1061     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1062     {
1063     ADDBir(i, d);
1064     }
1065     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1066    
1067     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1068     {
1069     SBBLrr(s, d);
1070     }
1071     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1072    
1073     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1074     {
1075     SBBWrr(s, d);
1076     }
1077     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1078    
1079     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1080     {
1081     SBBBrr(s, d);
1082     }
1083     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1084    
1085     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1086     {
1087     SUBLrr(s, d);
1088     }
1089     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1090    
1091     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1092     {
1093     SUBWrr(s, d);
1094     }
1095     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1096    
1097     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1098     {
1099     SUBBrr(s, d);
1100     }
1101     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1102    
1103     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1104     {
1105     CMPLrr(s, d);
1106     }
1107     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1108    
1109     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1110     {
1111     CMPLir(i, r);
1112     }
1113     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1114    
1115     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1116     {
1117     CMPWrr(s, d);
1118     }
1119     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1120    
1121     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1122     {
1123     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1124     }
1125     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1126    
1127     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1128     {
1129     CMPBir(i, d);
1130     }
1131     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1132    
1133     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1134     {
1135     CMPBrr(s, d);
1136     }
1137     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1138    
1139     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1140     {
1141     CMPLmr(offset, X86_NOREG, index, factor, d);
1142     }
1143     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1144    
1145     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1146     {
1147     XORLrr(s, d);
1148     }
1149     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1150    
1151     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1152     {
1153     XORWrr(s, d);
1154     }
1155     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1156    
1157     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1158     {
1159     XORBrr(s, d);
1160     }
1161     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1162    
1163     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1164     {
1165     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1166     }
1167     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1168    
1169     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1170     {
1171     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1172     }
1173     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1174    
1175     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1176     {
1177     XCHGLrr(r2, r1);
1178     }
1179     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1180    
1181     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1182     {
1183 gbeauche 1.18 PUSHF();
1184 gbeauche 1.13 }
1185     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1186    
1187     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1188     {
1189 gbeauche 1.18 POPF();
1190 gbeauche 1.13 }
1191     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1192    
1193     #else
1194    
/* Encoder tuning switches, all enabled.
   optimize_accum:      allow the short accumulator-only opcode forms.
   optimize_imm8:       not referenced in this part of the file; presumably
                        gates sign-extended 8-bit immediates -- TODO confirm.
   optimize_shift_once: use the shift/rotate-by-1 opcodes when the count is 1. */
const bool optimize_accum      = true;
const bool optimize_imm8       = true;
const bool optimize_shift_once = true;
1198    
1199     /*************************************************************************
1200     * Actual encoding of the instructions on the target CPU *
1201     *************************************************************************/
1202    
1203 gbeauche 1.2 static __inline__ int isaccum(int r)
1204     {
1205     return (r == EAX_INDEX);
1206     }
1207    
1208 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1209     {
1210     return (x>=-128 && x<=127);
1211     }
1212    
1213     static __inline__ int isword(uae_s32 x)
1214     {
1215     return (x>=-32768 && x<=32767);
1216     }
1217    
1218     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1219     {
1220     emit_byte(0x50+r);
1221     }
1222     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1223    
1224     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1225     {
1226     emit_byte(0x58+r);
1227     }
1228     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1229    
1230 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1231     {
1232     emit_byte(0x8f);
1233     emit_byte(0x05);
1234     emit_long(d);
1235     }
1236     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1237    
1238 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1239     {
1240     emit_byte(0x0f);
1241     emit_byte(0xba);
1242     emit_byte(0xe0+r);
1243     emit_byte(i);
1244     }
1245     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1246    
1247     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1248     {
1249     emit_byte(0x0f);
1250     emit_byte(0xa3);
1251     emit_byte(0xc0+8*b+r);
1252     }
1253     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1254    
1255     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1256     {
1257     emit_byte(0x0f);
1258     emit_byte(0xba);
1259     emit_byte(0xf8+r);
1260     emit_byte(i);
1261     }
1262     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1263    
1264     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1265     {
1266     emit_byte(0x0f);
1267     emit_byte(0xbb);
1268     emit_byte(0xc0+8*b+r);
1269     }
1270     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1271    
1272    
1273     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1274     {
1275     emit_byte(0x0f);
1276     emit_byte(0xba);
1277     emit_byte(0xf0+r);
1278     emit_byte(i);
1279     }
1280     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1281    
1282     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1283     {
1284     emit_byte(0x0f);
1285     emit_byte(0xb3);
1286     emit_byte(0xc0+8*b+r);
1287     }
1288     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1289    
1290     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1291     {
1292     emit_byte(0x0f);
1293     emit_byte(0xba);
1294     emit_byte(0xe8+r);
1295     emit_byte(i);
1296     }
1297     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1298    
1299     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1300     {
1301     emit_byte(0x0f);
1302     emit_byte(0xab);
1303     emit_byte(0xc0+8*b+r);
1304     }
1305     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1306    
1307     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1308     {
1309     emit_byte(0x66);
1310     if (isbyte(i)) {
1311     emit_byte(0x83);
1312     emit_byte(0xe8+d);
1313     emit_byte(i);
1314     }
1315     else {
1316 gbeauche 1.2 if (optimize_accum && isaccum(d))
1317     emit_byte(0x2d);
1318     else {
1319 gbeauche 1.1 emit_byte(0x81);
1320     emit_byte(0xe8+d);
1321 gbeauche 1.2 }
1322 gbeauche 1.1 emit_word(i);
1323     }
1324     }
1325     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1326    
1327    
1328     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1329     {
1330     emit_byte(0x8b);
1331     emit_byte(0x05+8*d);
1332     emit_long(s);
1333     }
1334     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1335    
1336     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1337     {
1338     emit_byte(0xc7);
1339     emit_byte(0x05);
1340     emit_long(d);
1341     emit_long(s);
1342     }
1343     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1344    
1345     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1346     {
1347     emit_byte(0x66);
1348     emit_byte(0xc7);
1349     emit_byte(0x05);
1350     emit_long(d);
1351     emit_word(s);
1352     }
1353     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1354    
1355     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1356     {
1357     emit_byte(0xc6);
1358     emit_byte(0x05);
1359     emit_long(d);
1360     emit_byte(s);
1361     }
1362     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1363    
1364     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1365     {
1366     if (optimize_shift_once && (i == 1)) {
1367     emit_byte(0xd0);
1368     emit_byte(0x05);
1369     emit_long(d);
1370     }
1371     else {
1372     emit_byte(0xc0);
1373     emit_byte(0x05);
1374     emit_long(d);
1375     emit_byte(i);
1376     }
1377     }
1378     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1379    
1380     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1381     {
1382     if (optimize_shift_once && (i == 1)) {
1383     emit_byte(0xd0);
1384     emit_byte(0xc0+r);
1385     }
1386     else {
1387     emit_byte(0xc0);
1388     emit_byte(0xc0+r);
1389     emit_byte(i);
1390     }
1391     }
1392     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1393    
1394     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1395     {
1396     emit_byte(0x66);
1397     emit_byte(0xc1);
1398     emit_byte(0xc0+r);
1399     emit_byte(i);
1400     }
1401     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1402    
1403     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1404     {
1405     if (optimize_shift_once && (i == 1)) {
1406     emit_byte(0xd1);
1407     emit_byte(0xc0+r);
1408     }
1409     else {
1410     emit_byte(0xc1);
1411     emit_byte(0xc0+r);
1412     emit_byte(i);
1413     }
1414     }
1415     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1416    
1417     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1418     {
1419     emit_byte(0xd3);
1420     emit_byte(0xc0+d);
1421     }
1422     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1423    
1424     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1425     {
1426     emit_byte(0x66);
1427     emit_byte(0xd3);
1428     emit_byte(0xc0+d);
1429     }
1430     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1431    
1432     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1433     {
1434     emit_byte(0xd2);
1435     emit_byte(0xc0+d);
1436     }
1437     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1438    
1439     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1440     {
1441     emit_byte(0xd3);
1442     emit_byte(0xe0+d);
1443     }
1444     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1445    
1446     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1447     {
1448     emit_byte(0x66);
1449     emit_byte(0xd3);
1450     emit_byte(0xe0+d);
1451     }
1452     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1453    
1454     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1455     {
1456     emit_byte(0xd2);
1457     emit_byte(0xe0+d);
1458     }
1459     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1460    
1461     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1462     {
1463     if (optimize_shift_once && (i == 1)) {
1464     emit_byte(0xd0);
1465     emit_byte(0xc8+r);
1466     }
1467     else {
1468     emit_byte(0xc0);
1469     emit_byte(0xc8+r);
1470     emit_byte(i);
1471     }
1472     }
1473     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1474    
1475     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1476     {
1477     emit_byte(0x66);
1478     emit_byte(0xc1);
1479     emit_byte(0xc8+r);
1480     emit_byte(i);
1481     }
1482     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1483    
1484     // gb-- used for making an fpcr value in compemu_fpp.cpp
1485     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1486     {
1487     emit_byte(0x0b);
1488     emit_byte(0x05+8*d);
1489     emit_long(s);
1490     }
1491     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1492    
1493     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1494     {
1495     if (optimize_shift_once && (i == 1)) {
1496     emit_byte(0xd1);
1497     emit_byte(0xc8+r);
1498     }
1499     else {
1500     emit_byte(0xc1);
1501     emit_byte(0xc8+r);
1502     emit_byte(i);
1503     }
1504     }
1505     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1506    
1507     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1508     {
1509     emit_byte(0xd3);
1510     emit_byte(0xc8+d);
1511     }
1512     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1513    
1514     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1515     {
1516     emit_byte(0x66);
1517     emit_byte(0xd3);
1518     emit_byte(0xc8+d);
1519     }
1520     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1521    
1522     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1523     {
1524     emit_byte(0xd2);
1525     emit_byte(0xc8+d);
1526     }
1527     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1528    
1529     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1530     {
1531     emit_byte(0xd3);
1532     emit_byte(0xe8+d);
1533     }
1534     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1535    
1536     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1537     {
1538     emit_byte(0x66);
1539     emit_byte(0xd3);
1540     emit_byte(0xe8+d);
1541     }
1542     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1543    
1544     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1545     {
1546     emit_byte(0xd2);
1547     emit_byte(0xe8+d);
1548     }
1549     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1550    
1551     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1552     {
1553     emit_byte(0xd3);
1554     emit_byte(0xf8+d);
1555     }
1556     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1557    
1558     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1559     {
1560     emit_byte(0x66);
1561     emit_byte(0xd3);
1562     emit_byte(0xf8+d);
1563     }
1564     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1565    
1566     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1567     {
1568     emit_byte(0xd2);
1569     emit_byte(0xf8+d);
1570     }
1571     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1572    
1573     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1574     {
1575     if (optimize_shift_once && (i == 1)) {
1576     emit_byte(0xd1);
1577     emit_byte(0xe0+r);
1578     }
1579     else {
1580     emit_byte(0xc1);
1581     emit_byte(0xe0+r);
1582     emit_byte(i);
1583     }
1584     }
1585     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1586    
1587     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1588     {
1589     emit_byte(0x66);
1590     emit_byte(0xc1);
1591     emit_byte(0xe0+r);
1592     emit_byte(i);
1593     }
1594     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1595    
1596     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1597     {
1598     if (optimize_shift_once && (i == 1)) {
1599     emit_byte(0xd0);
1600     emit_byte(0xe0+r);
1601     }
1602     else {
1603     emit_byte(0xc0);
1604     emit_byte(0xe0+r);
1605     emit_byte(i);
1606     }
1607     }
1608     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1609    
1610     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1611     {
1612     if (optimize_shift_once && (i == 1)) {
1613     emit_byte(0xd1);
1614     emit_byte(0xe8+r);
1615     }
1616     else {
1617     emit_byte(0xc1);
1618     emit_byte(0xe8+r);
1619     emit_byte(i);
1620     }
1621     }
1622     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1623    
1624     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1625     {
1626     emit_byte(0x66);
1627     emit_byte(0xc1);
1628     emit_byte(0xe8+r);
1629     emit_byte(i);
1630     }
1631     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1632    
1633     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1634     {
1635     if (optimize_shift_once && (i == 1)) {
1636     emit_byte(0xd0);
1637     emit_byte(0xe8+r);
1638     }
1639     else {
1640     emit_byte(0xc0);
1641     emit_byte(0xe8+r);
1642     emit_byte(i);
1643     }
1644     }
1645     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1646    
1647     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1648     {
1649     if (optimize_shift_once && (i == 1)) {
1650     emit_byte(0xd1);
1651     emit_byte(0xf8+r);
1652     }
1653     else {
1654     emit_byte(0xc1);
1655     emit_byte(0xf8+r);
1656     emit_byte(i);
1657     }
1658     }
1659     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1660    
1661     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1662     {
1663     emit_byte(0x66);
1664     emit_byte(0xc1);
1665     emit_byte(0xf8+r);
1666     emit_byte(i);
1667     }
1668     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1669    
1670     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1671     {
1672     if (optimize_shift_once && (i == 1)) {
1673     emit_byte(0xd0);
1674     emit_byte(0xf8+r);
1675     }
1676     else {
1677     emit_byte(0xc0);
1678     emit_byte(0xf8+r);
1679     emit_byte(i);
1680     }
1681     }
1682     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1683    
1684     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1685     {
1686     emit_byte(0x9e);
1687     }
1688     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1689    
1690     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1691     {
1692     emit_byte(0x0f);
1693     emit_byte(0xa2);
1694     }
1695     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1696    
1697     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1698     {
1699     emit_byte(0x9f);
1700     }
1701     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1702    
1703     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1704     {
1705     emit_byte(0x0f);
1706     emit_byte(0x90+cc);
1707     emit_byte(0xc0+d);
1708     }
1709     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1710    
1711     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1712     {
1713     emit_byte(0x0f);
1714     emit_byte(0x90+cc);
1715     emit_byte(0x05);
1716     emit_long(d);
1717     }
1718     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1719    
1720     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1721     {
1722     if (have_cmov) {
1723     emit_byte(0x0f);
1724     emit_byte(0x40+cc);
1725     emit_byte(0xc0+8*d+s);
1726     }
1727     else { /* replacement using branch and mov */
1728     int uncc=(cc^1);
1729     emit_byte(0x70+uncc);
1730     emit_byte(2); /* skip next 2 bytes if not cc=true */
1731     emit_byte(0x89);
1732     emit_byte(0xc0+8*s+d);
1733     }
1734     }
1735     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1736    
1737     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1738     {
1739     emit_byte(0x0f);
1740     emit_byte(0xbc);
1741     emit_byte(0xc0+8*d+s);
1742     }
1743     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1744    
1745     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1746     {
1747     emit_byte(0x0f);
1748     emit_byte(0xbf);
1749     emit_byte(0xc0+8*d+s);
1750     }
1751     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1752    
1753     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1754     {
1755     emit_byte(0x0f);
1756     emit_byte(0xbe);
1757     emit_byte(0xc0+8*d+s);
1758     }
1759     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1760    
1761     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1762     {
1763     emit_byte(0x0f);
1764     emit_byte(0xb7);
1765     emit_byte(0xc0+8*d+s);
1766     }
1767     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1768    
1769     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1770     {
1771     emit_byte(0x0f);
1772     emit_byte(0xb6);
1773     emit_byte(0xc0+8*d+s);
1774     }
1775     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1776    
1777     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1778     {
1779     emit_byte(0x0f);
1780     emit_byte(0xaf);
1781     emit_byte(0xc0+8*d+s);
1782     }
1783     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1784    
1785     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1786     {
1787     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1788     abort();
1789     emit_byte(0xf7);
1790     emit_byte(0xea);
1791     }
1792     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1793    
1794     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1795     {
1796     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1797     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1798     abort();
1799     }
1800     emit_byte(0xf7);
1801     emit_byte(0xe2);
1802     }
1803     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1804    
1805     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1806     {
1807     abort(); /* %^$&%^$%#^ x86! */
1808     emit_byte(0x0f);
1809     emit_byte(0xaf);
1810     emit_byte(0xc0+8*d+s);
1811     }
1812     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1813    
1814     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1815     {
1816     emit_byte(0x88);
1817     emit_byte(0xc0+8*s+d);
1818     }
1819     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1820    
1821     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1822     {
1823     emit_byte(0x66);
1824     emit_byte(0x89);
1825     emit_byte(0xc0+8*s+d);
1826     }
1827     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1828    
1829     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1830     {
1831     int isebp=(baser==5)?0x40:0;
1832     int fi;
1833    
1834     switch(factor) {
1835     case 1: fi=0; break;
1836     case 2: fi=1; break;
1837     case 4: fi=2; break;
1838     case 8: fi=3; break;
1839     default: abort();
1840     }
1841    
1842    
1843     emit_byte(0x8b);
1844     emit_byte(0x04+8*d+isebp);
1845     emit_byte(baser+8*index+0x40*fi);
1846     if (isebp)
1847     emit_byte(0x00);
1848     }
1849     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1850    
1851     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1852     {
1853     int fi;
1854     int isebp;
1855    
1856     switch(factor) {
1857     case 1: fi=0; break;
1858     case 2: fi=1; break;
1859     case 4: fi=2; break;
1860     case 8: fi=3; break;
1861     default: abort();
1862     }
1863     isebp=(baser==5)?0x40:0;
1864    
1865     emit_byte(0x66);
1866     emit_byte(0x8b);
1867     emit_byte(0x04+8*d+isebp);
1868     emit_byte(baser+8*index+0x40*fi);
1869     if (isebp)
1870     emit_byte(0x00);
1871     }
1872     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1873    
1874     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1875     {
1876     int fi;
1877     int isebp;
1878    
1879     switch(factor) {
1880     case 1: fi=0; break;
1881     case 2: fi=1; break;
1882     case 4: fi=2; break;
1883     case 8: fi=3; break;
1884     default: abort();
1885     }
1886     isebp=(baser==5)?0x40:0;
1887    
1888     emit_byte(0x8a);
1889     emit_byte(0x04+8*d+isebp);
1890     emit_byte(baser+8*index+0x40*fi);
1891     if (isebp)
1892     emit_byte(0x00);
1893     }
1894     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1895    
1896     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1897     {
1898     int fi;
1899     int isebp;
1900    
1901     switch(factor) {
1902     case 1: fi=0; break;
1903     case 2: fi=1; break;
1904     case 4: fi=2; break;
1905     case 8: fi=3; break;
1906     default: abort();
1907     }
1908    
1909    
1910     isebp=(baser==5)?0x40:0;
1911    
1912     emit_byte(0x89);
1913     emit_byte(0x04+8*s+isebp);
1914     emit_byte(baser+8*index+0x40*fi);
1915     if (isebp)
1916     emit_byte(0x00);
1917     }
1918     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1919    
1920     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1921     {
1922     int fi;
1923     int isebp;
1924    
1925     switch(factor) {
1926     case 1: fi=0; break;
1927     case 2: fi=1; break;
1928     case 4: fi=2; break;
1929     case 8: fi=3; break;
1930     default: abort();
1931     }
1932     isebp=(baser==5)?0x40:0;
1933    
1934     emit_byte(0x66);
1935     emit_byte(0x89);
1936     emit_byte(0x04+8*s+isebp);
1937     emit_byte(baser+8*index+0x40*fi);
1938     if (isebp)
1939     emit_byte(0x00);
1940     }
1941     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1942    
1943     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1944     {
1945     int fi;
1946     int isebp;
1947    
1948     switch(factor) {
1949     case 1: fi=0; break;
1950     case 2: fi=1; break;
1951     case 4: fi=2; break;
1952     case 8: fi=3; break;
1953     default: abort();
1954     }
1955     isebp=(baser==5)?0x40:0;
1956    
1957     emit_byte(0x88);
1958     emit_byte(0x04+8*s+isebp);
1959     emit_byte(baser+8*index+0x40*fi);
1960     if (isebp)
1961     emit_byte(0x00);
1962     }
1963     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1964    
1965     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1966     {
1967     int fi;
1968    
1969     switch(factor) {
1970     case 1: fi=0; break;
1971     case 2: fi=1; break;
1972     case 4: fi=2; break;
1973     case 8: fi=3; break;
1974     default: abort();
1975     }
1976    
1977     emit_byte(0x89);
1978     emit_byte(0x84+8*s);
1979     emit_byte(baser+8*index+0x40*fi);
1980     emit_long(base);
1981     }
1982     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1983    
1984     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1985     {
1986     int fi;
1987    
1988     switch(factor) {
1989     case 1: fi=0; break;
1990     case 2: fi=1; break;
1991     case 4: fi=2; break;
1992     case 8: fi=3; break;
1993     default: abort();
1994     }
1995    
1996     emit_byte(0x66);
1997     emit_byte(0x89);
1998     emit_byte(0x84+8*s);
1999     emit_byte(baser+8*index+0x40*fi);
2000     emit_long(base);
2001     }
2002     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2003    
2004     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2005     {
2006     int fi;
2007    
2008     switch(factor) {
2009     case 1: fi=0; break;
2010     case 2: fi=1; break;
2011     case 4: fi=2; break;
2012     case 8: fi=3; break;
2013     default: abort();
2014     }
2015    
2016     emit_byte(0x88);
2017     emit_byte(0x84+8*s);
2018     emit_byte(baser+8*index+0x40*fi);
2019     emit_long(base);
2020     }
2021     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2022    
2023     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2024     {
2025     int fi;
2026    
2027     switch(factor) {
2028     case 1: fi=0; break;
2029     case 2: fi=1; break;
2030     case 4: fi=2; break;
2031     case 8: fi=3; break;
2032     default: abort();
2033     }
2034    
2035     emit_byte(0x8b);
2036     emit_byte(0x84+8*d);
2037     emit_byte(baser+8*index+0x40*fi);
2038     emit_long(base);
2039     }
2040     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2041    
2042     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2043     {
2044     int fi;
2045    
2046     switch(factor) {
2047     case 1: fi=0; break;
2048     case 2: fi=1; break;
2049     case 4: fi=2; break;
2050     case 8: fi=3; break;
2051     default: abort();
2052     }
2053    
2054     emit_byte(0x66);
2055     emit_byte(0x8b);
2056     emit_byte(0x84+8*d);
2057     emit_byte(baser+8*index+0x40*fi);
2058     emit_long(base);
2059     }
2060     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2061    
2062     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2063     {
2064     int fi;
2065    
2066     switch(factor) {
2067     case 1: fi=0; break;
2068     case 2: fi=1; break;
2069     case 4: fi=2; break;
2070     case 8: fi=3; break;
2071     default: abort();
2072     }
2073    
2074     emit_byte(0x8a);
2075     emit_byte(0x84+8*d);
2076     emit_byte(baser+8*index+0x40*fi);
2077     emit_long(base);
2078     }
2079     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2080    
2081     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2082     {
2083     int fi;
2084     switch(factor) {
2085     case 1: fi=0; break;
2086     case 2: fi=1; break;
2087     case 4: fi=2; break;
2088     case 8: fi=3; break;
2089     default:
2090     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2091     abort();
2092     }
2093     emit_byte(0x8b);
2094     emit_byte(0x04+8*d);
2095     emit_byte(0x05+8*index+64*fi);
2096     emit_long(base);
2097     }
2098     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2099    
2100     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2101     {
2102     int fi;
2103     switch(factor) {
2104     case 1: fi=0; break;
2105     case 2: fi=1; break;
2106     case 4: fi=2; break;
2107     case 8: fi=3; break;
2108     default:
2109     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2110     abort();
2111     }
2112     if (have_cmov) {
2113     emit_byte(0x0f);
2114     emit_byte(0x40+cond);
2115     emit_byte(0x04+8*d);
2116     emit_byte(0x05+8*index+64*fi);
2117     emit_long(base);
2118     }
2119     else { /* replacement using branch and mov */
2120     int uncc=(cond^1);
2121     emit_byte(0x70+uncc);
2122     emit_byte(7); /* skip next 7 bytes if not cc=true */
2123     emit_byte(0x8b);
2124     emit_byte(0x04+8*d);
2125     emit_byte(0x05+8*index+64*fi);
2126     emit_long(base);
2127     }
2128     }
2129     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2130    
2131     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2132     {
2133     if (have_cmov) {
2134     emit_byte(0x0f);
2135     emit_byte(0x40+cond);
2136     emit_byte(0x05+8*d);
2137     emit_long(mem);
2138     }
2139     else { /* replacement using branch and mov */
2140     int uncc=(cond^1);
2141     emit_byte(0x70+uncc);
2142     emit_byte(6); /* skip next 6 bytes if not cc=true */
2143     emit_byte(0x8b);
2144     emit_byte(0x05+8*d);
2145     emit_long(mem);
2146     }
2147     }
2148     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2149    
2150     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2151     {
2152 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2153 gbeauche 1.1 emit_byte(0x8b);
2154     emit_byte(0x40+8*d+s);
2155     emit_byte(offset);
2156     }
2157     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2158    
2159     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2160     {
2161 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2162 gbeauche 1.1 emit_byte(0x66);
2163     emit_byte(0x8b);
2164     emit_byte(0x40+8*d+s);
2165     emit_byte(offset);
2166     }
2167     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2168    
2169     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2170     {
2171 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2172 gbeauche 1.1 emit_byte(0x8a);
2173     emit_byte(0x40+8*d+s);
2174     emit_byte(offset);
2175     }
2176     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2177    
2178     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2179     {
2180     emit_byte(0x8b);
2181     emit_byte(0x80+8*d+s);
2182     emit_long(offset);
2183     }
2184     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2185    
2186     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2187     {
2188     emit_byte(0x66);
2189     emit_byte(0x8b);
2190     emit_byte(0x80+8*d+s);
2191     emit_long(offset);
2192     }
2193     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2194    
2195     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2196     {
2197     emit_byte(0x8a);
2198     emit_byte(0x80+8*d+s);
2199     emit_long(offset);
2200     }
2201     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2202    
2203     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2204     {
2205 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2206 gbeauche 1.1 emit_byte(0xc7);
2207     emit_byte(0x40+d);
2208     emit_byte(offset);
2209     emit_long(i);
2210     }
2211     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2212    
2213     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2214     {
2215 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2216 gbeauche 1.1 emit_byte(0x66);
2217     emit_byte(0xc7);
2218     emit_byte(0x40+d);
2219     emit_byte(offset);
2220     emit_word(i);
2221     }
2222     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2223    
2224     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2225     {
2226 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2227 gbeauche 1.1 emit_byte(0xc6);
2228     emit_byte(0x40+d);
2229     emit_byte(offset);
2230     emit_byte(i);
2231     }
2232     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2233    
2234     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2235     {
2236 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2237 gbeauche 1.1 emit_byte(0x89);
2238     emit_byte(0x40+8*s+d);
2239     emit_byte(offset);
2240     }
2241     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2242    
2243     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2244     {
2245 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2246 gbeauche 1.1 emit_byte(0x66);
2247     emit_byte(0x89);
2248     emit_byte(0x40+8*s+d);
2249     emit_byte(offset);
2250     }
2251     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2252    
2253     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2254     {
2255 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2256 gbeauche 1.1 emit_byte(0x88);
2257     emit_byte(0x40+8*s+d);
2258     emit_byte(offset);
2259     }
2260     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2261    
2262     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2263     {
2264     if (optimize_imm8 && isbyte(offset)) {
2265     emit_byte(0x8d);
2266     emit_byte(0x40+8*d+s);
2267     emit_byte(offset);
2268     }
2269     else {
2270     emit_byte(0x8d);
2271     emit_byte(0x80+8*d+s);
2272     emit_long(offset);
2273     }
2274     }
2275     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2276    
2277     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2278     {
2279     int fi;
2280    
2281     switch(factor) {
2282     case 1: fi=0; break;
2283     case 2: fi=1; break;
2284     case 4: fi=2; break;
2285     case 8: fi=3; break;
2286     default: abort();
2287     }
2288    
2289     if (optimize_imm8 && isbyte(offset)) {
2290     emit_byte(0x8d);
2291     emit_byte(0x44+8*d);
2292     emit_byte(0x40*fi+8*index+s);
2293     emit_byte(offset);
2294     }
2295     else {
2296     emit_byte(0x8d);
2297     emit_byte(0x84+8*d);
2298     emit_byte(0x40*fi+8*index+s);
2299     emit_long(offset);
2300     }
2301     }
2302     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2303    
2304     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2305     {
2306     int isebp=(s==5)?0x40:0;
2307     int fi;
2308    
2309     switch(factor) {
2310     case 1: fi=0; break;
2311     case 2: fi=1; break;
2312     case 4: fi=2; break;
2313     case 8: fi=3; break;
2314     default: abort();
2315     }
2316    
2317     emit_byte(0x8d);
2318     emit_byte(0x04+8*d+isebp);
2319     emit_byte(0x40*fi+8*index+s);
2320     if (isebp)
2321     emit_byte(0);
2322     }
2323     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2324    
2325     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2326     {
2327     if (optimize_imm8 && isbyte(offset)) {
2328     emit_byte(0x89);
2329     emit_byte(0x40+8*s+d);
2330     emit_byte(offset);
2331     }
2332     else {
2333     emit_byte(0x89);
2334     emit_byte(0x80+8*s+d);
2335     emit_long(offset);
2336     }
2337     }
2338     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2339    
2340     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2341     {
2342     emit_byte(0x66);
2343     emit_byte(0x89);
2344     emit_byte(0x80+8*s+d);
2345     emit_long(offset);
2346     }
2347     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2348    
2349     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2350     {
2351     if (optimize_imm8 && isbyte(offset)) {
2352     emit_byte(0x88);
2353     emit_byte(0x40+8*s+d);
2354     emit_byte(offset);
2355     }
2356     else {
2357     emit_byte(0x88);
2358     emit_byte(0x80+8*s+d);
2359     emit_long(offset);
2360     }
2361     }
2362     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2363    
2364     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2365     {
2366     emit_byte(0x0f);
2367     emit_byte(0xc8+r);
2368     }
2369     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2370    
2371     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2372     {
2373     emit_byte(0x66);
2374     emit_byte(0xc1);
2375     emit_byte(0xc0+r);
2376     emit_byte(0x08);
2377     }
2378     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2379    
2380     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2381     {
2382     emit_byte(0x89);
2383     emit_byte(0xc0+8*s+d);
2384     }
2385     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2386    
2387     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2388     {
2389     emit_byte(0x89);
2390     emit_byte(0x05+8*s);
2391     emit_long(d);
2392     }
2393     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2394    
2395     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2396     {
2397     emit_byte(0x66);
2398     emit_byte(0x89);
2399     emit_byte(0x05+8*s);
2400     emit_long(d);
2401     }
2402     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2403    
2404     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2405     {
2406     emit_byte(0x66);
2407     emit_byte(0x8b);
2408     emit_byte(0x05+8*d);
2409     emit_long(s);
2410     }
2411     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2412    
2413     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2414     {
2415     emit_byte(0x88);
2416     emit_byte(0x05+8*s);
2417     emit_long(d);
2418     }
2419     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2420    
2421     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2422     {
2423     emit_byte(0x8a);
2424     emit_byte(0x05+8*d);
2425     emit_long(s);
2426     }
2427     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2428    
2429     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2430     {
2431     emit_byte(0xb8+d);
2432     emit_long(s);
2433     }
2434     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2435    
2436     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2437     {
2438     emit_byte(0x66);
2439     emit_byte(0xb8+d);
2440     emit_word(s);
2441     }
2442     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2443    
2444     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2445     {
2446     emit_byte(0xb0+d);
2447     emit_byte(s);
2448     }
2449     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2450    
2451     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2452     {
2453     emit_byte(0x81);
2454     emit_byte(0x15);
2455     emit_long(d);
2456     emit_long(s);
2457     }
2458     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2459    
2460     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2461     {
2462     if (optimize_imm8 && isbyte(s)) {
2463     emit_byte(0x83);
2464     emit_byte(0x05);
2465     emit_long(d);
2466     emit_byte(s);
2467     }
2468     else {
2469     emit_byte(0x81);
2470     emit_byte(0x05);
2471     emit_long(d);
2472     emit_long(s);
2473     }
2474     }
2475     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2476    
2477     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2478     {
2479     emit_byte(0x66);
2480     emit_byte(0x81);
2481     emit_byte(0x05);
2482     emit_long(d);
2483     emit_word(s);
2484     }
2485     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2486    
2487     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2488     {
2489     emit_byte(0x80);
2490     emit_byte(0x05);
2491     emit_long(d);
2492     emit_byte(s);
2493     }
2494     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2495    
2496     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2497     {
2498 gbeauche 1.2 if (optimize_accum && isaccum(d))
2499     emit_byte(0xa9);
2500     else {
2501 gbeauche 1.1 emit_byte(0xf7);
2502     emit_byte(0xc0+d);
2503 gbeauche 1.2 }
2504 gbeauche 1.1 emit_long(i);
2505     }
2506     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2507    
2508     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2509     {
2510     emit_byte(0x85);
2511     emit_byte(0xc0+8*s+d);
2512     }
2513     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2514    
2515     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2516     {
2517     emit_byte(0x66);
2518     emit_byte(0x85);
2519     emit_byte(0xc0+8*s+d);
2520     }
2521     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2522    
2523     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2524     {
2525     emit_byte(0x84);
2526     emit_byte(0xc0+8*s+d);
2527     }
2528     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2529    
2530 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2531     {
2532     emit_byte(0x81);
2533     emit_byte(0xf0+d);
2534     emit_long(i);
2535     }
2536     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2537    
2538 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2539     {
2540     if (optimize_imm8 && isbyte(i)) {
2541 gbeauche 1.2 emit_byte(0x83);
2542     emit_byte(0xe0+d);
2543     emit_byte(i);
2544 gbeauche 1.1 }
2545     else {
2546 gbeauche 1.2 if (optimize_accum && isaccum(d))
2547     emit_byte(0x25);
2548     else {
2549     emit_byte(0x81);
2550     emit_byte(0xe0+d);
2551     }
2552     emit_long(i);
2553 gbeauche 1.1 }
2554     }
2555     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2556    
2557     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2558     {
2559 gbeauche 1.2 emit_byte(0x66);
2560     if (optimize_imm8 && isbyte(i)) {
2561     emit_byte(0x83);
2562     emit_byte(0xe0+d);
2563     emit_byte(i);
2564     }
2565     else {
2566     if (optimize_accum && isaccum(d))
2567     emit_byte(0x25);
2568     else {
2569     emit_byte(0x81);
2570     emit_byte(0xe0+d);
2571     }
2572     emit_word(i);
2573     }
2574 gbeauche 1.1 }
2575     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2576    
2577     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2578     {
2579     emit_byte(0x21);
2580     emit_byte(0xc0+8*s+d);
2581     }
2582     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2583    
2584     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2585     {
2586     emit_byte(0x66);
2587     emit_byte(0x21);
2588     emit_byte(0xc0+8*s+d);
2589     }
2590     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2591    
2592     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2593     {
2594     emit_byte(0x20);
2595     emit_byte(0xc0+8*s+d);
2596     }
2597     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2598    
2599     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2600     {
2601     if (optimize_imm8 && isbyte(i)) {
2602     emit_byte(0x83);
2603     emit_byte(0xc8+d);
2604     emit_byte(i);
2605     }
2606     else {
2607 gbeauche 1.2 if (optimize_accum && isaccum(d))
2608     emit_byte(0x0d);
2609     else {
2610 gbeauche 1.1 emit_byte(0x81);
2611     emit_byte(0xc8+d);
2612 gbeauche 1.2 }
2613 gbeauche 1.1 emit_long(i);
2614     }
2615     }
2616     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2617    
2618     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2619     {
2620     emit_byte(0x09);
2621     emit_byte(0xc0+8*s+d);
2622     }
2623     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2624    
2625     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2626     {
2627     emit_byte(0x66);
2628     emit_byte(0x09);
2629     emit_byte(0xc0+8*s+d);
2630     }
2631     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2632    
2633     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2634     {
2635     emit_byte(0x08);
2636     emit_byte(0xc0+8*s+d);
2637     }
2638     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2639    
2640     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2641     {
2642     emit_byte(0x11);
2643     emit_byte(0xc0+8*s+d);
2644     }
2645     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2646    
2647     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2648     {
2649     emit_byte(0x66);
2650     emit_byte(0x11);
2651     emit_byte(0xc0+8*s+d);
2652     }
2653     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2654    
2655     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2656     {
2657     emit_byte(0x10);
2658     emit_byte(0xc0+8*s+d);
2659     }
2660     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2661    
2662     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2663     {
2664     emit_byte(0x01);
2665     emit_byte(0xc0+8*s+d);
2666     }
2667     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2668    
2669     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2670     {
2671     emit_byte(0x66);
2672     emit_byte(0x01);
2673     emit_byte(0xc0+8*s+d);
2674     }
2675     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2676    
2677     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2678     {
2679     emit_byte(0x00);
2680     emit_byte(0xc0+8*s+d);
2681     }
2682     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2683    
2684     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2685     {
2686     if (isbyte(i)) {
2687     emit_byte(0x83);
2688     emit_byte(0xe8+d);
2689     emit_byte(i);
2690     }
2691     else {
2692 gbeauche 1.2 if (optimize_accum && isaccum(d))
2693     emit_byte(0x2d);
2694     else {
2695 gbeauche 1.1 emit_byte(0x81);
2696     emit_byte(0xe8+d);
2697 gbeauche 1.2 }
2698 gbeauche 1.1 emit_long(i);
2699     }
2700     }
2701     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2702    
2703     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2704     {
2705 gbeauche 1.2 if (optimize_accum && isaccum(d))
2706     emit_byte(0x2c);
2707     else {
2708 gbeauche 1.1 emit_byte(0x80);
2709     emit_byte(0xe8+d);
2710 gbeauche 1.2 }
2711 gbeauche 1.1 emit_byte(i);
2712     }
2713     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2714    
2715     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2716     {
2717     if (isbyte(i)) {
2718     emit_byte(0x83);
2719     emit_byte(0xc0+d);
2720     emit_byte(i);
2721     }
2722     else {
2723 gbeauche 1.2 if (optimize_accum && isaccum(d))
2724     emit_byte(0x05);
2725     else {
2726 gbeauche 1.1 emit_byte(0x81);
2727     emit_byte(0xc0+d);
2728 gbeauche 1.2 }
2729 gbeauche 1.1 emit_long(i);
2730     }
2731     }
2732     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2733    
2734     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2735     {
2736 gbeauche 1.2 emit_byte(0x66);
2737 gbeauche 1.1 if (isbyte(i)) {
2738     emit_byte(0x83);
2739     emit_byte(0xc0+d);
2740     emit_byte(i);
2741     }
2742     else {
2743 gbeauche 1.2 if (optimize_accum && isaccum(d))
2744     emit_byte(0x05);
2745     else {
2746 gbeauche 1.1 emit_byte(0x81);
2747     emit_byte(0xc0+d);
2748 gbeauche 1.2 }
2749 gbeauche 1.1 emit_word(i);
2750     }
2751     }
2752     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2753    
2754     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2755     {
2756 gbeauche 1.2 if (optimize_accum && isaccum(d))
2757     emit_byte(0x04);
2758     else {
2759     emit_byte(0x80);
2760     emit_byte(0xc0+d);
2761     }
2762 gbeauche 1.1 emit_byte(i);
2763     }
2764     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2765    
2766     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2767     {
2768     emit_byte(0x19);
2769     emit_byte(0xc0+8*s+d);
2770     }
2771     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2772    
2773     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2774     {
2775     emit_byte(0x66);
2776     emit_byte(0x19);
2777     emit_byte(0xc0+8*s+d);
2778     }
2779     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2780    
2781     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2782     {
2783     emit_byte(0x18);
2784     emit_byte(0xc0+8*s+d);
2785     }
2786     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2787    
2788     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2789     {
2790     emit_byte(0x29);
2791     emit_byte(0xc0+8*s+d);
2792     }
2793     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2794    
2795     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2796     {
2797     emit_byte(0x66);
2798     emit_byte(0x29);
2799     emit_byte(0xc0+8*s+d);
2800     }
2801     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2802    
2803     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2804     {
2805     emit_byte(0x28);
2806     emit_byte(0xc0+8*s+d);
2807     }
2808     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2809    
2810     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2811     {
2812     emit_byte(0x39);
2813     emit_byte(0xc0+8*s+d);
2814     }
2815     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2816    
2817     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2818     {
2819     if (optimize_imm8 && isbyte(i)) {
2820     emit_byte(0x83);
2821     emit_byte(0xf8+r);
2822     emit_byte(i);
2823     }
2824     else {
2825 gbeauche 1.2 if (optimize_accum && isaccum(r))
2826     emit_byte(0x3d);
2827     else {
2828 gbeauche 1.1 emit_byte(0x81);
2829     emit_byte(0xf8+r);
2830 gbeauche 1.2 }
2831 gbeauche 1.1 emit_long(i);
2832     }
2833     }
2834     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2835    
2836     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2837     {
2838     emit_byte(0x66);
2839     emit_byte(0x39);
2840     emit_byte(0xc0+8*s+d);
2841     }
2842     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2843    
2844 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2845     {
2846     emit_byte(0x80);
2847     emit_byte(0x3d);
2848     emit_long(d);
2849     emit_byte(s);
2850     }
2851     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2852    
2853 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2854     {
2855 gbeauche 1.2 if (optimize_accum && isaccum(d))
2856     emit_byte(0x3c);
2857     else {
2858 gbeauche 1.1 emit_byte(0x80);
2859     emit_byte(0xf8+d);
2860 gbeauche 1.2 }
2861 gbeauche 1.1 emit_byte(i);
2862     }
2863     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2864    
2865     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2866     {
2867     emit_byte(0x38);
2868     emit_byte(0xc0+8*s+d);
2869     }
2870     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2871    
2872     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2873     {
2874     int fi;
2875    
2876     switch(factor) {
2877     case 1: fi=0; break;
2878     case 2: fi=1; break;
2879     case 4: fi=2; break;
2880     case 8: fi=3; break;
2881     default: abort();
2882     }
2883     emit_byte(0x39);
2884     emit_byte(0x04+8*d);
2885     emit_byte(5+8*index+0x40*fi);
2886     emit_long(offset);
2887     }
2888     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2889    
2890     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2891     {
2892     emit_byte(0x31);
2893     emit_byte(0xc0+8*s+d);
2894     }
2895     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2896    
2897     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2898     {
2899     emit_byte(0x66);
2900     emit_byte(0x31);
2901     emit_byte(0xc0+8*s+d);
2902     }
2903     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2904    
2905     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2906     {
2907     emit_byte(0x30);
2908     emit_byte(0xc0+8*s+d);
2909     }
2910     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2911    
2912     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2913     {
2914     if (optimize_imm8 && isbyte(s)) {
2915     emit_byte(0x83);
2916     emit_byte(0x2d);
2917     emit_long(d);
2918     emit_byte(s);
2919     }
2920     else {
2921     emit_byte(0x81);
2922     emit_byte(0x2d);
2923     emit_long(d);
2924     emit_long(s);
2925     }
2926     }
2927     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2928    
2929     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2930     {
2931     if (optimize_imm8 && isbyte(s)) {
2932     emit_byte(0x83);
2933     emit_byte(0x3d);
2934     emit_long(d);
2935     emit_byte(s);
2936     }
2937     else {
2938     emit_byte(0x81);
2939     emit_byte(0x3d);
2940     emit_long(d);
2941     emit_long(s);
2942     }
2943     }
2944     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2945    
2946     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2947     {
2948     emit_byte(0x87);
2949     emit_byte(0xc0+8*r1+r2);
2950     }
2951     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2952    
2953     /*************************************************************************
2954     * FIXME: mem access modes probably wrong *
2955     *************************************************************************/
2956    
2957     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2958     {
2959     emit_byte(0x9c);
2960     }
2961     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2962    
2963     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2964     {
2965     emit_byte(0x9d);
2966     }
2967     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2968 gbeauche 1.13
2969     #endif
2970 gbeauche 1.1
2971     /*************************************************************************
2972     * Unoptimizable stuff --- jump *
2973     *************************************************************************/
2974    
2975     static __inline__ void raw_call_r(R4 r)
2976     {
2977 gbeauche 1.20 #if USE_NEW_RTASM
2978     CALLsr(r);
2979     #else
2980 gbeauche 1.1 emit_byte(0xff);
2981     emit_byte(0xd0+r);
2982 gbeauche 1.20 #endif
2983 gbeauche 1.5 }
2984    
2985     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2986     {
2987 gbeauche 1.20 #if USE_NEW_RTASM
2988     CALLsm(base, X86_NOREG, r, m);
2989     #else
2990 gbeauche 1.5 int mu;
2991     switch(m) {
2992     case 1: mu=0; break;
2993     case 2: mu=1; break;
2994     case 4: mu=2; break;
2995     case 8: mu=3; break;
2996     default: abort();
2997     }
2998     emit_byte(0xff);
2999     emit_byte(0x14);
3000     emit_byte(0x05+8*r+0x40*mu);
3001     emit_long(base);
3002 gbeauche 1.20 #endif
3003 gbeauche 1.1 }
3004    
3005     static __inline__ void raw_jmp_r(R4 r)
3006     {
3007 gbeauche 1.20 #if USE_NEW_RTASM
3008     JMPsr(r);
3009     #else
3010 gbeauche 1.1 emit_byte(0xff);
3011     emit_byte(0xe0+r);
3012 gbeauche 1.20 #endif
3013 gbeauche 1.1 }
3014    
3015     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3016     {
3017 gbeauche 1.20 #if USE_NEW_RTASM
3018     JMPsm(base, X86_NOREG, r, m);
3019     #else
3020 gbeauche 1.1 int mu;
3021     switch(m) {
3022     case 1: mu=0; break;
3023     case 2: mu=1; break;
3024     case 4: mu=2; break;
3025     case 8: mu=3; break;
3026     default: abort();
3027     }
3028     emit_byte(0xff);
3029     emit_byte(0x24);
3030     emit_byte(0x05+8*r+0x40*mu);
3031     emit_long(base);
3032 gbeauche 1.20 #endif
3033 gbeauche 1.1 }
3034    
3035     static __inline__ void raw_jmp_m(uae_u32 base)
3036     {
3037     emit_byte(0xff);
3038     emit_byte(0x25);
3039     emit_long(base);
3040     }
3041    
3042    
3043     static __inline__ void raw_call(uae_u32 t)
3044     {
3045 gbeauche 1.20 #if USE_NEW_RTASM
3046     CALLm(t);
3047     #else
3048 gbeauche 1.1 emit_byte(0xe8);
3049     emit_long(t-(uae_u32)target-4);
3050 gbeauche 1.20 #endif
3051 gbeauche 1.1 }
3052    
3053     static __inline__ void raw_jmp(uae_u32 t)
3054     {
3055 gbeauche 1.20 #if USE_NEW_RTASM
3056     JMPm(t);
3057     #else
3058 gbeauche 1.1 emit_byte(0xe9);
3059     emit_long(t-(uae_u32)target-4);
3060 gbeauche 1.20 #endif
3061 gbeauche 1.1 }
3062    
3063     static __inline__ void raw_jl(uae_u32 t)
3064     {
3065     emit_byte(0x0f);
3066     emit_byte(0x8c);
3067 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3068 gbeauche 1.1 }
3069    
3070     static __inline__ void raw_jz(uae_u32 t)
3071     {
3072     emit_byte(0x0f);
3073     emit_byte(0x84);
3074 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3075 gbeauche 1.1 }
3076    
3077     static __inline__ void raw_jnz(uae_u32 t)
3078     {
3079     emit_byte(0x0f);
3080     emit_byte(0x85);
3081 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3082 gbeauche 1.1 }
3083    
3084     static __inline__ void raw_jnz_l_oponly(void)
3085     {
3086     emit_byte(0x0f);
3087     emit_byte(0x85);
3088     }
3089    
3090     static __inline__ void raw_jcc_l_oponly(int cc)
3091     {
3092     emit_byte(0x0f);
3093     emit_byte(0x80+cc);
3094     }
3095    
3096     static __inline__ void raw_jnz_b_oponly(void)
3097     {
3098     emit_byte(0x75);
3099     }
3100    
3101     static __inline__ void raw_jz_b_oponly(void)
3102     {
3103     emit_byte(0x74);
3104     }
3105    
3106     static __inline__ void raw_jcc_b_oponly(int cc)
3107     {
3108     emit_byte(0x70+cc);
3109     }
3110    
3111     static __inline__ void raw_jmp_l_oponly(void)
3112     {
3113     emit_byte(0xe9);
3114     }
3115    
3116     static __inline__ void raw_jmp_b_oponly(void)
3117     {
3118     emit_byte(0xeb);
3119     }
3120    
3121     static __inline__ void raw_ret(void)
3122     {
3123     emit_byte(0xc3);
3124     }
3125    
3126     static __inline__ void raw_nop(void)
3127     {
3128     emit_byte(0x90);
3129     }
3130    
3131 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3132     {
3133     /* Source: GNU Binutils 2.12.90.0.15 */
3134     /* Various efficient no-op patterns for aligning code labels.
3135     Note: Don't try to assemble the instructions in the comments.
3136     0L and 0w are not legal. */
3137     static const uae_u8 f32_1[] =
3138     {0x90}; /* nop */
3139     static const uae_u8 f32_2[] =
3140     {0x89,0xf6}; /* movl %esi,%esi */
3141     static const uae_u8 f32_3[] =
3142     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3143     static const uae_u8 f32_4[] =
3144     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3145     static const uae_u8 f32_5[] =
3146     {0x90, /* nop */
3147     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3148     static const uae_u8 f32_6[] =
3149     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3150     static const uae_u8 f32_7[] =
3151     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3152     static const uae_u8 f32_8[] =
3153     {0x90, /* nop */
3154     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3155     static const uae_u8 f32_9[] =
3156     {0x89,0xf6, /* movl %esi,%esi */
3157     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3158     static const uae_u8 f32_10[] =
3159     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3160     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3161     static const uae_u8 f32_11[] =
3162     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3163     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3164     static const uae_u8 f32_12[] =
3165     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3166     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3167     static const uae_u8 f32_13[] =
3168     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3169     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170     static const uae_u8 f32_14[] =
3171     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3172     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3173     static const uae_u8 f32_15[] =
3174     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3175     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3176     static const uae_u8 f32_16[] =
3177     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3178     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3179     static const uae_u8 *const f32_patt[] = {
3180     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3181     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3182     };
3183 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3184 gbeauche 1.8
3185 gbeauche 1.21 #if defined(__x86_64__)
3186     /* The recommended way to pad 64bit code is to use NOPs preceded by
3187     maximally four 0x66 prefixes. Balance the size of nops. */
3188     if (nbytes == 0)
3189     return;
3190    
3191     int i;
3192     int nnops = (nbytes + 3) / 4;
3193     int len = nbytes / nnops;
3194     int remains = nbytes - nnops * len;
3195    
3196     for (i = 0; i < remains; i++) {
3197     emit_block(prefixes, len);
3198     raw_nop();
3199     }
3200     for (; i < nnops; i++) {
3201     emit_block(prefixes, len - 1);
3202     raw_nop();
3203     }
3204     #else
3205 gbeauche 1.8 int nloops = nbytes / 16;
3206     while (nloops-- > 0)
3207     emit_block(f32_16, sizeof(f32_16));
3208    
3209     nbytes %= 16;
3210     if (nbytes)
3211     emit_block(f32_patt[nbytes - 1], nbytes);
3212 gbeauche 1.21 #endif
3213 gbeauche 1.8 }
3214    
3215 gbeauche 1.1
3216     /*************************************************************************
3217     * Flag handling, to and fro UAE flag register *
3218     *************************************************************************/
3219    
3220     #ifdef SAHF_SETO_PROFITABLE
3221    
3222     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3223    
3224     static __inline__ void raw_flags_to_reg(int r)
3225     {
3226     raw_lahf(0); /* Most flags in AH */
3227     //raw_setcc(r,0); /* V flag in AL */
3228 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3229 gbeauche 1.1
3230     #if 1 /* Let's avoid those nasty partial register stalls */
3231 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3232     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3233 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3234     live.state[FLAGTMP].status=INMEM;
3235     live.state[FLAGTMP].realreg=-1;
3236     /* We just "evicted" FLAGTMP. */
3237     if (live.nat[r].nholds!=1) {
3238     /* Huh? */
3239     abort();
3240     }
3241     live.nat[r].nholds=0;
3242     #endif
3243     }
3244    
3245     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3246     static __inline__ void raw_reg_to_flags(int r)
3247     {
3248     raw_cmp_b_ri(r,-127); /* set V */
3249     raw_sahf(0);
3250     }
3251    
3252 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3253     static __inline__ void raw_flags_set_zero(int s, int tmp)
3254     {
3255     raw_mov_l_rr(tmp,s);
3256     raw_lahf(s); /* flags into ah */
3257     raw_and_l_ri(s,0xffffbfff);
3258     raw_and_l_ri(tmp,0x00004000);
3259     raw_xor_l_ri(tmp,0x00004000);
3260     raw_or_l(s,tmp);
3261     raw_sahf(s);
3262     }
3263    
3264 gbeauche 1.1 #else
3265    
3266     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3267     static __inline__ void raw_flags_to_reg(int r)
3268     {
3269     raw_pushfl();
3270     raw_pop_l_r(r);
3271 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3272 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3273     live.state[FLAGTMP].status=INMEM;
3274     live.state[FLAGTMP].realreg=-1;
3275     /* We just "evicted" FLAGTMP. */
3276     if (live.nat[r].nholds!=1) {
3277     /* Huh? */
3278     abort();
3279     }
3280     live.nat[r].nholds=0;
3281     }
3282    
3283     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3284     static __inline__ void raw_reg_to_flags(int r)
3285     {
3286     raw_push_l_r(r);
3287     raw_popfl();
3288     }
3289    
3290 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3291     static __inline__ void raw_flags_set_zero(int s, int tmp)
3292     {
3293     raw_mov_l_rr(tmp,s);
3294     raw_pushfl();
3295     raw_pop_l_r(s);
3296     raw_and_l_ri(s,0xffffffbf);
3297     raw_and_l_ri(tmp,0x00000040);
3298     raw_xor_l_ri(tmp,0x00000040);
3299     raw_or_l(s,tmp);
3300     raw_push_l_r(s);
3301     raw_popfl();
3302     }
3303 gbeauche 1.1 #endif
3304    
3305     /* Apparently, there are enough instructions between flag store and
3306     flag reload to avoid the partial memory stall */
3307     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3308     {
3309     #if 1
3310 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3311 gbeauche 1.1 #else
3312 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3313     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3314 gbeauche 1.1 #endif
3315     }
3316    
3317     /* FLAGX is byte sized, and we *do* write it at that size */
3318     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3319     {
3320     if (live.nat[target].canbyte)
3321 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3322 gbeauche 1.1 else if (live.nat[target].canword)
3323 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3324 gbeauche 1.1 else
3325 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3326 gbeauche 1.1 }
3327    
3328 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3329     {
3330     if (off) raw_sub_l_ri(ESP_INDEX,off);
3331     }
3332    
3333 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3334     {
3335 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3336 gbeauche 1.1 }
3337    
3338     /*************************************************************************
3339     * Handling mistaken direct memory access *
3340     *************************************************************************/
3341    
3342     // gb-- I don't need that part for JIT Basilisk II
3343     #if defined(NATMEM_OFFSET) && 0
3344     #include <asm/sigcontext.h>
3345     #include <signal.h>
3346    
3347     #define SIG_READ 1
3348     #define SIG_WRITE 2
3349    
3350     static int in_handler=0;
3351     static uae_u8 veccode[256];
3352    
3353     static void vec(int x, struct sigcontext sc)
3354     {
3355     uae_u8* i=(uae_u8*)sc.eip;
3356     uae_u32 addr=sc.cr2;
3357     int r=-1;
3358     int size=4;
3359     int dir=-1;
3360     int len=0;
3361     int j;
3362    
3363     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3364     if (!canbang)
3365     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3366     if (in_handler)
3367     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3368    
3369     if (canbang && i>=compiled_code && i<=current_compile_p) {
3370     if (*i==0x66) {
3371     i++;
3372     size=2;
3373     len++;
3374     }
3375    
3376     switch(i[0]) {
3377     case 0x8a:
3378     if ((i[1]&0xc0)==0x80) {
3379     r=(i[1]>>3)&7;
3380     dir=SIG_READ;
3381     size=1;
3382     len+=6;
3383     break;
3384     }
3385     break;
3386     case 0x88:
3387     if ((i[1]&0xc0)==0x80) {
3388     r=(i[1]>>3)&7;
3389     dir=SIG_WRITE;
3390     size=1;
3391     len+=6;
3392     break;
3393     }
3394     break;
3395     case 0x8b:
3396     if ((i[1]&0xc0)==0x80) {
3397     r=(i[1]>>3)&7;
3398     dir=SIG_READ;
3399     len+=6;
3400     break;
3401     }
3402     if ((i[1]&0xc0)==0x40) {
3403     r=(i[1]>>3)&7;
3404     dir=SIG_READ;
3405     len+=3;
3406     break;
3407     }
3408     break;
3409     case 0x89:
3410     if ((i[1]&0xc0)==0x80) {
3411     r=(i[1]>>3)&7;
3412     dir=SIG_WRITE;
3413     len+=6;
3414     break;
3415     }
3416     if ((i[1]&0xc0)==0x40) {
3417     r=(i[1]>>3)&7;
3418     dir=SIG_WRITE;
3419     len+=3;
3420     break;
3421     }
3422     break;
3423     }
3424     }
3425    
3426     if (r!=-1) {
3427     void* pr=NULL;
3428     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3429    
3430     switch(r) {
3431     case 0: pr=&(sc.eax); break;
3432     case 1: pr=&(sc.ecx); break;
3433     case 2: pr=&(sc.edx); break;
3434     case 3: pr=&(sc.ebx); break;
3435     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3436     case 5: pr=(size>1)?
3437     (void*)(&(sc.ebp)):
3438     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3439     case 6: pr=(size>1)?
3440     (void*)(&(sc.esi)):
3441     (void*)(((uae_u8*)&(sc.edx))+1); break;
3442     case 7: pr=(size>1)?
3443     (void*)(&(sc.edi)):
3444     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3445     default: abort();
3446     }
3447     if (pr) {
3448     blockinfo* bi;
3449    
3450     if (currprefs.comp_oldsegv) {
3451     addr-=NATMEM_OFFSET;
3452    
3453     if ((addr>=0x10000000 && addr<0x40000000) ||
3454     (addr>=0x50000000)) {
3455     write_log("Suspicious address in %x SEGV handler.\n",addr);
3456     }
3457     if (dir==SIG_READ) {
3458     switch(size) {
3459     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3460     case 2: *((uae_u16*)pr)=get_word(addr); break;
3461     case 4: *((uae_u32*)pr)=get_long(addr); break;
3462     default: abort();
3463     }
3464     }
3465     else { /* write */
3466     switch(size) {
3467     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3468     case 2: put_word(addr,*((uae_u16*)pr)); break;
3469     case 4: put_long(addr,*((uae_u32*)pr)); break;
3470     default: abort();
3471     }
3472     }
3473     write_log("Handled one access!\n");
3474     fflush(stdout);
3475     segvcount++;
3476     sc.eip+=len;
3477     }
3478     else {
3479     void* tmp=target;
3480     int i;
3481     uae_u8 vecbuf[5];
3482    
3483     addr-=NATMEM_OFFSET;
3484    
3485     if ((addr>=0x10000000 && addr<0x40000000) ||
3486     (addr>=0x50000000)) {
3487     write_log("Suspicious address in %x SEGV handler.\n",addr);
3488     }
3489    
3490     target=(uae_u8*)sc.eip;
3491     for (i=0;i<5;i++)
3492     vecbuf[i]=target[i];
3493     emit_byte(0xe9);
3494 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3495 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3496    
3497     write_log("Handled one access!\n");
3498     fflush(stdout);
3499     segvcount++;
3500    
3501     target=veccode;
3502    
3503     if (dir==SIG_READ) {
3504     switch(size) {
3505     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3506     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3507     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3508     default: abort();
3509     }
3510     }
3511     else { /* write */
3512     switch(size) {
3513     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3514     case 2: put_word(addr,*((uae_u16*)pr)); break;
3515     case 4: put_long(addr,*((uae_u32*)pr)); break;
3516     default: abort();
3517     }
3518     }
3519     for (i=0;i<5;i++)
3520     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3521 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3522 gbeauche 1.1 emit_byte(0xe9);
3523 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3524 gbeauche 1.1 in_handler=1;
3525     target=tmp;
3526     }
3527     bi=active;
3528     while (bi) {
3529     if (bi->handler &&
3530     (uae_u8*)bi->direct_handler<=i &&
3531     (uae_u8*)bi->nexthandler>i) {
3532     write_log("deleted trigger (%p<%p<%p) %p\n",
3533     bi->handler,
3534     i,
3535     bi->nexthandler,
3536     bi->pc_p);
3537     invalidate_block(bi);
3538     raise_in_cl_list(bi);
3539     set_special(0);
3540     return;
3541     }
3542     bi=bi->next;
3543     }
3544     /* Not found in the active list. Might be a rom routine that
3545     is in the dormant list */
3546     bi=dormant;
3547     while (bi) {
3548     if (bi->handler &&
3549     (uae_u8*)bi->direct_handler<=i &&
3550     (uae_u8*)bi->nexthandler>i) {
3551     write_log("deleted trigger (%p<%p<%p) %p\n",
3552     bi->handler,
3553     i,
3554     bi->nexthandler,
3555     bi->pc_p);
3556     invalidate_block(bi);
3557     raise_in_cl_list(bi);
3558     set_special(0);
3559     return;
3560     }
3561     bi=bi->next;
3562     }
3563     write_log("Huh? Could not find trigger!\n");
3564     return;
3565     }
3566     }
3567     write_log("Can't handle access!\n");
3568     for (j=0;j<10;j++) {
3569     write_log("instruction byte %2d is %02x\n",j,i[j]);
3570     }
3571     write_log("Please send the above info (starting at \"fault address\") to\n"
3572     "bmeyer@csse.monash.edu.au\n"
3573     "This shouldn't happen ;-)\n");
3574     fflush(stdout);
3575     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3576     }
3577     #endif
3578    
3579    
3580     /*************************************************************************
3581     * Checking for CPU features *
3582     *************************************************************************/
3583    
3584 gbeauche 1.3 struct cpuinfo_x86 {
3585     uae_u8 x86; // CPU family
3586     uae_u8 x86_vendor; // CPU vendor
3587     uae_u8 x86_processor; // CPU canonical processor type
3588     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3589     uae_u32 x86_hwcap;
3590     uae_u8 x86_model;
3591     uae_u8 x86_mask;
3592     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3593     char x86_vendor_id[16];
3594     };
3595     struct cpuinfo_x86 cpuinfo;
3596    
/* CPU vendors, as stored in cpuinfo_x86.x86_vendor. */
enum {
    X86_VENDOR_INTEL     = 0x00,
    X86_VENDOR_CYRIX     = 0x01,
    X86_VENDOR_AMD       = 0x02,
    X86_VENDOR_UMC       = 0x03,
    X86_VENDOR_NEXGEN    = 0x04,
    X86_VENDOR_CENTAUR   = 0x05,
    X86_VENDOR_RISE      = 0x06,
    X86_VENDOR_TRANSMETA = 0x07,
    X86_VENDOR_NSC       = 0x08,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
3609    
/* Canonical processor types.  The values index the per-processor tables, so
   they must stay dense and X86_PROCESSOR_max must stay last. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_X86_64     = 7,
    X86_PROCESSOR_max        = 8    /* number of processor types */
};
3621    
3622     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3623     "80386",
3624     "80486",
3625     "Pentium",
3626     "PentiumPro",
3627     "K6",
3628     "Athlon",
3629 gbeauche 1.16 "Pentium4",
3630 gbeauche 1.28 "x86-64"
3631 gbeauche 1.3 };
3632    
3633     static struct ptt {
3634     const int align_loop;
3635     const int align_loop_max_skip;
3636     const int align_jump;
3637     const int align_jump_max_skip;
3638     const int align_func;
3639     }
3640     x86_alignments[X86_PROCESSOR_max] = {
3641     { 4, 3, 4, 3, 4 },
3642     { 16, 15, 16, 15, 16 },
3643     { 16, 7, 16, 7, 16 },
3644     { 16, 15, 16, 7, 16 },
3645     { 32, 7, 32, 7, 32 },
3646 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3647 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3648     { 16, 7, 16, 7, 16 }
3649 gbeauche 1.3 };
3650 gbeauche 1.1
3651 gbeauche 1.3 static void
3652     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3653 gbeauche 1.1 {
3654 gbeauche 1.3 char *v = c->x86_vendor_id;
3655    
3656     if (!strcmp(v, "GenuineIntel"))
3657     c->x86_vendor = X86_VENDOR_INTEL;
3658     else if (!strcmp(v, "AuthenticAMD"))
3659     c->x86_vendor = X86_VENDOR_AMD;
3660     else if (!strcmp(v, "CyrixInstead"))
3661     c->x86_vendor = X86_VENDOR_CYRIX;
3662     else if (!strcmp(v, "Geode by NSC"))
3663     c->x86_vendor = X86_VENDOR_NSC;
3664     else if (!strcmp(v, "UMC UMC UMC "))
3665     c->x86_vendor = X86_VENDOR_UMC;
3666     else if (!strcmp(v, "CentaurHauls"))
3667     c->x86_vendor = X86_VENDOR_CENTAUR;
3668     else if (!strcmp(v, "NexGenDriven"))
3669     c->x86_vendor = X86_VENDOR_NEXGEN;
3670     else if (!strcmp(v, "RiseRiseRise"))
3671     c->x86_vendor = X86_VENDOR_RISE;
3672     else if (!strcmp(v, "GenuineTMx86") ||
3673     !strcmp(v, "TransmetaCPU"))
3674     c->x86_vendor = X86_VENDOR_TRANSMETA;
3675     else
3676     c->x86_vendor = X86_VENDOR_UNKNOWN;
3677     }
3678 gbeauche 1.1
3679 gbeauche 1.3 static void
3680     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3681     {
3682 gbeauche 1.27 const int CPUID_SPACE = 4096;
3683     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3684     if (cpuid_space == VM_MAP_FAILED)
3685     abort();
3686     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3687    
3688 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3689 gbeauche 1.3 uae_u8* tmp=get_target();
3690 gbeauche 1.1
3691 gbeauche 1.20 s_op = op;
3692 gbeauche 1.3 set_target(cpuid_space);
3693     raw_push_l_r(0); /* eax */
3694     raw_push_l_r(1); /* ecx */
3695     raw_push_l_r(2); /* edx */
3696     raw_push_l_r(3); /* ebx */
3697 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3698 gbeauche 1.3 raw_cpuid(0);
3699 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3700     raw_mov_l_mr((uintptr)&s_ebx,3);
3701     raw_mov_l_mr((uintptr)&s_ecx,1);
3702     raw_mov_l_mr((uintptr)&s_edx,2);
3703 gbeauche 1.3 raw_pop_l_r(3);
3704     raw_pop_l_r(2);
3705     raw_pop_l_r(1);
3706     raw_pop_l_r(0);
3707     raw_ret();
3708     set_target(tmp);
3709 gbeauche 1.1
3710 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3711 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3712     if (ebx != NULL) *ebx = s_ebx;
3713     if (ecx != NULL) *ecx = s_ecx;
3714     if (edx != NULL) *edx = s_edx;
3715 gbeauche 1.27
3716     vm_release(cpuid_space, CPUID_SPACE);
3717 gbeauche 1.1 }
3718    
3719 gbeauche 1.3 static void
3720     raw_init_cpu(void)
3721 gbeauche 1.1 {
3722 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3723    
3724     /* Defaults */
3725 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3726 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3727     c->cpuid_level = -1; /* CPUID not detected */
3728     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3729     c->x86_vendor_id[0] = '\0'; /* Unset */
3730     c->x86_hwcap = 0;
3731    
3732     /* Get vendor name */
3733     c->x86_vendor_id[12] = '\0';
3734     cpuid(0x00000000,
3735     (uae_u32 *)&c->cpuid_level,
3736     (uae_u32 *)&c->x86_vendor_id[0],
3737     (uae_u32 *)&c->x86_vendor_id[8],
3738     (uae_u32 *)&c->x86_vendor_id[4]);
3739     x86_get_cpu_vendor(c);
3740    
3741     /* Intel-defined flags: level 0x00000001 */
3742     c->x86_brand_id = 0;
3743     if ( c->cpuid_level >= 0x00000001 ) {
3744     uae_u32 tfms, brand_id;
3745     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3746     c->x86 = (tfms >> 8) & 15;
3747 gbeauche 1.29 if (c->x86 == 0xf)
3748     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3749 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3750 gbeauche 1.29 if (c->x86_model == 0xf)
3751     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3752 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3753     c->x86_mask = tfms & 15;
3754     } else {
3755     /* Have CPUID level 0 only - unheard of */
3756     c->x86 = 4;
3757     }
3758    
3759 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3760     uae_u32 xlvl;
3761     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3762     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3763     if ( xlvl >= 0x80000001 ) {
3764 gbeauche 1.28 uae_u32 features, extra_features;
3765     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3766 gbeauche 1.16 if (features & (1 << 29)) {
3767     /* Assume x86-64 if long mode is supported */
3768 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3769 gbeauche 1.16 }
3770 gbeauche 1.28 if (extra_features & (1 << 0))
3771     have_lahf_lm = true;
3772 gbeauche 1.16 }
3773     }
3774    
3775 gbeauche 1.3 /* Canonicalize processor ID */
3776     switch (c->x86) {
3777     case 3:
3778     c->x86_processor = X86_PROCESSOR_I386;
3779     break;
3780     case 4:
3781     c->x86_processor = X86_PROCESSOR_I486;
3782     break;
3783     case 5:
3784     if (c->x86_vendor == X86_VENDOR_AMD)
3785     c->x86_processor = X86_PROCESSOR_K6;
3786     else
3787     c->x86_processor = X86_PROCESSOR_PENTIUM;
3788     break;
3789     case 6:
3790     if (c->x86_vendor == X86_VENDOR_AMD)
3791     c->x86_processor = X86_PROCESSOR_ATHLON;
3792     else
3793     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3794     break;
3795     case 15:
3796 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3797     switch (c->x86_vendor) {
3798     case X86_VENDOR_INTEL:
3799     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3800     break;
3801     case X86_VENDOR_AMD:
3802     /* Assume a 32-bit Athlon processor if not in long mode */
3803     c->x86_processor = X86_PROCESSOR_ATHLON;
3804     break;
3805     }
3806     }
3807     break;
3808 gbeauche 1.3 }
3809     if (c->x86_processor == X86_PROCESSOR_max) {
3810 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3811     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3812 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3813     fprintf(stderr, " Model : %d\n", c->x86_model);
3814     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3815 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3816 gbeauche 1.3 if (c->x86_brand_id)
3817     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3818     }
3819    
3820     /* Have CMOV support? */
3821 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3822 gbeauche 1.3
3823     /* Can the host CPU suffer from partial register stalls? */
3824     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3825     #if 1
3826     /* It appears that partial register writes are a bad idea even on
3827 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3828     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3829 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3830     have_rat_stall = true;
3831 gbeauche 1.1 #endif
3832 gbeauche 1.3
3833     /* Alignments */
3834     if (tune_alignment) {
3835     align_loops = x86_alignments[c->x86_processor].align_loop;
3836     align_jumps = x86_alignments[c->x86_processor].align_jump;
3837     }
3838    
3839     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3840     c->cpuid_level, c->x86_vendor_id,
3841     x86_processor_string_table[c->x86_processor]);
3842 gbeauche 1.1 }
3843    
3844 gbeauche 1.10 static bool target_check_bsf(void)
3845     {
3846     bool mismatch = false;
3847     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3848     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3849     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3850     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3851     for (int value = -1; value <= 1; value++) {
3852 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3853     unsigned long tmp = value;
3854 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3855 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3856 gbeauche 1.10 int OF = (flags >> 11) & 1;
3857     int SF = (flags >> 7) & 1;
3858     int ZF = (flags >> 6) & 1;
3859     int CF = flags & 1;
3860     tmp = (value == 0);
3861     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3862     mismatch = true;
3863     }
3864     }}}}
3865     if (mismatch)
3866     write_log("Target CPU defines all flags on BSF instruction\n");
3867     return !mismatch;
3868     }
3869    
3870 gbeauche 1.1
3871     /*************************************************************************
3872     * FPU stuff *
3873     *************************************************************************/
3874    
3875    
3876     static __inline__ void raw_fp_init(void)
3877     {
3878     int i;
3879    
3880     for (i=0;i<N_FREGS;i++)
3881     live.spos[i]=-2;
3882     live.tos=-1; /* Stack is empty */
3883     }
3884    
3885     static __inline__ void raw_fp_cleanup_drop(void)
3886     {
3887     #if 0
3888     /* using FINIT instead of popping all the entries.
3889     Seems to have side effects --- there is display corruption in
3890     Quake when this is used */
3891     if (live.tos>1) {
3892     emit_byte(0x9b);
3893     emit_byte(0xdb);
3894     emit_byte(0xe3);
3895     live.tos=-1;
3896     }
3897     #endif
3898     while (live.tos>=1) {
3899     emit_byte(0xde);
3900     emit_byte(0xd9);
3901     live.tos-=2;
3902     }
3903     while (live.tos>=0) {
3904     emit_byte(0xdd);
3905     emit_byte(0xd8);
3906     live.tos--;
3907     }
3908     raw_fp_init();
3909     }
3910    
3911     static __inline__ void make_tos(int r)
3912     {
3913     int p,q;
3914    
3915     if (live.spos[r]<0) { /* Register not yet on stack */
3916     emit_byte(0xd9);
3917     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3918     live.tos++;
3919     live.spos[r]=live.tos;
3920     live.onstack[live.tos]=r;
3921     return;
3922     }
3923     /* Register is on stack */
3924     if (live.tos==live.spos[r])
3925     return;
3926     p=live.spos[r];
3927     q=live.onstack[live.tos];
3928    
3929     emit_byte(0xd9);
3930     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3931     live.onstack[live.tos]=r;
3932     live.spos[r]=live.tos;
3933     live.onstack[p]=q;
3934     live.spos[q]=p;
3935     }
3936    
3937     static __inline__ void make_tos2(int r, int r2)
3938     {
3939     int q;
3940    
3941     make_tos(r2); /* Put the reg that's supposed to end up in position2
3942     on top */
3943    
3944     if (live.spos[r]<0) { /* Register not yet on stack */
3945     make_tos(r); /* This will extend the stack */
3946     return;
3947     }
3948     /* Register is on stack */
3949     emit_byte(0xd9);
3950     emit_byte(0xc9); /* Move r2 into position 2 */
3951    
3952     q=live.onstack[live.tos-1];
3953     live.onstack[live.tos]=q;
3954     live.spos[q]=live.tos;
3955     live.onstack[live.tos-1]=r2;
3956     live.spos[r2]=live.tos-1;
3957    
3958     make_tos(r); /* And r into 1 */
3959     }
3960    
3961     static __inline__ int stackpos(int r)
3962     {
3963     if (live.spos[r]<0)
3964     abort();
3965     if (live.tos<live.spos[r]) {
3966     printf("Looking for spos for fnreg %d\n",r);
3967     abort();
3968     }
3969     return live.tos-live.spos[r];
3970     }
3971    
3972     static __inline__ void usereg(int r)
3973     {
3974     if (live.spos[r]<0)
3975     make_tos(r);
3976     }
3977    
3978     /* This is called with one FP value in a reg *above* tos, which it will
3979     pop off the stack if necessary */
3980     static __inline__ void tos_make(int r)
3981     {
3982     if (live.spos[r]<0) {
3983     live.tos++;
3984     live.spos[r]=live.tos;
3985     live.onstack[live.tos]=r;
3986     return;
3987     }
3988     emit_byte(0xdd);
3989     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3990     and pop it*/
3991     }
3992 gbeauche 1.23
3993     /* FP helper functions */
3994     #if USE_NEW_RTASM
3995     #define DEFINE_OP(NAME, GEN) \
3996     static inline void raw_##NAME(uint32 m) \
3997     { \
3998     GEN(m, X86_NOREG, X86_NOREG, 1); \
3999     }
4000     DEFINE_OP(fstl, FSTLm);
4001     DEFINE_OP(fstpl, FSTPLm);
4002     DEFINE_OP(fldl, FLDLm);
4003     DEFINE_OP(fildl, FILDLm);
4004     DEFINE_OP(fistl, FISTLm);
4005     DEFINE_OP(flds, FLDSm);
4006     DEFINE_OP(fsts, FSTSm);
4007     DEFINE_OP(fstpt, FSTPTm);
4008     DEFINE_OP(fldt, FLDTm);
4009     #else
4010     #define DEFINE_OP(NAME, OP1, OP2) \
4011     static inline void raw_##NAME(uint32 m) \
4012     { \
4013     emit_byte(OP1); \
4014     emit_byte(OP2); \
4015     emit_long(m); \
4016     }
4017     DEFINE_OP(fstl, 0xdd, 0x15);
4018     DEFINE_OP(fstpl, 0xdd, 0x1d);
4019     DEFINE_OP(fldl, 0xdd, 0x05);
4020     DEFINE_OP(fildl, 0xdb, 0x05);
4021     DEFINE_OP(fistl, 0xdb, 0x15);
4022     DEFINE_OP(flds, 0xd9, 0x05);
4023     DEFINE_OP(fsts, 0xd9, 0x15);
4024     DEFINE_OP(fstpt, 0xdb, 0x3d);
4025     DEFINE_OP(fldt, 0xdb, 0x2d);
4026     #endif
4027     #undef DEFINE_OP
4028    
4029 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4030     {
4031     make_tos(r);
4032 gbeauche 1.23 raw_fstl(m);
4033 gbeauche 1.1 }
4034     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4035    
4036     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4037     {
4038     make_tos(r);
4039 gbeauche 1.23 raw_fstpl(m);
4040 gbeauche 1.1 live.onstack[live.tos]=-1;
4041     live.tos--;
4042     live.spos[r]=-2;
4043     }
4044     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4045    
4046     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4047     {
4048 gbeauche 1.23 raw_fldl(m);
4049 gbeauche 1.1 tos_make(r);
4050     }
4051     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4052    
4053     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4054     {
4055 gbeauche 1.23 raw_fildl(m);
4056 gbeauche 1.1 tos_make(r);
4057     }
4058     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4059    
4060     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4061     {
4062     make_tos(r);
4063 gbeauche 1.23 raw_fistl(m);
4064 gbeauche 1.1 }
4065     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4066    
4067     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4068     {
4069 gbeauche 1.23 raw_flds(m);
4070 gbeauche 1.1 tos_make(r);
4071     }
4072     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4073    
4074     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4075     {
4076     make_tos(r);
4077 gbeauche 1.23 raw_fsts(m);
4078 gbeauche 1.1 }
4079     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4080    
4081     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4082     {
4083     int rs;
4084    
4085     /* Stupid x87 can't write a long double to mem without popping the
4086     stack! */
4087     usereg(r);
4088     rs=stackpos(r);
4089     emit_byte(0xd9); /* Get a copy to the top of stack */
4090     emit_byte(0xc0+rs);
4091    
4092 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4093 gbeauche 1.1 }
4094     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4095    
4096     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4097     {
4098     int rs;
4099    
4100     make_tos(r);
4101 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4102 gbeauche 1.1 live.onstack[live.tos]=-1;
4103     live.tos--;
4104     live.spos[r]=-2;
4105     }
4106     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4107    
4108     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4109     {
4110 gbeauche 1.23 raw_fldt(m);
4111 gbeauche 1.1 tos_make(r);
4112     }
4113     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4114    
4115     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4116     {
4117     emit_byte(0xd9);
4118     emit_byte(0xeb);
4119     tos_make(r);
4120     }
4121     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4122    
4123     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4124     {
4125     emit_byte(0xd9);
4126     emit_byte(0xec);
4127     tos_make(r);
4128     }
4129     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4130    
4131     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4132     {
4133     emit_byte(0xd9);
4134     emit_byte(0xea);
4135     tos_make(r);
4136     }
4137     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4138    
4139     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4140     {
4141     emit_byte(0xd9);
4142     emit_byte(0xed);
4143     tos_make(r);
4144     }
4145     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4146    
4147     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4148     {
4149     emit_byte(0xd9);
4150     emit_byte(0xe8);
4151     tos_make(r);
4152     }
4153     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4154    
4155     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4156     {
4157     emit_byte(0xd9);
4158     emit_byte(0xee);
4159     tos_make(r);
4160     }
4161     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4162    
4163     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4164     {
4165     int ds;
4166    
4167     usereg(s);
4168     ds=stackpos(s);
4169     if (ds==0 && live.spos[d]>=0) {
4170     /* source is on top of stack, and we already have the dest */
4171     int dd=stackpos(d);
4172     emit_byte(0xdd);
4173     emit_byte(0xd0+dd);
4174     }
4175     else {
4176     emit_byte(0xd9);
4177     emit_byte(0xc0+ds); /* duplicate source on tos */
4178     tos_make(d); /* store to destination, pop if necessary */
4179     }
4180     }
4181     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4182    
4183     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4184     {
4185     emit_byte(0xd9);
4186     emit_byte(0xa8+index);
4187     emit_long(base);
4188     }
4189     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4190    
4191    
4192     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4193     {
4194     int ds;
4195    
4196     if (d!=s) {
4197     usereg(s);
4198     ds=stackpos(s);
4199     emit_byte(0xd9);
4200     emit_byte(0xc0+ds); /* duplicate source */
4201     emit_byte(0xd9);
4202     emit_byte(0xfa); /* take square root */
4203     tos_make(d); /* store to destination */
4204     }
4205     else {
4206     make_tos(d);
4207     emit_byte(0xd9);
4208     emit_byte(0xfa); /* take square root */
4209     }
4210     }
4211     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4212    
4213     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4214     {
4215     int ds;
4216    
4217     if (d!=s) {
4218     usereg(s);
4219     ds=stackpos(s);
4220     emit_byte(0xd9);
4221     emit_byte(0xc0+ds); /* duplicate source */
4222     emit_byte(0xd9);
4223     emit_byte(0xe1); /* take fabs */
4224     tos_make(d); /* store to destination */
4225     }
4226     else {
4227     make_tos(d);
4228     emit_byte(0xd9);
4229     emit_byte(0xe1); /* take fabs */
4230     }
4231     }
4232     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4233    
4234     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4235     {
4236     int ds;
4237    
4238     if (d!=s) {
4239     usereg(s);
4240     ds=stackpos(s);
4241     emit_byte(0xd9);
4242     emit_byte(0xc0+ds); /* duplicate source */
4243     emit_byte(0xd9);
4244     emit_byte(0xfc); /* take frndint */
4245     tos_make(d); /* store to destination */
4246     }
4247     else {
4248     make_tos(d);
4249     emit_byte(0xd9);
4250     emit_byte(0xfc); /* take frndint */
4251     }
4252     }
4253     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4254    
4255     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4256     {
4257     int ds;
4258    
4259     if (d!=s) {
4260     usereg(s);
4261     ds=stackpos(s);
4262     emit_byte(0xd9);
4263     emit_byte(0xc0+ds); /* duplicate source */
4264     emit_byte(0xd9);
4265     emit_byte(0xff); /* take cos */
4266     tos_make(d); /* store to destination */
4267     }
4268     else {
4269     make_tos(d);
4270     emit_byte(0xd9);
4271     emit_byte(0xff); /* take cos */
4272     }
4273     }
4274     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4275    
4276     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4277     {
4278     int ds;
4279    
4280     if (d!=s) {
4281     usereg(s);
4282     ds=stackpos(s);
4283     emit_byte(0xd9);
4284     emit_byte(0xc0+ds); /* duplicate source */
4285     emit_byte(0xd9);
4286     emit_byte(0xfe); /* take sin */
4287     tos_make(d); /* store to destination */
4288     }
4289     else {
4290     make_tos(d);
4291     emit_byte(0xd9);
4292     emit_byte(0xfe); /* take sin */
4293     }
4294     }
4295     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4296    
4297     double one=1;
4298     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4299     {
4300     int ds;
4301    
4302     usereg(s);
4303     ds=stackpos(s);
4304     emit_byte(0xd9);
4305     emit_byte(0xc0+ds); /* duplicate source */
4306    
4307     emit_byte(0xd9);
4308     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4309     emit_byte(0xd9);
4310     emit_byte(0xfc); /* rndint */
4311     emit_byte(0xd9);
4312     emit_byte(0xc9); /* swap top two elements */
4313     emit_byte(0xd8);
4314     emit_byte(0xe1); /* subtract rounded from original */
4315     emit_byte(0xd9);
4316     emit_byte(0xf0); /* f2xm1 */
4317     emit_byte(0xdc);
4318     emit_byte(0x05);
4319 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4320 gbeauche 1.1 emit_byte(0xd9);
4321     emit_byte(0xfd); /* and scale it */
4322     emit_byte(0xdd);
4323     emit_byte(0xd9); /* take he rounded value off */
4324     tos_make(d); /* store to destination */
4325     }
4326     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4327    
4328     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4329     {
4330     int ds;
4331    
4332     usereg(s);
4333     ds=stackpos(s);
4334     emit_byte(0xd9);
4335     emit_byte(0xc0+ds); /* duplicate source */
4336     emit_byte(0xd9);
4337     emit_byte(0xea); /* fldl2e */
4338     emit_byte(0xde);
4339     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4340    
4341     emit_byte(0xd9);
4342     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4343     emit_byte(0xd9);
4344     emit_byte(0xfc); /* rndint */
4345     emit_byte(0xd9);
4346     emit_byte(0xc9); /* swap top two elements */
4347     emit_byte(0xd8);
4348     emit_byte(0xe1); /* subtract rounded from original */
4349     emit_byte(0xd9);
4350     emit_byte(0xf0); /* f2xm1 */
4351     emit_byte(0xdc);
4352     emit_byte(0x05);
4353 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4354 gbeauche 1.1 emit_byte(0xd9);
4355     emit_byte(0xfd); /* and scale it */
4356     emit_byte(0xdd);
4357     emit_byte(0xd9); /* take he rounded value off */
4358     tos_make(d); /* store to destination */
4359     }
4360     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4361    
4362     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4363     {
4364     int ds;
4365    
4366     usereg(s);
4367     ds=stackpos(s);
4368     emit_byte(0xd9);
4369     emit_byte(0xc0+ds); /* duplicate source */
4370     emit_byte(0xd9);
4371     emit_byte(0xe8); /* push '1' */
4372     emit_byte(0xd9);
4373     emit_byte(0xc9); /* swap top two */
4374     emit_byte(0xd9);
4375     emit_byte(0xf1); /* take 1*log2(x) */
4376     tos_make(d); /* store to destination */
4377     }
4378     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4379    
4380    
4381     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4382     {
4383     int ds;
4384    
4385     if (d!=s) {
4386     usereg(s);
4387     ds=stackpos(s);
4388     emit_byte(0xd9);
4389     emit_byte(0xc0+ds); /* duplicate source */
4390     emit_byte(0xd9);
4391     emit_byte(0xe0); /* take fchs */
4392     tos_make(d); /* store to destination */
4393     }
4394     else {
4395     make_tos(d);
4396     emit_byte(0xd9);
4397     emit_byte(0xe0); /* take fchs */
4398     }
4399     }
4400     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4401    
4402     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4403     {
4404     int ds;
4405    
4406     usereg(s);
4407     usereg(d);
4408    
4409     if (live.spos[s]==live.tos) {
4410     /* Source is on top of stack */
4411     ds=stackpos(d);
4412     emit_byte(0xdc);
4413     emit_byte(0xc0+ds); /* add source to dest*/
4414     }
4415     else {
4416     make_tos(d);
4417     ds=stackpos(s);
4418    
4419     emit_byte(0xd8);
4420     emit_byte(0xc0+ds); /* add source to dest*/
4421     }
4422     }
4423     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4424    
4425     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4426     {
4427     int ds;
4428    
4429     usereg(s);
4430     usereg(d);
4431    
4432     if (live.spos[s]==live.tos) {
4433     /* Source is on top of stack */
4434     ds=stackpos(d);
4435     emit_byte(0xdc);
4436     emit_byte(0xe8+ds); /* sub source from dest*/
4437     }
4438     else {
4439     make_tos(d);
4440     ds=stackpos(s);
4441    
4442     emit_byte(0xd8);
4443     emit_byte(0xe0+ds); /* sub src from dest */
4444     }
4445     }
4446     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4447    
4448     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4449     {
4450     int ds;
4451    
4452     usereg(s);
4453     usereg(d);
4454    
4455     make_tos(d);
4456     ds=stackpos(s);
4457    
4458     emit_byte(0xdd);
4459     emit_byte(0xe0+ds); /* cmp dest with source*/
4460     }
4461     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4462    
4463     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4464     {
4465     int ds;
4466    
4467     usereg(s);
4468     usereg(d);
4469    
4470     if (live.spos[s]==live.tos) {
4471     /* Source is on top of stack */
4472     ds=stackpos(d);
4473     emit_byte(0xdc);
4474     emit_byte(0xc8+ds); /* mul dest by source*/
4475     }
4476     else {
4477     make_tos(d);
4478     ds=stackpos(s);
4479    
4480     emit_byte(0xd8);
4481     emit_byte(0xc8+ds); /* mul dest by source*/
4482     }
4483     }
4484     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4485    
4486     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4487     {
4488     int ds;
4489    
4490     usereg(s);
4491     usereg(d);
4492    
4493     if (live.spos[s]==live.tos) {
4494     /* Source is on top of stack */
4495     ds=stackpos(d);
4496     emit_byte(0xdc);
4497     emit_byte(0xf8+ds); /* div dest by source */
4498     }
4499     else {
4500     make_tos(d);
4501     ds=stackpos(s);
4502    
4503     emit_byte(0xd8);
4504     emit_byte(0xf0+ds); /* div dest by source*/
4505     }
4506     }
4507     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4508    
4509     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4510     {
4511     int ds;
4512    
4513     usereg(s);
4514     usereg(d);
4515    
4516     make_tos2(d,s);
4517     ds=stackpos(s);
4518    
4519     if (ds!=1) {
4520     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4521     abort();
4522     }
4523     emit_byte(0xd9);
4524     emit_byte(0xf8); /* take rem from dest by source */
4525     }
4526     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4527    
4528     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4529     {
4530     int ds;
4531    
4532     usereg(s);
4533     usereg(d);
4534    
4535     make_tos2(d,s);
4536     ds=stackpos(s);
4537    
4538     if (ds!=1) {
4539     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4540     abort();
4541     }
4542     emit_byte(0xd9);
4543     emit_byte(0xf5); /* take rem1 from dest by source */
4544     }
4545     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4546    
4547    
4548     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4549     {
4550     make_tos(r);
4551     emit_byte(0xd9); /* ftst */
4552     emit_byte(0xe4);
4553     }
4554     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4555    
/* %eax register is clobbered if target processor doesn't support fucomi.
   The condition is parenthesized so it expands safely inside any larger
   expression. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
4559    
4560     static __inline__ void raw_fflags_into_flags(int r)
4561     {
4562     int p;
4563    
4564     usereg(r);
4565     p=stackpos(r);
4566    
4567     emit_byte(0xd9);
4568     emit_byte(0xee); /* Push 0 */
4569     emit_byte(0xd9);
4570     emit_byte(0xc9+p); /* swap top two around */
4571     if (have_cmov) {
4572     // gb-- fucomi is for P6 cores only, not K6-2 then...
4573     emit_byte(0xdb);
4574     emit_byte(0xe9+p); /* fucomi them */
4575     }
4576     else {
4577     emit_byte(0xdd);
4578     emit_byte(0xe1+p); /* fucom them */
4579     emit_byte(0x9b);
4580     emit_byte(0xdf);
4581     emit_byte(0xe0); /* fstsw ax */
4582     raw_sahf(0); /* sahf */
4583     }
4584     emit_byte(0xdd);
4585     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4586     }