ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.35
Committed: 2007-01-13T18:21:30Z (17 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.34: +37 -31 lines
Log Message:
Remove the 33-bit addressing hack as it's overly complex for not much gain.
Rather, use an address override prefix (0x67), even though the Intel Core
optimization reference guide says to avoid LCP prefixes. In practice, the impact
on performance is measurably marginal on e.g. Speedometer tests.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
37     #define EAX_INDEX 0 /* register indices 0-7, numbered in x86 register-encoding order */
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__) /* indices 8-15 are only defined for x86-64 builds */
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.33 /* XXX this has to match X86_Reg8H_Base + 4 */
56     #define AH_INDEX (0x10+4+EAX_INDEX) /* high-byte registers: 0x10 + 4 + index of the parent register */
57     #define CH_INDEX (0x10+4+ECX_INDEX)
58     #define DH_INDEX (0x10+4+EDX_INDEX)
59     #define BH_INDEX (0x10+4+EBX_INDEX)
60 gbeauche 1.1
61     /* The register in which subroutines return an integer return value */
62 gbeauche 1.20 #define REG_RESULT EAX_INDEX /* defined unconditionally: same index on IA-32 and x86-64 builds */
63 gbeauche 1.1
64     /* The registers subroutines take their first and second argument in */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     /* Handle the _fastcall parameters of ECX and EDX */
67 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
68     #define REG_PAR2 EDX_INDEX
69     #elif defined(__x86_64__) /* x86-64 builds: first argument in index 7 (EDI_INDEX), second in index 6 (ESI_INDEX) */
70     #define REG_PAR1 EDI_INDEX
71     #define REG_PAR2 ESI_INDEX
72 gbeauche 1.1 #else /* every other build: EAX_INDEX and EDX_INDEX */
73 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
74     #define REG_PAR2 EDX_INDEX
75 gbeauche 1.1 #endif
76    
77 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
78 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX /* NOTE(review): same register as REG_PC_PRE in this configuration, unlike the #else branch - confirm this is intended */
80 gbeauche 1.1 #else
81 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82 gbeauche 1.1 #endif
83    
84 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
85 gbeauche 1.1 -1 if any reg will do */
86 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits; raw_imul_64_32 and raw_mul_64_32 abort unless d==MUL_NREG1 and s==MUL_NREG2 */
88 gbeauche 1.1
89 gbeauche 1.31 #define STACK_ALIGN 16 /* presumably a byte count - TODO confirm at the use site */
90     #define STACK_OFFSET sizeof(void *) /* size of one pointer; NOTE(review): presumably accounts for a pushed return address - confirm */
91    
92 gbeauche 1.1 uae_s8 always_used[]={4,-1}; /* -1-terminated list of register indices; 4 is ESP_INDEX */
93 gbeauche 1.20 #if defined(__x86_64__)
94     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* x86-64: every index except 4 (ESP_INDEX); -1 terminates the list */
95     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* x86-64: same set as can_byte */
96     #else
97 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1}; /* IA-32: only indices 0-3 (EAX, ECX, EDX, EBX) */
98     uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; /* IA-32: every index except 4 (ESP_INDEX) */
99 gbeauche 1.20 #endif
100 gbeauche 1.1
101 gbeauche 1.17 #if USE_OPTIMIZED_CALLS /* this branch cannot currently be built: it ends in #error */
102     /* Make sure interpretive core does not use cpuopti */
103     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 gbeauche 1.20 #error FIXME: code not ready
105 gbeauche 1.17 #else
106 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
107     by the caller */
108 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}; /* 16 entries, one per register index; only index 4 (ESP_INDEX) is set */
109 gbeauche 1.17 #endif
110 gbeauche 1.1
111     /* This *should* be the same as call_saved. But:
112     - We might not really know which registers are saved, and which aren't,
113     so we need to preserve some, but don't want to rely on everyone else
114     also saving those registers
115     - Special registers (such as the stack pointer) should not be "preserved"
116     by pushing, even though they are "saved" across function calls
117     */
118 gbeauche 1.21 #if defined(__x86_64__)
119 gbeauche 1.32 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
121     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1}; /* set for indices 3, 5 and 11-15; index 4 (ESP_INDEX) is left clear */
122 gbeauche 1.21 #else
123 gbeauche 1.32 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; /* set for indices 3, 5, 6 and 7 */
125 gbeauche 1.21 #endif
126 gbeauche 1.1
127     /* Whether classes of instructions do or don't clobber the native flags */
128     #define CLOBBER_MOV /* empty expansion: nothing is done for this instruction class */
129     #define CLOBBER_LEA
130     #define CLOBBER_CMOV
131     #define CLOBBER_POP
132     #define CLOBBER_PUSH
133     #define CLOBBER_SUB clobber_flags() /* expands to a call of clobber_flags(), which is not defined in this section */
134     #define CLOBBER_SBB clobber_flags()
135     #define CLOBBER_CMP clobber_flags()
136     #define CLOBBER_ADD clobber_flags()
137     #define CLOBBER_ADC clobber_flags()
138     #define CLOBBER_AND clobber_flags()
139     #define CLOBBER_OR clobber_flags()
140     #define CLOBBER_XOR clobber_flags()
141    
142     #define CLOBBER_ROL clobber_flags() /* rotate, shift, test, multiply and bit-operation classes expand to clobber_flags() */
143     #define CLOBBER_ROR clobber_flags()
144     #define CLOBBER_SHLL clobber_flags()
145     #define CLOBBER_SHRL clobber_flags()
146     #define CLOBBER_SHRA clobber_flags()
147     #define CLOBBER_TEST clobber_flags()
148     #define CLOBBER_CL16 /* empty expansion, as for the extension (SE and ZE), SW32 and SETCC classes below */
149     #define CLOBBER_CL8
150 gbeauche 1.20 #define CLOBBER_SE32
151 gbeauche 1.1 #define CLOBBER_SE16
152     #define CLOBBER_SE8
153 gbeauche 1.20 #define CLOBBER_ZE32
154 gbeauche 1.1 #define CLOBBER_ZE16
155     #define CLOBBER_ZE8
156     #define CLOBBER_SW16 clobber_flags() /* NOTE(review): presumably pairs with raw_bswap_16, which emits ROLWir(8, r) - confirm */
157     #define CLOBBER_SW32
158     #define CLOBBER_SETCC
159     #define CLOBBER_MUL clobber_flags()
160     #define CLOBBER_BT clobber_flags()
161     #define CLOBBER_BSF clobber_flags()
162    
163 gbeauche 1.13 /* FIXME: USE_NEW_RTASM stays disabled until that's proofread; it is only switched on for x86-64 builds. */
164 gbeauche 1.20 #if defined(__x86_64__)
165     #define USE_NEW_RTASM 1
166     #endif
167    
168     #if USE_NEW_RTASM
169 gbeauche 1.13
170     #if defined(__x86_64__)
171     #define X86_TARGET_64BIT 1
172 gbeauche 1.35 /* The address override prefix causes a 5-cycle penalty on Intel Core
173     processors. Another solution would be to decompose the load into an LEA,
174     MOV (to zero-extend), MOV (from memory): is it better? */
175     #define ADDR32 x86_emit_byte(0x67), /* trailing comma: emits the 0x67 byte, then evaluates the instruction macro written after ADDR32 */
176     #else
177     #define ADDR32 /**/
178 gbeauche 1.13 #endif
179     #define X86_FLAT_REGISTERS 0 /* these three switches are set before codegen_x86.h is included; presumably that header reads them - TODO confirm */
180 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
181     #define X86_OPTIMIZE_ROTSHI 1
182 gbeauche 1.13 #include "codegen_x86.h"
183    
184     #define x86_emit_byte(B) emit_byte(B) /* the x86_emit_* and x86_get_target hooks forward to emit_* and get_target, which are not defined in this section */
185     #define x86_emit_word(W) emit_word(W)
186     #define x86_emit_long(L) emit_long(L)
187 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
188 gbeauche 1.13 #define x86_get_target() get_target()
189     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) /* passes the expansion site's file, line and function to jit_fail, which aborts */
190    
191     static void jit_fail(const char *msg, const char *file, int line, const char *function)
192     { /* Print msg with its function, file and line to stderr, then abort(); target of the x86_emit_failure macro. */
193     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
194     function, file, line, msg);
195     abort();
196     }
197    
198     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
199     { /* Emit a push of register r: PUSHQr on x86-64 builds, PUSHLr otherwise. */
200 gbeauche 1.20 #if defined(__x86_64__)
201     PUSHQr(r);
202     #else
203 gbeauche 1.13 PUSHLr(r);
204 gbeauche 1.20 #endif
205 gbeauche 1.13 }
206     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
207    
208     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
209     { /* Emit a pop into register r: POPQr on x86-64 builds, POPLr otherwise. */
210 gbeauche 1.20 #if defined(__x86_64__)
211     POPQr(r);
212     #else
213 gbeauche 1.13 POPLr(r);
214 gbeauche 1.20 #endif
215 gbeauche 1.13 }
216     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
217    
218 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
219     { /* Emit a pop into memory at address d (no base or index register): POPQm on x86-64 builds, POPLm otherwise. */
220     #if defined(__x86_64__)
221     POPQm(d, X86_NOREG, X86_NOREG, 1);
222     #else
223     POPLm(d, X86_NOREG, X86_NOREG, 1);
224     #endif
225     }
226     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
227    
228 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
229     { /* Emit BT (bit test) on register r with immediate bit index i. */
230     BTLir(i, r);
231     }
232     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
233    
234     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
235     { /* Emit BT (bit test) on register r with the bit index taken from register b. */
236     BTLrr(b, r);
237     }
238     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
239    
240     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
241     { /* Emit BTC (bit test and complement) on register r with immediate bit index i. */
242     BTCLir(i, r);
243     }
244     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
245    
246     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
247     { /* Emit BTC (bit test and complement) on register r with the bit index taken from register b. */
248     BTCLrr(b, r);
249     }
250     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
251    
252     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
253     { /* Emit BTR (bit test and reset) on register r with immediate bit index i. */
254     BTRLir(i, r);
255     }
256     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
257    
258     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
259     { /* Emit BTR (bit test and reset) on register r with the bit index taken from register b. */
260     BTRLrr(b, r);
261     }
262     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
263    
264     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
265     { /* Emit BTS (bit test and set) on register r with immediate bit index i. */
266     BTSLir(i, r);
267     }
268     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
269    
270     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
271     { /* Emit BTS (bit test and set) on register r with the bit index taken from register b. */
272     BTSLrr(b, r);
273     }
274     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
275    
276     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
277     { /* Emit a 16-bit SUB of immediate i from register d. */
278     SUBWir(i, d);
279     }
280     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
281    
282     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
283     { /* Emit a 32-bit load into register d from memory at address s (no base or index register). */
284     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
285     }
286     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
287    
288     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
289     { /* Emit a 32-bit store of immediate s to memory at address d (no base or index register). */
290     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
291     }
292     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
293    
294     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
295     { /* Emit a 16-bit store of immediate s to memory at address d (no base or index register). */
296     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
297     }
298     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
299    
300     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
301     { /* Emit an 8-bit store of immediate s to memory at address d (no base or index register). */
302     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
303     }
304     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
305    
306     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
307     { /* Emit an 8-bit ROL (rotate left) by immediate i on the byte at address d (no base or index register). */
308     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
309     }
310     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
311    
312     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
313     { /* Emit an 8-bit ROL (rotate left) of register r by immediate i. */
314     ROLBir(i, r);
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317    
318     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319     { /* Emit a 16-bit ROL (rotate left) of register r by immediate i. */
320     ROLWir(i, r);
321     }
322     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
323    
324     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
325     { /* Emit a 32-bit ROL (rotate left) of register r by immediate i. */
326     ROLLir(i, r);
327     }
328     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
329    
330     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
331     { /* Emit a 32-bit ROL (rotate left) of register d by the count held in register r. */
332     ROLLrr(r, d);
333     }
334     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
335    
336     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
337     { /* Emit a 16-bit ROL (rotate left) of register d by the count held in register r. */
338     ROLWrr(r, d);
339     }
340     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
341    
342     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
343     { /* Emit an 8-bit ROL (rotate left) of register d by the count held in register r. */
344     ROLBrr(r, d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
347    
348     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
349     { /* Emit a 32-bit SHL (shift left) of register d by the count held in register r. */
350     SHLLrr(r, d);
351     }
352     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
353    
354     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
355     { /* Emit a 16-bit SHL (shift left) of register d by the count held in register r. */
356     SHLWrr(r, d);
357     }
358     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
359    
360     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
361     { /* Emit an 8-bit SHL (shift left) of register d by the count held in register r. */
362     SHLBrr(r, d);
363     }
364     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
365    
366     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
367     { /* Emit an 8-bit ROR (rotate right) of register r by immediate i. */
368     RORBir(i, r);
369     }
370     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
371    
372     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
373     { /* Emit a 16-bit ROR (rotate right) of register r by immediate i. */
374     RORWir(i, r);
375     }
376     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
377    
378     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
379     { /* Emit a 32-bit OR of the value at address s (no base or index register) into register d. */
380     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
381     }
382     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
383    
384     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
385     { /* Emit a 32-bit ROR (rotate right) of register r by immediate i. */
386     RORLir(i, r);
387     }
388     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
389    
390     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
391     { /* Emit a 32-bit ROR (rotate right) of register d by the count held in register r. */
392     RORLrr(r, d);
393     }
394     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
395    
396     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
397     { /* Emit a 16-bit ROR (rotate right) of register d by the count held in register r. */
398     RORWrr(r, d);
399     }
400     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
401    
402     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
403     { /* Emit an 8-bit ROR (rotate right) of register d by the count held in register r. */
404     RORBrr(r, d);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
407    
408     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
409     { /* Emit a 32-bit SHR (logical shift right) of register d by the count held in register r. */
410     SHRLrr(r, d);
411     }
412     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
413    
414     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
415     { /* Emit a 16-bit SHR (logical shift right) of register d by the count held in register r. */
416     SHRWrr(r, d);
417     }
418     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
419    
420     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
421     { /* Emit an 8-bit SHR (logical shift right) of register d by the count held in register r. */
422     SHRBrr(r, d);
423     }
424     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
425    
426     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
427     { /* Emit a 32-bit SAR (arithmetic shift right) of register d by the count held in register r. */
428 gbeauche 1.14 SARLrr(r, d);
429 gbeauche 1.13 }
430     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
431    
432     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
433     { /* Emit a 16-bit SAR (arithmetic shift right) of register d by the count held in register r. */
434 gbeauche 1.14 SARWrr(r, d);
435 gbeauche 1.13 }
436     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
437    
438     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
439     { /* Emit an 8-bit SAR (arithmetic shift right) of register d by the count held in register r. */
440 gbeauche 1.14 SARBrr(r, d);
441 gbeauche 1.13 }
442     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
443    
444     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
445     { /* Emit a 32-bit SHL (shift left) of register r by immediate i. */
446     SHLLir(i, r);
447     }
448     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
449    
450     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
451     { /* Emit a 16-bit SHL (shift left) of register r by immediate i. */
452     SHLWir(i, r);
453     }
454     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
455    
456     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
457     { /* Emit an 8-bit SHL (shift left) of register r by immediate i. */
458     SHLBir(i, r);
459     }
460     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
461    
462     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
463     { /* Emit a 32-bit SHR (logical shift right) of register r by immediate i. */
464     SHRLir(i, r);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
469     { /* Emit a 16-bit SHR (logical shift right) of register r by immediate i. */
470     SHRWir(i, r);
471     }
472     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
473    
474     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
475     { /* Emit an 8-bit SHR (logical shift right) of register r by immediate i. */
476     SHRBir(i, r);
477     }
478     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
479    
480     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
481     { /* Emit a 32-bit SAR (arithmetic shift right) of register r by immediate i. */
482 gbeauche 1.14 SARLir(i, r);
483 gbeauche 1.13 }
484     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
485    
486     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
487     { /* Emit a 16-bit SAR (arithmetic shift right) of register r by immediate i. */
488 gbeauche 1.14 SARWir(i, r);
489 gbeauche 1.13 }
490     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
491    
492     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
493     { /* Emit an 8-bit SAR (arithmetic shift right) of register r by immediate i. */
494 gbeauche 1.14 SARBir(i, r);
495 gbeauche 1.13 }
496     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
497    
498     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
499     { /* Emit SAHF; dummy_ah is not referenced in this body. */
500     SAHF();
501     }
502     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
503    
504     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
505     { /* Emit CPUID; dummy_eax is not referenced in this body. */
506     CPUID();
507     }
508     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
509    
510     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
511     { /* Emit LAHF; dummy_ah is not referenced in this body. */
512     LAHF();
513     }
514     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
515    
516     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
517     { /* Emit SETcc for condition code cc with register d as the destination. */
518     SETCCir(cc, d);
519     }
520     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
521    
522     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
523     { /* Emit SETcc for condition code cc with memory at address d (no base or index register) as the destination. */
524     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
525     }
526     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
527    
528     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
529     { /* Emit a 32-bit move of register s into register d that takes effect only under condition cc. */
530 gbeauche 1.15 if (have_cmov)
531     CMOVLrr(cc, s, d);
532     else { /* replacement using branch and mov */
533     #if defined(__x86_64__)
534     write_log("x86-64 implementations are bound to have CMOV!\n");
535     abort(); /* x86-64 builds never emit the fallback: they abort at code-generation time */
536     #endif
537     JCCSii(cc^1, 2); /* cc^1 is presumably the inverted condition; displacement 2 presumably equals the encoded size of the MOV below */
538     MOVLrr(s, d);
539     }
540 gbeauche 1.13 }
541     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
542    
543     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
544     { /* Emit a 32-bit BSF (bit scan forward) of register s with the result going to register d. */
545     BSFLrr(s, d);
546     }
547     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
548    
549 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
550     { /* Emit MOVSLQrr(s, d): by its name, a sign extension of the 32-bit value in s to 64 bits in d. */
551     MOVSLQrr(s, d);
552     }
553     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
554    
555 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
556     { /* Emit MOVSX: sign-extend the 16-bit value in register s into 32-bit register d. */
557     MOVSWLrr(s, d);
558     }
559     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
560    
561     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
562     { /* Emit MOVSX: sign-extend the 8-bit value in register s into 32-bit register d. */
563     MOVSBLrr(s, d);
564     }
565     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
566    
567     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
568     { /* Emit MOVZX: zero-extend the 16-bit value in register s into 32-bit register d. */
569     MOVZWLrr(s, d);
570     }
571     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
572    
573     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
574     { /* Emit MOVZX: zero-extend the 8-bit value in register s into 32-bit register d. */
575     MOVZBLrr(s, d);
576     }
577     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
578    
579     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
580     { /* Emit the two-operand 32-bit IMUL: register d is multiplied by register s. */
581 gbeauche 1.14 IMULLrr(s, d);
582 gbeauche 1.13 }
583     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
584    
585     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
586     { /* Emit the one-operand 32-bit IMUL on register s; only the fixed pair d==MUL_NREG1, s==MUL_NREG2 is accepted. */
587 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* any other register pair is logged and aborts at code-generation time */
588     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
589 gbeauche 1.13 abort();
590 gbeauche 1.14 }
591     IMULLr(s);
592 gbeauche 1.13 }
593     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
594    
595     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
596     { /* Emit the one-operand 32-bit MUL on register s; only the fixed pair d==MUL_NREG1, s==MUL_NREG2 is accepted. */
597 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* any other register pair is logged and aborts at code-generation time */
598     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
599 gbeauche 1.13 abort();
600 gbeauche 1.14 }
601     MULLr(s);
602 gbeauche 1.13 }
603     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
604    
605     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
606     { /* Not implemented: emits nothing and aborts whenever it is called; d and s are unused. */
607 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
608 gbeauche 1.13 }
609     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
610    
611     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
612     { /* Emit an 8-bit register-to-register MOV from s to d. */
613     MOVBrr(s, d);
614     }
615     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
616    
617     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
618     { /* Emit a 16-bit register-to-register MOV from s to d. */
619     MOVWrr(s, d);
620     }
621     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
622    
623     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
624     { /* Emit a 32-bit load into d from [baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
625 gbeauche 1.35 ADDR32 MOVLmr(0, baser, index, factor, d);
626 gbeauche 1.13 }
627     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
628    
629     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
630     { /* Emit a 16-bit load into d from [baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
631 gbeauche 1.35 ADDR32 MOVWmr(0, baser, index, factor, d);
632 gbeauche 1.13 }
633     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
634    
635     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
636     { /* Emit an 8-bit load into d from [baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
637 gbeauche 1.35 ADDR32 MOVBmr(0, baser, index, factor, d);
638 gbeauche 1.13 }
639     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
640    
641     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
642     { /* Emit a 32-bit store of register s to [baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
643 gbeauche 1.35 ADDR32 MOVLrm(s, 0, baser, index, factor);
644 gbeauche 1.13 }
645     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
646    
647     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
648     { /* Emit a 16-bit store of register s to [baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
649 gbeauche 1.35 ADDR32 MOVWrm(s, 0, baser, index, factor);
650 gbeauche 1.13 }
651     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
652    
653     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
654     { /* Emit an 8-bit store of register s to [baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
655 gbeauche 1.35 ADDR32 MOVBrm(s, 0, baser, index, factor);
656 gbeauche 1.13 }
657     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
658    
659     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
660     { /* Emit a 32-bit store of register s to [base + baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
661 gbeauche 1.35 ADDR32 MOVLrm(s, base, baser, index, factor);
662 gbeauche 1.13 }
663     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
664    
665     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
666     { /* Emit a 16-bit store of register s to [base + baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
667 gbeauche 1.35 ADDR32 MOVWrm(s, base, baser, index, factor);
668 gbeauche 1.13 }
669     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
670    
671     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
672     { /* Emit an 8-bit store of register s to [base + baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
673 gbeauche 1.35 ADDR32 MOVBrm(s, base, baser, index, factor);
674 gbeauche 1.13 }
675     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
676    
677     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
678     { /* Emit a 32-bit load into d from [base + baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
679 gbeauche 1.35 ADDR32 MOVLmr(base, baser, index, factor, d);
680 gbeauche 1.13 }
681     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
682    
683     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
684     { /* Emit a 16-bit load into d from [base + baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
685 gbeauche 1.35 ADDR32 MOVWmr(base, baser, index, factor, d);
686 gbeauche 1.13 }
687     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
688    
689     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
690     { /* Emit an 8-bit load into d from [base + baser + index*factor]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
691 gbeauche 1.35 ADDR32 MOVBmr(base, baser, index, factor, d);
692 gbeauche 1.13 }
693     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
694    
695     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
696     { /* Emit a 32-bit load into d from [base + index*factor] (no base register); ADDR32 adds the 0x67 prefix on x86-64 builds. */
697 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
698 gbeauche 1.13 }
699     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
700    
701     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
702     { /* Emit a 32-bit load into d from [base + index*factor] that takes effect only under condition cond. */
703 gbeauche 1.15 if (have_cmov)
704 gbeauche 1.35 ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d); /* ADDR32 ends in a comma, so prefix and CMOV form one statement under the if */
705 gbeauche 1.15 else { /* replacement using branch and mov */
706     #if defined(__x86_64__)
707     write_log("x86-64 implementations are bound to have CMOV!\n");
708     abort(); /* x86-64 builds never emit the fallback: they abort at code-generation time */
709     #endif
710     JCCSii(cond^1, 7); /* displacement 7 presumably equals the size of the MOV below; ADDR32 is empty on the non-x86-64 builds that reach this line */
711 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
712 gbeauche 1.15 }
713 gbeauche 1.13 }
714     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
715    
716     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
717     { /* Emit a 32-bit load into d from address mem (no base or index register) that takes effect only under condition cond. */
718 gbeauche 1.15 if (have_cmov)
719     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
720     else { /* replacement using branch and mov */
721     #if defined(__x86_64__)
722     write_log("x86-64 implementations are bound to have CMOV!\n");
723     abort(); /* x86-64 builds never emit the fallback: they abort at code-generation time */
724     #endif
725     JCCSii(cond^1, 6); /* cond^1 is presumably the inverted condition; displacement 6 presumably equals the encoded size of the MOV below */
726     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
727     }
728 gbeauche 1.13 }
729     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
730    
731     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
732     { /* Emit a 32-bit load into d from [s + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
733 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
734 gbeauche 1.13 }
735     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
736    
737     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
738     { /* Emit a 16-bit load into d from [s + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
739 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
740 gbeauche 1.13 }
741     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
742    
743     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
744     { /* Emit an 8-bit load into d from [s + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
745 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
746 gbeauche 1.13 }
747     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
748    
749     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
750     { /* Emit a 32-bit load into d from [s + offset]; the body is identical to raw_mov_l_rR. */
751 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
752 gbeauche 1.13 }
753     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
754    
755     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
756     { /* Emit a 16-bit load into d from [s + offset]; the body is identical to raw_mov_w_rR. */
757 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
758 gbeauche 1.13 }
759     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
760    
761     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
762     { /* Emit an 8-bit load into d from [s + offset]; the body is identical to raw_mov_b_rR. */
763 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
764 gbeauche 1.13 }
765     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
766    
767     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
768     { /* Emit a 32-bit store of immediate i to [d + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
769 gbeauche 1.35 ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
770 gbeauche 1.13 }
771     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
772    
773     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
774     { /* Emit a 16-bit store of immediate i to [d + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
775 gbeauche 1.35 ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
776 gbeauche 1.13 }
777     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
778    
779     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
780     { /* Emit an 8-bit store of immediate i to [d + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
781 gbeauche 1.35 ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
782 gbeauche 1.13 }
783     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
784    
785     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
786     { /* Emit a 32-bit store of register s to [d + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
787 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
788 gbeauche 1.13 }
789     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
790    
791     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
792     { /* Emit a 16-bit store of register s to [d + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
793 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
794 gbeauche 1.13 }
795     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
796    
797     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
798     { /* Emit an 8-bit store of register s to [d + offset]; ADDR32 adds the 0x67 prefix on x86-64 builds. */
799 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
800 gbeauche 1.13 }
801     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
802    
803     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
804     { /* Emit a 32-bit LEA computing s + offset into register d; no ADDR32 prefix is used here. */
805     LEALmr(offset, s, X86_NOREG, 1, d);
806     }
807     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
808    
809     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
810     { /* Emit a 32-bit LEA computing s + index*factor + offset into register d; no ADDR32 prefix is used here. */
811     LEALmr(offset, s, index, factor, d);
812     }
813     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
814    
815     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
816     { /* Emit a 32-bit LEA computing s + index*factor (zero displacement) into register d; no ADDR32 prefix is used here. */
817     LEALmr(0, s, index, factor, d);
818     }
819     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
820    
821     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
822     { /* Emit a 32-bit store of register s to [d + offset]; the body is identical to raw_mov_l_Rr. */
823 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
824 gbeauche 1.13 }
825     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
826    
827     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
828     { /* Emit a 16-bit store of register s to [d + offset]; the body is identical to raw_mov_w_Rr. */
829 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
830 gbeauche 1.13 }
831     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
832    
833     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
834     { /* Emit an 8-bit store of register s to [d + offset]; the body is identical to raw_mov_b_Rr. */
835 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
836 gbeauche 1.13 }
837     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
838    
839     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
840     { /* Emit a 32-bit BSWAP (byte-order reversal) of register r. */
841     BSWAPLr(r);
842     }
843     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
844    
845     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
846     { /* Swap the two bytes of 16-bit register r by emitting a 16-bit ROL by 8. */
847     ROLWir(8, r);
848     }
849     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
850    
851     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
852     { /* Emit a 32-bit register-to-register MOV from s to d. */
853     MOVLrr(s, d);
854     }
855     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
856    
857     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
858     { /* Emit a 32-bit store of register s to memory at address d (no base or index register). */
859     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
860     }
861     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
862    
863     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
864     { /* Emit a 16-bit store of register s to memory at address d (no base or index register). */
865     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
866     }
867     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
868    
869     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
870     { /* Emit a 16-bit load into register d from memory at address s (no base or index register). */
871     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
872     }
873     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
874    
875     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
876     { /* Emit an 8-bit store of register s to memory at address d (no base or index register). */
877     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
878     }
879     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
880    
881     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
882     { /* Emit an 8-bit load into register d from memory at address s (no base or index register). */
883     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
884     }
885     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
886    
/* Forwards (s, d) to the MOVLir emitter macro; presumably loads the 32-bit immediate s into d. */
887     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
888     {
889     MOVLir(s, d);
890     }
891     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
892    
/* Forwards (s, d) to the MOVWir emitter macro; presumably loads the 16-bit immediate s into d. */
893     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
894     {
895     MOVWir(s, d);
896     }
897     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
898    
/* Forwards (s, d) to the MOVBir emitter macro; presumably loads the 8-bit immediate s into d. */
899     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
900     {
901     MOVBir(s, d);
902     }
903     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
904    
/* Emits ADCLim with immediate s and memory fields (d, X86_NOREG, X86_NOREG, 1);
   presumably a 32-bit add-with-carry of s into the memory at absolute address d. */
905     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
906     {
907     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
908     }
909     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
910    
/* Emits ADDLim with immediate s and memory fields (d, X86_NOREG, X86_NOREG, 1);
   presumably a 32-bit add of s to the memory at absolute address d. */
911     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
912     {
913     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
914     }
915     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
916    
/* Emits ADDWim with immediate s and memory fields (d, X86_NOREG, X86_NOREG, 1);
   presumably a 16-bit add of s to the memory at absolute address d. */
917     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
918     {
919     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
920     }
921     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
922    
/* Emits ADDBim with immediate s and memory fields (d, X86_NOREG, X86_NOREG, 1);
   presumably an 8-bit add of s to the memory at absolute address d. */
923     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
924     {
925     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
926     }
927     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
928    
/* Forwards (i, d) to the TESTLir emitter macro; presumably a 32-bit TEST of d against immediate i. */
929     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
930     {
931     TESTLir(i, d);
932     }
933     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
934    
/* Forwards (s, d) to the TESTLrr emitter macro; presumably a 32-bit register/register TEST. */
935     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
936     {
937     TESTLrr(s, d);
938     }
939     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
940    
/* Forwards (s, d) to the TESTWrr emitter macro; presumably a 16-bit register/register TEST. */
941     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
942     {
943     TESTWrr(s, d);
944     }
945     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
946    
/* Forwards (s, d) to the TESTBrr emitter macro; presumably an 8-bit register/register TEST. */
947     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
948     {
949     TESTBrr(s, d);
950     }
951     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
952    
/* Forwards (i, d) to the XORLir emitter macro; presumably 32-bit d ^= i. */
953 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
954     {
955     XORLir(i, d);
956     }
957     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
958    
/* Forwards (i, d) to the ANDLir emitter macro; presumably 32-bit d &= i. */
959 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
960     {
961     ANDLir(i, d);
962     }
963     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
964    
/* Forwards (i, d) to the ANDWir emitter macro; presumably 16-bit d &= i. */
965     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
966     {
967     ANDWir(i, d);
968     }
969     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
970    
/* Forwards (s, d) to the ANDLrr emitter macro; presumably 32-bit d &= s. */
971     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
972     {
973     ANDLrr(s, d);
974     }
975     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
976    
/* Forwards (s, d) to the ANDWrr emitter macro; presumably 16-bit d &= s. */
977     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
978     {
979     ANDWrr(s, d);
980     }
981     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
982    
/* Forwards (s, d) to the ANDBrr emitter macro; presumably 8-bit d &= s. */
983     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
984     {
985     ANDBrr(s, d);
986     }
987     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
988    
/* Forwards (i, d) to the ORLir emitter macro; presumably 32-bit d |= i. */
989     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
990     {
991     ORLir(i, d);
992     }
993     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
994    
/* Forwards (s, d) to the ORLrr emitter macro; presumably 32-bit d |= s. */
995     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
996     {
997     ORLrr(s, d);
998     }
999     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1000    
/* Forwards (s, d) to the ORWrr emitter macro; presumably 16-bit d |= s. */
1001     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1002     {
1003     ORWrr(s, d);
1004     }
1005     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1006    
/* Forwards (s, d) to the ORBrr emitter macro; presumably 8-bit d |= s. */
1007     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1008     {
1009     ORBrr(s, d);
1010     }
1011     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1012    
/* Forwards (s, d) to the ADCLrr emitter macro; presumably 32-bit add-with-carry d += s + CF. */
1013     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1014     {
1015     ADCLrr(s, d);
1016     }
1017     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1018    
/* Forwards (s, d) to the ADCWrr emitter macro; presumably 16-bit add-with-carry. */
1019     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1020     {
1021     ADCWrr(s, d);
1022     }
1023     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1024    
/* Forwards (s, d) to the ADCBrr emitter macro; presumably 8-bit add-with-carry. */
1025     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1026     {
1027     ADCBrr(s, d);
1028     }
1029     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1030    
/* Forwards (s, d) to the ADDLrr emitter macro; presumably 32-bit d += s. */
1031     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1032     {
1033     ADDLrr(s, d);
1034     }
1035     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1036    
/* Forwards (s, d) to the ADDWrr emitter macro; presumably 16-bit d += s. */
1037     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1038     {
1039     ADDWrr(s, d);
1040     }
1041     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1042    
/* Forwards (s, d) to the ADDBrr emitter macro; presumably 8-bit d += s. */
1043     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1044     {
1045     ADDBrr(s, d);
1046     }
1047     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1048    
/* Forwards (i, d) to the SUBLir emitter macro; presumably 32-bit d -= i. */
1049     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1050     {
1051     SUBLir(i, d);
1052     }
1053     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1054    
/* Forwards (i, d) to the SUBBir emitter macro; presumably 8-bit d -= i. */
1055     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1056     {
1057     SUBBir(i, d);
1058     }
1059     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1060    
/* Forwards (i, d) to the ADDLir emitter macro; presumably 32-bit d += i. */
1061     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1062     {
1063     ADDLir(i, d);
1064     }
1065     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1066    
/* Forwards (i, d) to the ADDWir emitter macro; presumably 16-bit d += i. */
1067     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1068     {
1069     ADDWir(i, d);
1070     }
1071     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1072    
/* Forwards (i, d) to the ADDBir emitter macro; presumably 8-bit d += i. */
1073     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1074     {
1075     ADDBir(i, d);
1076     }
1077     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1078    
/* Forwards (s, d) to the SBBLrr emitter macro; presumably 32-bit subtract-with-borrow d -= s + CF. */
1079     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1080     {
1081     SBBLrr(s, d);
1082     }
1083     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1084    
/* Forwards (s, d) to the SBBWrr emitter macro; presumably 16-bit subtract-with-borrow. */
1085     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1086     {
1087     SBBWrr(s, d);
1088     }
1089     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1090    
/* Forwards (s, d) to the SBBBrr emitter macro; presumably 8-bit subtract-with-borrow. */
1091     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1092     {
1093     SBBBrr(s, d);
1094     }
1095     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1096    
/* Forwards (s, d) to the SUBLrr emitter macro; presumably 32-bit d -= s. */
1097     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1098     {
1099     SUBLrr(s, d);
1100     }
1101     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1102    
/* Forwards (s, d) to the SUBWrr emitter macro; presumably 16-bit d -= s. */
1103     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1104     {
1105     SUBWrr(s, d);
1106     }
1107     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1108    
/* Forwards (s, d) to the SUBBrr emitter macro; presumably 8-bit d -= s. */
1109     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1110     {
1111     SUBBrr(s, d);
1112     }
1113     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1114    
/* Forwards (s, d) to the CMPLrr emitter macro; presumably a 32-bit compare of d with s. */
1115     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1116     {
1117     CMPLrr(s, d);
1118     }
1119     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1120    
/* Forwards (i, r) to the CMPLir emitter macro; presumably a 32-bit compare of r with immediate i. */
1121     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1122     {
1123     CMPLir(i, r);
1124     }
1125     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1126    
/* Forwards (s, d) to the CMPWrr emitter macro; presumably a 16-bit compare of d with s. */
1127     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1128     {
1129     CMPWrr(s, d);
1130     }
1131     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1132    
/* Emits CMPBim with immediate s and memory fields (d, X86_NOREG, X86_NOREG, 1);
   presumably an 8-bit compare of the byte at absolute address d with s.
   The closing LENDFUNC now names raw_cmp_b_mi so that it pairs with the
   opening LOWFUNC (it previously named raw_cmp_l_mi). */
1133     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1134     {
1135     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1136     }
1137     LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1138    
/* Forwards (i, d) to the CMPBir emitter macro; presumably an 8-bit compare of d with immediate i. */
1139     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1140     {
1141     CMPBir(i, d);
1142     }
1143     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1144    
/* Forwards (s, d) to the CMPBrr emitter macro; presumably an 8-bit compare of d with s. */
1145     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1146     {
1147     CMPBrr(s, d);
1148     }
1149     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1150    
/* Expands ADDR32, then CMPLmr with memory fields (offset, X86_NOREG, index, factor)
   and register d: no base register is passed, so presumably a 32-bit compare
   of d with the memory at [offset + index*factor]. */
1151     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1152     {
1153 gbeauche 1.35 ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
1154 gbeauche 1.13 }
1155     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1156    
/* Forwards (s, d) to the XORLrr emitter macro; presumably 32-bit d ^= s. */
1157     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1158     {
1159     XORLrr(s, d);
1160     }
1161     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1162    
/* Forwards (s, d) to the XORWrr emitter macro; presumably 16-bit d ^= s. */
1163     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1164     {
1165     XORWrr(s, d);
1166     }
1167     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1168    
/* Forwards (s, d) to the XORBrr emitter macro; presumably 8-bit d ^= s. */
1169     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1170     {
1171     XORBrr(s, d);
1172     }
1173     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1174    
/* Emits SUBLim with immediate s and memory fields (d, X86_NOREG, X86_NOREG, 1);
   presumably a 32-bit subtract of s from the memory at absolute address d. */
1175     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1176     {
1177     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1178     }
1179     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1180    
/* Emits CMPLim with immediate s and memory fields (d, X86_NOREG, X86_NOREG, 1);
   presumably a 32-bit compare of the memory at absolute address d with s. */
1181     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1182     {
1183     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1184     }
1185     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1186    
/* Forwards (r2, r1) to the XCHGLrr emitter macro; presumably a 32-bit exchange of r1 and r2. */
1187     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1188     {
1189     XCHGLrr(r2, r1);
1190     }
1191     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1192    
/* Invokes the PUSHF emitter macro (defined elsewhere); presumably pushes the flags register. */
1193     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1194     {
1195 gbeauche 1.18 PUSHF();
1196 gbeauche 1.13 }
1197     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1198    
/* Invokes the POPF emitter macro (defined elsewhere); presumably pops the flags register. */
1199     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1200     {
1201 gbeauche 1.18 POPF();
1202 gbeauche 1.13 }
1203     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1204    
1205 gbeauche 1.34 /* Generate floating-point instructions */
/* Emits FADDLm with memory fields (s, X86_NOREG, X86_NOREG, 1); presumably an
   x87 add of the value stored at absolute address s (operand width is set by
   the FADDLm macro, defined elsewhere). */
1206     static inline void x86_fadd_m(MEMR s)
1207     {
1208     FADDLm(s,X86_NOREG,X86_NOREG,1);
1209     }
1210    
1211 gbeauche 1.13 #else
1212    
/* Encoding-choice switches for the hand-written emitters below.
   optimize_accum gates the short accumulator opcode (see raw_sub_w_ri) and
   optimize_shift_once gates the shift/rotate-by-one short forms; optimize_imm8
   is not referenced within this part of the file. */
1213 gbeauche 1.2 const bool optimize_accum = true;
1214 gbeauche 1.1 const bool optimize_imm8 = true;
1215     const bool optimize_shift_once = true;
1216    
1217     /*************************************************************************
1218     * Actual encoding of the instructions on the target CPU *
1219     *************************************************************************/
1220    
/* Returns non-zero when register number r equals EAX_INDEX (the accumulator). */
1221 gbeauche 1.2 static __inline__ int isaccum(int r)
1222     {
1223     return (r == EAX_INDEX);
1224     }
1225    
/* Returns non-zero when x lies in [-128, 127], i.e. fits a signed 8-bit immediate. */
1226 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1227     {
1228     return (x>=-128 && x<=127);
1229     }
1230    
/* Returns non-zero when x lies in [-32768, 32767], i.e. fits a signed 16-bit immediate. */
1231     static __inline__ int isword(uae_s32 x)
1232     {
1233     return (x>=-32768 && x<=32767);
1234     }
1235    
/* Emits the single byte 0x50+r: the one-byte x86 PUSH r32 form for register number r. */
1236     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1237     {
1238     emit_byte(0x50+r);
1239     }
1240     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1241    
/* Emits the single byte 0x58+r: the one-byte x86 POP r32 form for register number r. */
1242     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1243     {
1244     emit_byte(0x58+r);
1245     }
1246     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1247    
/* Emits 8F 05 followed by d as a 32-bit value: x86 POP r/m32 with a
   disp32-only ModRM, i.e. pop into the absolute address d. */
1248 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1249     {
1250     emit_byte(0x8f);
1251     emit_byte(0x05);
1252     emit_long(d);
1253     }
1254     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1255    
/* Emits 0F BA, ModRM 0xE0+r, then i via emit_byte: x86 BT r32, imm8
   (opcode extension /4) testing bit i of register r. */
1256 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1257     {
1258     emit_byte(0x0f);
1259     emit_byte(0xba);
1260     emit_byte(0xe0+r);
1261     emit_byte(i);
1262     }
1263     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1264    
/* Emits 0F A3 with ModRM 0xC0+8*b+r: x86 BT r32, r32 with r as the tested
   register (rm field) and b as the bit-index register (reg field). */
1265     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1266     {
1267     emit_byte(0x0f);
1268     emit_byte(0xa3);
1269     emit_byte(0xc0+8*b+r);
1270     }
1271     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1272    
/* Emits 0F BA, ModRM 0xF8+r, then i via emit_byte: x86 BTC r32, imm8
   (opcode extension /7), bit test and complement. */
1273     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1274     {
1275     emit_byte(0x0f);
1276     emit_byte(0xba);
1277     emit_byte(0xf8+r);
1278     emit_byte(i);
1279     }
1280     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1281    
/* Emits 0F BB with ModRM 0xC0+8*b+r: x86 BTC r32, r32 (bit test and
   complement) on register r with the bit index in register b. */
1282     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1283     {
1284     emit_byte(0x0f);
1285     emit_byte(0xbb);
1286     emit_byte(0xc0+8*b+r);
1287     }
1288     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1289    
1290    
/* Emits 0F BA, ModRM 0xF0+r, then i via emit_byte: x86 BTR r32, imm8
   (opcode extension /6), bit test and reset. */
1291     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1292     {
1293     emit_byte(0x0f);
1294     emit_byte(0xba);
1295     emit_byte(0xf0+r);
1296     emit_byte(i);
1297     }
1298     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1299    
/* Emits 0F B3 with ModRM 0xC0+8*b+r: x86 BTR r32, r32 (bit test and reset)
   on register r with the bit index in register b. */
1300     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1301     {
1302     emit_byte(0x0f);
1303     emit_byte(0xb3);
1304     emit_byte(0xc0+8*b+r);
1305     }
1306     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1307    
/* Emits 0F BA, ModRM 0xE8+r, then i via emit_byte: x86 BTS r32, imm8
   (opcode extension /5), bit test and set. */
1308     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1309     {
1310     emit_byte(0x0f);
1311     emit_byte(0xba);
1312     emit_byte(0xe8+r);
1313     emit_byte(i);
1314     }
1315     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1316    
/* Emits 0F AB with ModRM 0xC0+8*b+r: x86 BTS r32, r32 (bit test and set)
   on register r with the bit index in register b. */
1317     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1318     {
1319     emit_byte(0x0f);
1320     emit_byte(0xab);
1321     emit_byte(0xc0+8*b+r);
1322     }
1323     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1324    
/* Emits a 16-bit SUB of immediate i from register d. The 0x66 operand-size
   prefix always comes first. When isbyte(i) holds, the short form 83 /5 with
   a one-byte immediate is used. Otherwise the immediate is written with
   emit_word, preceded by the accumulator opcode 0x2D when optimize_accum is
   set and isaccum(d) holds, or by 81 /5 (ModRM 0xE8+d) in the general case. */
1325     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1326     {
1327     emit_byte(0x66);
1328     if (isbyte(i)) {
1329     emit_byte(0x83);
1330     emit_byte(0xe8+d);
1331     emit_byte(i);
1332     }
1333     else {
1334 gbeauche 1.2 if (optimize_accum && isaccum(d))
1335     emit_byte(0x2d);
1336     else {
1337 gbeauche 1.1 emit_byte(0x81);
1338     emit_byte(0xe8+d);
1339 gbeauche 1.2 }
1340 gbeauche 1.1 emit_word(i);
1341     }
1342     }
1343     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1344    
1345    
/* Emits 8B, ModRM 0x05+8*d, then s as a 32-bit value: x86 MOV r32, r/m32
   with a disp32-only operand, i.e. load register d from absolute address s. */
1346     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1347     {
1348     emit_byte(0x8b);
1349     emit_byte(0x05+8*d);
1350     emit_long(s);
1351     }
1352     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1353    
/* Emits C7 05, the 32-bit address d, then the 32-bit immediate s:
   x86 MOV r/m32, imm32 storing s at absolute address d. */
1354     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1355     {
1356     emit_byte(0xc7);
1357     emit_byte(0x05);
1358     emit_long(d);
1359     emit_long(s);
1360     }
1361     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1362    
/* Emits 66 C7 05, the 32-bit address d, then s via emit_word: the 16-bit
   (operand-size prefixed) MOV r/m16, imm16 storing s at absolute address d. */
1363     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1364     {
1365     emit_byte(0x66);
1366     emit_byte(0xc7);
1367     emit_byte(0x05);
1368     emit_long(d);
1369     emit_word(s);
1370     }
1371     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1372    
/* Emits C6 05, the 32-bit address d, then s via emit_byte:
   x86 MOV r/m8, imm8 storing s at absolute address d. */
1373     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1374     {
1375     emit_byte(0xc6);
1376     emit_byte(0x05);
1377     emit_long(d);
1378     emit_byte(s);
1379     }
1380     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1381    
/* 8-bit ROL of the byte at absolute address d. When optimize_shift_once is
   set and i == 1 the rotate-by-one form D0 /0 is emitted (no immediate byte);
   otherwise C0 /0 followed by the address and then i via emit_byte. */
1382     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1383     {
1384     if (optimize_shift_once && (i == 1)) {
1385     emit_byte(0xd0);
1386     emit_byte(0x05);
1387     emit_long(d);
1388     }
1389     else {
1390     emit_byte(0xc0);
1391     emit_byte(0x05);
1392     emit_long(d);
1393     emit_byte(i);
1394     }
1395     }
1396     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1397    
/* 8-bit ROL of register r by immediate i: D0 /0 (ModRM 0xC0+r) when
   optimize_shift_once is set and i == 1, else C0 /0 followed by i. */
1398     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1399     {
1400     if (optimize_shift_once && (i == 1)) {
1401     emit_byte(0xd0);
1402     emit_byte(0xc0+r);
1403     }
1404     else {
1405     emit_byte(0xc0);
1406     emit_byte(0xc0+r);
1407     emit_byte(i);
1408     }
1409     }
1410     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1411    
/* 16-bit ROL of register r by immediate i: always 66 C1 /0 (ModRM 0xC0+r)
   followed by i; this variant has no rotate-by-one short form. */
1412     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1413     {
1414     emit_byte(0x66);
1415     emit_byte(0xc1);
1416     emit_byte(0xc0+r);
1417     emit_byte(i);
1418     }
1419     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1420    
/* 32-bit ROL of register r by immediate i: D1 /0 (ModRM 0xC0+r) when
   optimize_shift_once is set and i == 1, else C1 /0 followed by i. */
1421     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1422     {
1423     if (optimize_shift_once && (i == 1)) {
1424     emit_byte(0xd1);
1425     emit_byte(0xc0+r);
1426     }
1427     else {
1428     emit_byte(0xc1);
1429     emit_byte(0xc0+r);
1430     emit_byte(i);
1431     }
1432     }
1433     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1434    
/* Emits D3 with ModRM 0xC0+d: x86 ROL r32, CL on register d. The count
   register r is never encoded -- the opcode implicitly uses CL, so callers
   presumably guarantee that r is ECX; verify against the register allocator. */
1435     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1436     {
1437     emit_byte(0xd3);
1438     emit_byte(0xc0+d);
1439     }
1440     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1441    
/* Emits 66 D3 with ModRM 0xC0+d: x86 ROL r16, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1442     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1443     {
1444     emit_byte(0x66);
1445     emit_byte(0xd3);
1446     emit_byte(0xc0+d);
1447     }
1448     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1449    
/* Emits D2 with ModRM 0xC0+d: x86 ROL r8, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1450     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1451     {
1452     emit_byte(0xd2);
1453     emit_byte(0xc0+d);
1454     }
1455     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1456    
/* Emits D3 with ModRM 0xE0+d: x86 SHL r32, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1457     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1458     {
1459     emit_byte(0xd3);
1460     emit_byte(0xe0+d);
1461     }
1462     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1463    
/* Emits 66 D3 with ModRM 0xE0+d: x86 SHL r16, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1464     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1465     {
1466     emit_byte(0x66);
1467     emit_byte(0xd3);
1468     emit_byte(0xe0+d);
1469     }
1470     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1471    
/* Emits D2 with ModRM 0xE0+d: x86 SHL r8, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1472     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1473     {
1474     emit_byte(0xd2);
1475     emit_byte(0xe0+d);
1476     }
1477     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1478    
/* 8-bit ROR of register r by immediate i: D0 /1 (ModRM 0xC8+r) when
   optimize_shift_once is set and i == 1, else C0 /1 followed by i. */
1479     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1480     {
1481     if (optimize_shift_once && (i == 1)) {
1482     emit_byte(0xd0);
1483     emit_byte(0xc8+r);
1484     }
1485     else {
1486     emit_byte(0xc0);
1487     emit_byte(0xc8+r);
1488     emit_byte(i);
1489     }
1490     }
1491     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1492    
/* 16-bit ROR of register r by immediate i: always 66 C1 /1 (ModRM 0xC8+r)
   followed by i; this variant has no rotate-by-one short form. */
1493     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1494     {
1495     emit_byte(0x66);
1496     emit_byte(0xc1);
1497     emit_byte(0xc8+r);
1498     emit_byte(i);
1499     }
1500     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1501    
1502     // gb-- used for making an fpcr value in compemu_fpp.cpp
/* Emits 0B, ModRM 0x05+8*d, then s as a 32-bit value: x86 OR r32, r/m32 with
   a disp32-only operand, i.e. d |= the 32-bit value at absolute address s. */
1503     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1504     {
1505     emit_byte(0x0b);
1506     emit_byte(0x05+8*d);
1507     emit_long(s);
1508     }
1509     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1510    
/* 32-bit ROR of register r by immediate i: D1 /1 (ModRM 0xC8+r) when
   optimize_shift_once is set and i == 1, else C1 /1 followed by i. */
1511     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1512     {
1513     if (optimize_shift_once && (i == 1)) {
1514     emit_byte(0xd1);
1515     emit_byte(0xc8+r);
1516     }
1517     else {
1518     emit_byte(0xc1);
1519     emit_byte(0xc8+r);
1520     emit_byte(i);
1521     }
1522     }
1523     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1524    
/* Emits D3 with ModRM 0xC8+d: x86 ROR r32, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1525     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1526     {
1527     emit_byte(0xd3);
1528     emit_byte(0xc8+d);
1529     }
1530     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1531    
/* Emits 66 D3 with ModRM 0xC8+d: x86 ROR r16, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1532     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1533     {
1534     emit_byte(0x66);
1535     emit_byte(0xd3);
1536     emit_byte(0xc8+d);
1537     }
1538     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1539    
/* Emits D2 with ModRM 0xC8+d: x86 ROR r8, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1540     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1541     {
1542     emit_byte(0xd2);
1543     emit_byte(0xc8+d);
1544     }
1545     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1546    
/* Emits D3 with ModRM 0xE8+d: x86 SHR r32, CL (logical right shift) on d.
   r is not encoded; the count comes from CL (callers presumably pin r to ECX). */
1547     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1548     {
1549     emit_byte(0xd3);
1550     emit_byte(0xe8+d);
1551     }
1552     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1553    
/* Emits 66 D3 with ModRM 0xE8+d: x86 SHR r16, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1554     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1555     {
1556     emit_byte(0x66);
1557     emit_byte(0xd3);
1558     emit_byte(0xe8+d);
1559     }
1560     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1561    
/* Emits D2 with ModRM 0xE8+d: x86 SHR r8, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1562     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1563     {
1564     emit_byte(0xd2);
1565     emit_byte(0xe8+d);
1566     }
1567     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1568    
/* Emits D3 with ModRM 0xF8+d: x86 SAR r32, CL (arithmetic right shift) on d.
   r is not encoded; the count comes from CL (callers presumably pin r to ECX). */
1569     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1570     {
1571     emit_byte(0xd3);
1572     emit_byte(0xf8+d);
1573     }
1574     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1575    
/* Emits 66 D3 with ModRM 0xF8+d: x86 SAR r16, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1576     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1577     {
1578     emit_byte(0x66);
1579     emit_byte(0xd3);
1580     emit_byte(0xf8+d);
1581     }
1582     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1583    
/* Emits D2 with ModRM 0xF8+d: x86 SAR r8, CL on register d. r is not
   encoded; the count comes from CL (callers presumably pin r to ECX). */
1584     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1585     {
1586     emit_byte(0xd2);
1587     emit_byte(0xf8+d);
1588     }
1589     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1590    
/* 32-bit SHL of register r by immediate i: D1 /4 (ModRM 0xE0+r) when
   optimize_shift_once is set and i == 1, else C1 /4 followed by i. */
1591     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1592     {
1593     if (optimize_shift_once && (i == 1)) {
1594     emit_byte(0xd1);
1595     emit_byte(0xe0+r);
1596     }
1597     else {
1598     emit_byte(0xc1);
1599     emit_byte(0xe0+r);
1600     emit_byte(i);
1601     }
1602     }
1603     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1604    
/* 16-bit SHL of register r by immediate i: always 66 C1 /4 (ModRM 0xE0+r)
   followed by i; this variant has no shift-by-one short form. */
1605     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1606     {
1607     emit_byte(0x66);
1608     emit_byte(0xc1);
1609     emit_byte(0xe0+r);
1610     emit_byte(i);
1611     }
1612     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1613    
/* 8-bit SHL of register r by immediate i: D0 /4 (ModRM 0xE0+r) when
   optimize_shift_once is set and i == 1, else C0 /4 followed by i. */
1614     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1615     {
1616     if (optimize_shift_once && (i == 1)) {
1617     emit_byte(0xd0);
1618     emit_byte(0xe0+r);
1619     }
1620     else {
1621     emit_byte(0xc0);
1622     emit_byte(0xe0+r);
1623     emit_byte(i);
1624     }
1625     }
1626     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1627    
/* 32-bit SHR (logical) of register r by immediate i: D1 /5 (ModRM 0xE8+r)
   when optimize_shift_once is set and i == 1, else C1 /5 followed by i. */
1628     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1629     {
1630     if (optimize_shift_once && (i == 1)) {
1631     emit_byte(0xd1);
1632     emit_byte(0xe8+r);
1633     }
1634     else {
1635     emit_byte(0xc1);
1636     emit_byte(0xe8+r);
1637     emit_byte(i);
1638     }
1639     }
1640     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1641    
/* 16-bit SHR of register r by immediate i: always 66 C1 /5 (ModRM 0xE8+r)
   followed by i; this variant has no shift-by-one short form. */
1642     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1643     {
1644     emit_byte(0x66);
1645     emit_byte(0xc1);
1646     emit_byte(0xe8+r);
1647     emit_byte(i);
1648     }
1649     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1650    
/* 8-bit SHR of register r by immediate i: D0 /5 (ModRM 0xE8+r) when
   optimize_shift_once is set and i == 1, else C0 /5 followed by i. */
1651     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1652     {
1653     if (optimize_shift_once && (i == 1)) {
1654     emit_byte(0xd0);
1655     emit_byte(0xe8+r);
1656     }
1657     else {
1658     emit_byte(0xc0);
1659     emit_byte(0xe8+r);
1660     emit_byte(i);
1661     }
1662     }
1663     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1664    
/* 32-bit SAR (arithmetic) of register r by immediate i: D1 /7 (ModRM 0xF8+r)
   when optimize_shift_once is set and i == 1, else C1 /7 followed by i. */
1665     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1666     {
1667     if (optimize_shift_once && (i == 1)) {
1668     emit_byte(0xd1);
1669     emit_byte(0xf8+r);
1670     }
1671     else {
1672     emit_byte(0xc1);
1673     emit_byte(0xf8+r);
1674     emit_byte(i);
1675     }
1676     }
1677     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1678    
/* 16-bit SAR of register r by immediate i: always 66 C1 /7 (ModRM 0xF8+r)
   followed by i; this variant has no shift-by-one short form. */
1679     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1680     {
1681     emit_byte(0x66);
1682     emit_byte(0xc1);
1683     emit_byte(0xf8+r);
1684     emit_byte(i);
1685     }
1686     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1687    
/* 8-bit SAR of register r by immediate i: D0 /7 (ModRM 0xF8+r) when
   optimize_shift_once is set and i == 1, else C0 /7 followed by i. */
1688     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1689     {
1690     if (optimize_shift_once && (i == 1)) {
1691     emit_byte(0xd0);
1692     emit_byte(0xf8+r);
1693     }
1694     else {
1695     emit_byte(0xc0);
1696     emit_byte(0xf8+r);
1697     emit_byte(i);
1698     }
1699     }
1700     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1701    
/* Emits the single byte 0x9E (x86 SAHF). dummy_ah is not encoded; it
   presumably exists only so the register allocator tracks the AH dependency. */
1702     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1703     {
1704     emit_byte(0x9e);
1705     }
1706     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1707    
/* Emits 0F A2 (x86 CPUID). dummy_eax is not encoded; it presumably exists
   only so the register allocator tracks the EAX dependency. */
1708     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1709     {
1710     emit_byte(0x0f);
1711     emit_byte(0xa2);
1712     }
1713     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1714    
/* Emits the single byte 0x9F (x86 LAHF). dummy_ah is not encoded; it
   presumably exists only so the register allocator tracks the AH result. */
1715     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1716     {
1717     emit_byte(0x9f);
1718     }
1719     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1720    
/* Emits 0F 90+cc with ModRM 0xC0+d: x86 SETcc on 8-bit register d, where cc
   is added directly into the opcode byte as the condition code. */
1721     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1722     {
1723     emit_byte(0x0f);
1724     emit_byte(0x90+cc);
1725     emit_byte(0xc0+d);
1726     }
1727     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1728    
/* Emits 0F 90+cc, ModRM 0x05, then d as a 32-bit value: x86 SETcc writing
   the condition byte to absolute address d. */
1729     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1730     {
1731     emit_byte(0x0f);
1732     emit_byte(0x90+cc);
1733     emit_byte(0x05);
1734     emit_long(d);
1735     }
1736     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1737    
/* Conditional 32-bit move d = s when condition cc holds.
   With have_cmov set: emits 0F 40+cc with ModRM 0xC0+8*d+s (x86 CMOVcc).
   Otherwise: emits a short conditional jump on the inverted condition
   (0x70 + (cc^1), displacement 2) that skips the following two-byte
   register move 89 /r (ModRM 0xC0+8*s+d). */
1738     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1739     {
1740     if (have_cmov) {
1741     emit_byte(0x0f);
1742     emit_byte(0x40+cc);
1743     emit_byte(0xc0+8*d+s);
1744     }
1745     else { /* replacement using branch and mov */
1746     int uncc=(cc^1);
1747     emit_byte(0x70+uncc);
1748     emit_byte(2); /* skip next 2 bytes if not cc=true */
1749     emit_byte(0x89);
1750     emit_byte(0xc0+8*s+d);
1751     }
1752     }
1753     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1754    
/* Emits 0F BC with ModRM 0xC0+8*d+s: x86 BSF r32, r32 (bit scan forward)
   with destination d and source s. */
1755     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1756     {
1757     emit_byte(0x0f);
1758     emit_byte(0xbc);
1759     emit_byte(0xc0+8*d+s);
1760     }
1761     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1762    
/* Emits 0F BF with ModRM 0xC0+8*d+s: x86 MOVSX r32, r16, sign-extending the
   low word of s into d. */
1763     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1764     {
1765     emit_byte(0x0f);
1766     emit_byte(0xbf);
1767     emit_byte(0xc0+8*d+s);
1768     }
1769     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1770    
/* Emits 0F BE with ModRM 0xC0+8*d+s: x86 MOVSX r32, r8, sign-extending the
   low byte of s into d. */
1771     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1772     {
1773     emit_byte(0x0f);
1774     emit_byte(0xbe);
1775     emit_byte(0xc0+8*d+s);
1776     }
1777     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1778    
/* Emits 0F B7 with ModRM 0xC0+8*d+s: x86 MOVZX r32, r16, zero-extending the
   low word of s into d. */
1779     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1780     {
1781     emit_byte(0x0f);
1782     emit_byte(0xb7);
1783     emit_byte(0xc0+8*d+s);
1784     }
1785     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1786    
/* Emits 0F B6 with ModRM 0xC0+8*d+s: x86 MOVZX r32, r8, zero-extending the
   low byte of s into d. */
1787     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1788     {
1789     emit_byte(0x0f);
1790     emit_byte(0xb6);
1791     emit_byte(0xc0+8*d+s);
1792     }
1793     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1794    
/* Emits 0F AF with ModRM 0xC0+8*d+s: x86 two-operand IMUL r32, r32,
   i.e. d = d * s truncated to 32 bits. */
1795     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1796     {
1797     emit_byte(0x0f);
1798     emit_byte(0xaf);
1799     emit_byte(0xc0+8*d+s);
1800     }
1801     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1802    
/* Signed widening multiply. Aborts unless d == MUL_NREG1 and s == MUL_NREG2,
   then emits the fixed bytes F7 EA (x86 one-operand IMUL with EDX as the
   operand, product in EDX:EAX). The register numbers are not encoded, which
   is why they are checked instead -- presumably MUL_NREG1/MUL_NREG2 are
   EAX/EDX; confirm where they are defined.
   NOTE(review): unlike raw_mul_64_32 this aborts without printing a
   diagnostic. */
1803     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1804     {
1805     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1806     abort();
1807     emit_byte(0xf7);
1808     emit_byte(0xea);
1809     }
1810     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1811    
/* Unsigned widening multiply. Prints a diagnostic and aborts unless
   d == MUL_NREG1 and s == MUL_NREG2, then emits the fixed bytes F7 E2
   (x86 one-operand MUL with EDX as the operand, product in EDX:EAX).
   The register numbers are not encoded -- presumably MUL_NREG1/MUL_NREG2
   are EAX/EDX; confirm where they are defined. */
1812     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1813     {
1814     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1815     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1816     abort();
1817     }
1818     emit_byte(0xf7);
1819     emit_byte(0xe2);
1820     }
1821     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1822    
/* Deliberately unsupported: the body calls abort() before anything is
   emitted. NOTE(review): assuming abort() is the standard non-returning
   function, the three emit_byte calls below (the same 0F AF encoding used by
   raw_imul_32_32) are unreachable. */
1823     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1824     {
1825     abort(); /* %^$&%^$%#^ x86! */
1826     emit_byte(0x0f);
1827     emit_byte(0xaf);
1828     emit_byte(0xc0+8*d+s);
1829     }
1830     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1831    
/* Emits 88 with ModRM 0xC0+8*s+d: x86 MOV r/m8, r8, copying 8-bit register s into d. */
1832     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1833     {
1834     emit_byte(0x88);
1835     emit_byte(0xc0+8*s+d);
1836     }
1837     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1838    
/* Emits 66 89 with ModRM 0xC0+8*s+d: the operand-size prefixed MOV r/m16, r16,
   copying 16-bit register s into d. */
1839     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1840     {
1841     emit_byte(0x66);
1842     emit_byte(0x89);
1843     emit_byte(0xc0+8*s+d);
1844     }
1845     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1846    
/* 32-bit load d = [baser + index*factor] (opcode 8B with a SIB byte).
   factor must be 1, 2, 4 or 8 and is mapped to the SIB scale field fi (0..3);
   any other value aborts. When baser is register 5 (EBP) the ModRM mod field
   is raised to 01 (isebp = 0x40) and a zero disp8 is appended, because
   mod=00 with base 5 would encode "disp32, no base" instead.
   NOTE(review): index is not validated; register 4 (ESP) cannot be a SIB
   index on x86 -- confirm callers never pass it. */
1847     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1848     {
1849     int isebp=(baser==5)?0x40:0;
1850     int fi;
1851    
1852     switch(factor) {
1853     case 1: fi=0; break;
1854     case 2: fi=1; break;
1855     case 4: fi=2; break;
1856     case 8: fi=3; break;
1857     default: abort();
1858     }
1859    
1860    
1861     emit_byte(0x8b);
1862     emit_byte(0x04+8*d+isebp);
1863     emit_byte(baser+8*index+0x40*fi);
1864     if (isebp)
1865     emit_byte(0x00);
1866     }
1867     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1868    
/* 16-bit load d = [baser + index*factor] (66 8B with a SIB byte).
   factor must be 1, 2, 4 or 8 (mapped to SIB scale fi); other values abort.
   When baser is register 5 (EBP), mod is raised to 01 and a zero disp8 is
   appended, since mod=00 with base 5 would mean "disp32, no base". */
1869     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1870     {
1871     int fi;
1872     int isebp;
1873    
1874     switch(factor) {
1875     case 1: fi=0; break;
1876     case 2: fi=1; break;
1877     case 4: fi=2; break;
1878     case 8: fi=3; break;
1879     default: abort();
1880     }
1881     isebp=(baser==5)?0x40:0;
1882    
1883     emit_byte(0x66);
1884     emit_byte(0x8b);
1885     emit_byte(0x04+8*d+isebp);
1886     emit_byte(baser+8*index+0x40*fi);
1887     if (isebp)
1888     emit_byte(0x00);
1889     }
1890     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1891    
/* 8-bit load d = [baser + index*factor] (opcode 8A with a SIB byte).
   factor must be 1, 2, 4 or 8 (mapped to SIB scale fi); other values abort.
   When baser is register 5 (EBP), mod is raised to 01 and a zero disp8 is
   appended, since mod=00 with base 5 would mean "disp32, no base". */
1892     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1893     {
1894     int fi;
1895     int isebp;
1896    
1897     switch(factor) {
1898     case 1: fi=0; break;
1899     case 2: fi=1; break;
1900     case 4: fi=2; break;
1901     case 8: fi=3; break;
1902     default: abort();
1903     }
1904     isebp=(baser==5)?0x40:0;
1905    
1906     emit_byte(0x8a);
1907     emit_byte(0x04+8*d+isebp);
1908     emit_byte(baser+8*index+0x40*fi);
1909     if (isebp)
1910     emit_byte(0x00);
1911     }
1912     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1913    
/* 32-bit store [baser + index*factor] = s (opcode 89 with a SIB byte).
   factor must be 1, 2, 4 or 8 (mapped to SIB scale fi); other values abort.
   When baser is register 5 (EBP), mod is raised to 01 and a zero disp8 is
   appended, since mod=00 with base 5 would mean "disp32, no base". */
1914     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1915     {
1916     int fi;
1917     int isebp;
1918    
1919     switch(factor) {
1920     case 1: fi=0; break;
1921     case 2: fi=1; break;
1922     case 4: fi=2; break;
1923     case 8: fi=3; break;
1924     default: abort();
1925     }
1926    
1927    
1928     isebp=(baser==5)?0x40:0;
1929    
1930     emit_byte(0x89);
1931     emit_byte(0x04+8*s+isebp);
1932     emit_byte(baser+8*index+0x40*fi);
1933     if (isebp)
1934     emit_byte(0x00);
1935     }
1936     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1937    
/* 16-bit store [baser + index*factor] = s (66 89 with a SIB byte).
   factor must be 1, 2, 4 or 8 (mapped to SIB scale fi); other values abort.
   When baser is register 5 (EBP), mod is raised to 01 and a zero disp8 is
   appended, since mod=00 with base 5 would mean "disp32, no base". */
1938     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1939     {
1940     int fi;
1941     int isebp;
1942    
1943     switch(factor) {
1944     case 1: fi=0; break;
1945     case 2: fi=1; break;
1946     case 4: fi=2; break;
1947     case 8: fi=3; break;
1948     default: abort();
1949     }
1950     isebp=(baser==5)?0x40:0;
1951    
1952     emit_byte(0x66);
1953     emit_byte(0x89);
1954     emit_byte(0x04+8*s+isebp);
1955     emit_byte(baser+8*index+0x40*fi);
1956     if (isebp)
1957     emit_byte(0x00);
1958     }
1959     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1960    
/* 8-bit store [baser + index*factor] = s (opcode 88 with a SIB byte).
   factor must be 1, 2, 4 or 8 (mapped to SIB scale fi); other values abort.
   When baser is register 5 (EBP), mod is raised to 01 and a zero disp8 is
   appended, since mod=00 with base 5 would mean "disp32, no base". */
1961     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1962     {
1963     int fi;
1964     int isebp;
1965    
1966     switch(factor) {
1967     case 1: fi=0; break;
1968     case 2: fi=1; break;
1969     case 4: fi=2; break;
1970     case 8: fi=3; break;
1971     default: abort();
1972     }
1973     isebp=(baser==5)?0x40:0;
1974    
1975     emit_byte(0x88);
1976     emit_byte(0x04+8*s+isebp);
1977     emit_byte(baser+8*index+0x40*fi);
1978     if (isebp)
1979     emit_byte(0x00);
1980     }
1981     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1982    
/* 32-bit store [base + baser + index*factor] = s: opcode 89, ModRM 0x84+8*s
   (mod=10, SIB follows), the SIB byte, then base as a 32-bit displacement.
   factor must be 1, 2, 4 or 8 (mapped to SIB scale fi); other values abort.
   No EBP special case is needed because a displacement is always present. */
1983     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1984     {
1985     int fi;
1986    
1987     switch(factor) {
1988     case 1: fi=0; break;
1989     case 2: fi=1; break;
1990     case 4: fi=2; break;
1991     case 8: fi=3; break;
1992     default: abort();
1993     }
1994    
1995     emit_byte(0x89);
1996     emit_byte(0x84+8*s);
1997     emit_byte(baser+8*index+0x40*fi);
1998     emit_long(base);
1999     }
2000     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2001    
/* 16-bit store [base + baser + index*factor] = s: 66 89, ModRM 0x84+8*s
   (mod=10, SIB follows), the SIB byte, then base as a 32-bit displacement.
   factor must be 1, 2, 4 or 8 (mapped to SIB scale fi); other values abort. */
2002     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2003     {
2004     int fi;
2005    
2006     switch(factor) {
2007     case 1: fi=0; break;
2008     case 2: fi=1; break;
2009     case 4: fi=2; break;
2010     case 8: fi=3; break;
2011     default: abort();
2012     }
2013    
2014     emit_byte(0x66);
2015     emit_byte(0x89);
2016     emit_byte(0x84+8*s);
2017     emit_byte(baser+8*index+0x40*fi);
2018     emit_long(base);
2019     }
2020     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2021    
2022     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2023     {
2024     int fi;
2025    
2026     switch(factor) {
2027     case 1: fi=0; break;
2028     case 2: fi=1; break;
2029     case 4: fi=2; break;
2030     case 8: fi=3; break;
2031     default: abort();
2032     }
2033    
2034     emit_byte(0x88);
2035     emit_byte(0x84+8*s);
2036     emit_byte(baser+8*index+0x40*fi);
2037     emit_long(base);
2038     }
2039     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2040    
2041     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2042     {
2043     int fi;
2044    
2045     switch(factor) {
2046     case 1: fi=0; break;
2047     case 2: fi=1; break;
2048     case 4: fi=2; break;
2049     case 8: fi=3; break;
2050     default: abort();
2051     }
2052    
2053     emit_byte(0x8b);
2054     emit_byte(0x84+8*d);
2055     emit_byte(baser+8*index+0x40*fi);
2056     emit_long(base);
2057     }
2058     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2059    
2060     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2061     {
2062     int fi;
2063    
2064     switch(factor) {
2065     case 1: fi=0; break;
2066     case 2: fi=1; break;
2067     case 4: fi=2; break;
2068     case 8: fi=3; break;
2069     default: abort();
2070     }
2071    
2072     emit_byte(0x66);
2073     emit_byte(0x8b);
2074     emit_byte(0x84+8*d);
2075     emit_byte(baser+8*index+0x40*fi);
2076     emit_long(base);
2077     }
2078     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2079    
2080     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2081     {
2082     int fi;
2083    
2084     switch(factor) {
2085     case 1: fi=0; break;
2086     case 2: fi=1; break;
2087     case 4: fi=2; break;
2088     case 8: fi=3; break;
2089     default: abort();
2090     }
2091    
2092     emit_byte(0x8a);
2093     emit_byte(0x84+8*d);
2094     emit_byte(baser+8*index+0x40*fi);
2095     emit_long(base);
2096     }
2097     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2098    
2099     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2100     {
2101     int fi;
2102     switch(factor) {
2103     case 1: fi=0; break;
2104     case 2: fi=1; break;
2105     case 4: fi=2; break;
2106     case 8: fi=3; break;
2107     default:
2108     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2109     abort();
2110     }
2111     emit_byte(0x8b);
2112     emit_byte(0x04+8*d);
2113     emit_byte(0x05+8*index+64*fi);
2114     emit_long(base);
2115     }
2116     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2117    
2118     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2119     {
2120     int fi;
2121     switch(factor) {
2122     case 1: fi=0; break;
2123     case 2: fi=1; break;
2124     case 4: fi=2; break;
2125     case 8: fi=3; break;
2126     default:
2127     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2128     abort();
2129     }
2130     if (have_cmov) {
2131     emit_byte(0x0f);
2132     emit_byte(0x40+cond);
2133     emit_byte(0x04+8*d);
2134     emit_byte(0x05+8*index+64*fi);
2135     emit_long(base);
2136     }
2137     else { /* replacement using branch and mov */
2138     int uncc=(cond^1);
2139     emit_byte(0x70+uncc);
2140     emit_byte(7); /* skip next 7 bytes if not cc=true */
2141     emit_byte(0x8b);
2142     emit_byte(0x04+8*d);
2143     emit_byte(0x05+8*index+64*fi);
2144     emit_long(base);
2145     }
2146     }
2147     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2148    
2149     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2150     {
2151     if (have_cmov) {
2152     emit_byte(0x0f);
2153     emit_byte(0x40+cond);
2154     emit_byte(0x05+8*d);
2155     emit_long(mem);
2156     }
2157     else { /* replacement using branch and mov */
2158     int uncc=(cond^1);
2159     emit_byte(0x70+uncc);
2160     emit_byte(6); /* skip next 6 bytes if not cc=true */
2161     emit_byte(0x8b);
2162     emit_byte(0x05+8*d);
2163     emit_long(mem);
2164     }
2165     }
2166     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2167    
2168     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2169     {
2170 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2171 gbeauche 1.1 emit_byte(0x8b);
2172     emit_byte(0x40+8*d+s);
2173     emit_byte(offset);
2174     }
2175     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2176    
2177     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2178     {
2179 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2180 gbeauche 1.1 emit_byte(0x66);
2181     emit_byte(0x8b);
2182     emit_byte(0x40+8*d+s);
2183     emit_byte(offset);
2184     }
2185     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2186    
2187     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2188     {
2189 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2190 gbeauche 1.1 emit_byte(0x8a);
2191     emit_byte(0x40+8*d+s);
2192     emit_byte(offset);
2193     }
2194     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2195    
2196     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2197     {
2198     emit_byte(0x8b);
2199     emit_byte(0x80+8*d+s);
2200     emit_long(offset);
2201     }
2202     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2203    
2204     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2205     {
2206     emit_byte(0x66);
2207     emit_byte(0x8b);
2208     emit_byte(0x80+8*d+s);
2209     emit_long(offset);
2210     }
2211     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2212    
2213     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2214     {
2215     emit_byte(0x8a);
2216     emit_byte(0x80+8*d+s);
2217     emit_long(offset);
2218     }
2219     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2220    
2221     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2222     {
2223 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2224 gbeauche 1.1 emit_byte(0xc7);
2225     emit_byte(0x40+d);
2226     emit_byte(offset);
2227     emit_long(i);
2228     }
2229     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2230    
2231     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2232     {
2233 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2234 gbeauche 1.1 emit_byte(0x66);
2235     emit_byte(0xc7);
2236     emit_byte(0x40+d);
2237     emit_byte(offset);
2238     emit_word(i);
2239     }
2240     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2241    
2242     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2243     {
2244 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2245 gbeauche 1.1 emit_byte(0xc6);
2246     emit_byte(0x40+d);
2247     emit_byte(offset);
2248     emit_byte(i);
2249     }
2250     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2251    
2252     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2253     {
2254 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2255 gbeauche 1.1 emit_byte(0x89);
2256     emit_byte(0x40+8*s+d);
2257     emit_byte(offset);
2258     }
2259     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2260    
2261     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2262     {
2263 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2264 gbeauche 1.1 emit_byte(0x66);
2265     emit_byte(0x89);
2266     emit_byte(0x40+8*s+d);
2267     emit_byte(offset);
2268     }
2269     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2270    
2271     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2272     {
2273 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2274 gbeauche 1.1 emit_byte(0x88);
2275     emit_byte(0x40+8*s+d);
2276     emit_byte(offset);
2277     }
2278     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2279    
2280     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2281     {
2282     if (optimize_imm8 && isbyte(offset)) {
2283     emit_byte(0x8d);
2284     emit_byte(0x40+8*d+s);
2285     emit_byte(offset);
2286     }
2287     else {
2288     emit_byte(0x8d);
2289     emit_byte(0x80+8*d+s);
2290     emit_long(offset);
2291     }
2292     }
2293     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2294    
2295     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2296     {
2297     int fi;
2298    
2299     switch(factor) {
2300     case 1: fi=0; break;
2301     case 2: fi=1; break;
2302     case 4: fi=2; break;
2303     case 8: fi=3; break;
2304     default: abort();
2305     }
2306    
2307     if (optimize_imm8 && isbyte(offset)) {
2308     emit_byte(0x8d);
2309     emit_byte(0x44+8*d);
2310     emit_byte(0x40*fi+8*index+s);
2311     emit_byte(offset);
2312     }
2313     else {
2314     emit_byte(0x8d);
2315     emit_byte(0x84+8*d);
2316     emit_byte(0x40*fi+8*index+s);
2317     emit_long(offset);
2318     }
2319     }
2320     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2321    
2322     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2323     {
2324     int isebp=(s==5)?0x40:0;
2325     int fi;
2326    
2327     switch(factor) {
2328     case 1: fi=0; break;
2329     case 2: fi=1; break;
2330     case 4: fi=2; break;
2331     case 8: fi=3; break;
2332     default: abort();
2333     }
2334    
2335     emit_byte(0x8d);
2336     emit_byte(0x04+8*d+isebp);
2337     emit_byte(0x40*fi+8*index+s);
2338     if (isebp)
2339     emit_byte(0);
2340     }
2341     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2342    
2343     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2344     {
2345     if (optimize_imm8 && isbyte(offset)) {
2346     emit_byte(0x89);
2347     emit_byte(0x40+8*s+d);
2348     emit_byte(offset);
2349     }
2350     else {
2351     emit_byte(0x89);
2352     emit_byte(0x80+8*s+d);
2353     emit_long(offset);
2354     }
2355     }
2356     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2357    
2358     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2359     {
2360     emit_byte(0x66);
2361     emit_byte(0x89);
2362     emit_byte(0x80+8*s+d);
2363     emit_long(offset);
2364     }
2365     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2366    
2367     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2368     {
2369     if (optimize_imm8 && isbyte(offset)) {
2370     emit_byte(0x88);
2371     emit_byte(0x40+8*s+d);
2372     emit_byte(offset);
2373     }
2374     else {
2375     emit_byte(0x88);
2376     emit_byte(0x80+8*s+d);
2377     emit_long(offset);
2378     }
2379     }
2380     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2381    
2382     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2383     {
2384     emit_byte(0x0f);
2385     emit_byte(0xc8+r);
2386     }
2387     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2388    
2389     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2390     {
2391     emit_byte(0x66);
2392     emit_byte(0xc1);
2393     emit_byte(0xc0+r);
2394     emit_byte(0x08);
2395     }
2396     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2397    
2398     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2399     {
2400     emit_byte(0x89);
2401     emit_byte(0xc0+8*s+d);
2402     }
2403     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2404    
2405     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2406     {
2407     emit_byte(0x89);
2408     emit_byte(0x05+8*s);
2409     emit_long(d);
2410     }
2411     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2412    
2413     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2414     {
2415     emit_byte(0x66);
2416     emit_byte(0x89);
2417     emit_byte(0x05+8*s);
2418     emit_long(d);
2419     }
2420     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2421    
2422     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2423     {
2424     emit_byte(0x66);
2425     emit_byte(0x8b);
2426     emit_byte(0x05+8*d);
2427     emit_long(s);
2428     }
2429     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2430    
2431     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2432     {
2433     emit_byte(0x88);
2434 gbeauche 1.33 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2435 gbeauche 1.1 emit_long(d);
2436     }
2437     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2438    
2439     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2440     {
2441     emit_byte(0x8a);
2442     emit_byte(0x05+8*d);
2443     emit_long(s);
2444     }
2445     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2446    
2447     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2448     {
2449     emit_byte(0xb8+d);
2450     emit_long(s);
2451     }
2452     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2453    
2454     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2455     {
2456     emit_byte(0x66);
2457     emit_byte(0xb8+d);
2458     emit_word(s);
2459     }
2460     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2461    
2462     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2463     {
2464     emit_byte(0xb0+d);
2465     emit_byte(s);
2466     }
2467     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2468    
2469     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2470     {
2471     emit_byte(0x81);
2472     emit_byte(0x15);
2473     emit_long(d);
2474     emit_long(s);
2475     }
2476     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2477    
2478     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2479     {
2480     if (optimize_imm8 && isbyte(s)) {
2481     emit_byte(0x83);
2482     emit_byte(0x05);
2483     emit_long(d);
2484     emit_byte(s);
2485     }
2486     else {
2487     emit_byte(0x81);
2488     emit_byte(0x05);
2489     emit_long(d);
2490     emit_long(s);
2491     }
2492     }
2493     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2494    
2495     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2496     {
2497     emit_byte(0x66);
2498     emit_byte(0x81);
2499     emit_byte(0x05);
2500     emit_long(d);
2501     emit_word(s);
2502     }
2503     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2504    
2505     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2506     {
2507     emit_byte(0x80);
2508     emit_byte(0x05);
2509     emit_long(d);
2510     emit_byte(s);
2511     }
2512     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2513    
2514     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2515     {
2516 gbeauche 1.2 if (optimize_accum && isaccum(d))
2517     emit_byte(0xa9);
2518     else {
2519 gbeauche 1.1 emit_byte(0xf7);
2520     emit_byte(0xc0+d);
2521 gbeauche 1.2 }
2522 gbeauche 1.1 emit_long(i);
2523     }
2524     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2525    
2526     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2527     {
2528     emit_byte(0x85);
2529     emit_byte(0xc0+8*s+d);
2530     }
2531     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2532    
2533     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2534     {
2535     emit_byte(0x66);
2536     emit_byte(0x85);
2537     emit_byte(0xc0+8*s+d);
2538     }
2539     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2540    
2541     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2542     {
2543     emit_byte(0x84);
2544     emit_byte(0xc0+8*s+d);
2545     }
2546     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2547    
2548 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2549     {
2550     emit_byte(0x81);
2551     emit_byte(0xf0+d);
2552     emit_long(i);
2553     }
2554     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2555    
2556 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2557     {
2558     if (optimize_imm8 && isbyte(i)) {
2559 gbeauche 1.2 emit_byte(0x83);
2560     emit_byte(0xe0+d);
2561     emit_byte(i);
2562 gbeauche 1.1 }
2563     else {
2564 gbeauche 1.2 if (optimize_accum && isaccum(d))
2565     emit_byte(0x25);
2566     else {
2567     emit_byte(0x81);
2568     emit_byte(0xe0+d);
2569     }
2570     emit_long(i);
2571 gbeauche 1.1 }
2572     }
2573     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2574    
2575     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2576     {
2577 gbeauche 1.2 emit_byte(0x66);
2578     if (optimize_imm8 && isbyte(i)) {
2579     emit_byte(0x83);
2580     emit_byte(0xe0+d);
2581     emit_byte(i);
2582     }
2583     else {
2584     if (optimize_accum && isaccum(d))
2585     emit_byte(0x25);
2586     else {
2587     emit_byte(0x81);
2588     emit_byte(0xe0+d);
2589     }
2590     emit_word(i);
2591     }
2592 gbeauche 1.1 }
2593     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2594    
2595     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2596     {
2597     emit_byte(0x21);
2598     emit_byte(0xc0+8*s+d);
2599     }
2600     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2601    
2602     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2603     {
2604     emit_byte(0x66);
2605     emit_byte(0x21);
2606     emit_byte(0xc0+8*s+d);
2607     }
2608     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2609    
2610     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2611     {
2612     emit_byte(0x20);
2613     emit_byte(0xc0+8*s+d);
2614     }
2615     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2616    
2617     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2618     {
2619     if (optimize_imm8 && isbyte(i)) {
2620     emit_byte(0x83);
2621     emit_byte(0xc8+d);
2622     emit_byte(i);
2623     }
2624     else {
2625 gbeauche 1.2 if (optimize_accum && isaccum(d))
2626     emit_byte(0x0d);
2627     else {
2628 gbeauche 1.1 emit_byte(0x81);
2629     emit_byte(0xc8+d);
2630 gbeauche 1.2 }
2631 gbeauche 1.1 emit_long(i);
2632     }
2633     }
2634     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2635    
2636     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2637     {
2638     emit_byte(0x09);
2639     emit_byte(0xc0+8*s+d);
2640     }
2641     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2642    
2643     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2644     {
2645     emit_byte(0x66);
2646     emit_byte(0x09);
2647     emit_byte(0xc0+8*s+d);
2648     }
2649     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2650    
2651     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2652     {
2653     emit_byte(0x08);
2654     emit_byte(0xc0+8*s+d);
2655     }
2656     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2657    
2658     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2659     {
2660     emit_byte(0x11);
2661     emit_byte(0xc0+8*s+d);
2662     }
2663     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2664    
2665     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2666     {
2667     emit_byte(0x66);
2668     emit_byte(0x11);
2669     emit_byte(0xc0+8*s+d);
2670     }
2671     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2672    
2673     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2674     {
2675     emit_byte(0x10);
2676     emit_byte(0xc0+8*s+d);
2677     }
2678     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2679    
2680     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2681     {
2682     emit_byte(0x01);
2683     emit_byte(0xc0+8*s+d);
2684     }
2685     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2686    
2687     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2688     {
2689     emit_byte(0x66);
2690     emit_byte(0x01);
2691     emit_byte(0xc0+8*s+d);
2692     }
2693     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2694    
2695     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2696     {
2697     emit_byte(0x00);
2698     emit_byte(0xc0+8*s+d);
2699     }
2700     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2701    
2702     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2703     {
2704     if (isbyte(i)) {
2705     emit_byte(0x83);
2706     emit_byte(0xe8+d);
2707     emit_byte(i);
2708     }
2709     else {
2710 gbeauche 1.2 if (optimize_accum && isaccum(d))
2711     emit_byte(0x2d);
2712     else {
2713 gbeauche 1.1 emit_byte(0x81);
2714     emit_byte(0xe8+d);
2715 gbeauche 1.2 }
2716 gbeauche 1.1 emit_long(i);
2717     }
2718     }
2719     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2720    
2721     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2722     {
2723 gbeauche 1.2 if (optimize_accum && isaccum(d))
2724     emit_byte(0x2c);
2725     else {
2726 gbeauche 1.1 emit_byte(0x80);
2727     emit_byte(0xe8+d);
2728 gbeauche 1.2 }
2729 gbeauche 1.1 emit_byte(i);
2730     }
2731     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2732    
2733     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2734     {
2735     if (isbyte(i)) {
2736     emit_byte(0x83);
2737     emit_byte(0xc0+d);
2738     emit_byte(i);
2739     }
2740     else {
2741 gbeauche 1.2 if (optimize_accum && isaccum(d))
2742     emit_byte(0x05);
2743     else {
2744 gbeauche 1.1 emit_byte(0x81);
2745     emit_byte(0xc0+d);
2746 gbeauche 1.2 }
2747 gbeauche 1.1 emit_long(i);
2748     }
2749     }
2750     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2751    
2752     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2753     {
2754 gbeauche 1.2 emit_byte(0x66);
2755 gbeauche 1.1 if (isbyte(i)) {
2756     emit_byte(0x83);
2757     emit_byte(0xc0+d);
2758     emit_byte(i);
2759     }
2760     else {
2761 gbeauche 1.2 if (optimize_accum && isaccum(d))
2762     emit_byte(0x05);
2763     else {
2764 gbeauche 1.1 emit_byte(0x81);
2765     emit_byte(0xc0+d);
2766 gbeauche 1.2 }
2767 gbeauche 1.1 emit_word(i);
2768     }
2769     }
2770     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2771    
2772     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2773     {
2774 gbeauche 1.2 if (optimize_accum && isaccum(d))
2775     emit_byte(0x04);
2776     else {
2777     emit_byte(0x80);
2778     emit_byte(0xc0+d);
2779     }
2780 gbeauche 1.1 emit_byte(i);
2781     }
2782     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2783    
2784     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2785     {
2786     emit_byte(0x19);
2787     emit_byte(0xc0+8*s+d);
2788     }
2789     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2790    
2791     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2792     {
2793     emit_byte(0x66);
2794     emit_byte(0x19);
2795     emit_byte(0xc0+8*s+d);
2796     }
2797     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2798    
2799     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2800     {
2801     emit_byte(0x18);
2802     emit_byte(0xc0+8*s+d);
2803     }
2804     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2805    
2806     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2807     {
2808     emit_byte(0x29);
2809     emit_byte(0xc0+8*s+d);
2810     }
2811     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2812    
2813     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2814     {
2815     emit_byte(0x66);
2816     emit_byte(0x29);
2817     emit_byte(0xc0+8*s+d);
2818     }
2819     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2820    
2821     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2822     {
2823     emit_byte(0x28);
2824     emit_byte(0xc0+8*s+d);
2825     }
2826     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2827    
2828     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2829     {
2830     emit_byte(0x39);
2831     emit_byte(0xc0+8*s+d);
2832     }
2833     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2834    
2835     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2836     {
2837     if (optimize_imm8 && isbyte(i)) {
2838     emit_byte(0x83);
2839     emit_byte(0xf8+r);
2840     emit_byte(i);
2841     }
2842     else {
2843 gbeauche 1.2 if (optimize_accum && isaccum(r))
2844     emit_byte(0x3d);
2845     else {
2846 gbeauche 1.1 emit_byte(0x81);
2847     emit_byte(0xf8+r);
2848 gbeauche 1.2 }
2849 gbeauche 1.1 emit_long(i);
2850     }
2851     }
2852     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2853    
2854     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2855     {
2856     emit_byte(0x66);
2857     emit_byte(0x39);
2858     emit_byte(0xc0+8*s+d);
2859     }
2860     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2861    
2862 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2863     {
2864     emit_byte(0x80);
2865     emit_byte(0x3d);
2866     emit_long(d);
2867     emit_byte(s);
2868     }
2869     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2870    
2871 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2872     {
2873 gbeauche 1.2 if (optimize_accum && isaccum(d))
2874     emit_byte(0x3c);
2875     else {
2876 gbeauche 1.1 emit_byte(0x80);
2877     emit_byte(0xf8+d);
2878 gbeauche 1.2 }
2879 gbeauche 1.1 emit_byte(i);
2880     }
2881     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2882    
2883     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2884     {
2885     emit_byte(0x38);
2886     emit_byte(0xc0+8*s+d);
2887     }
2888     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2889    
2890     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2891     {
2892     int fi;
2893    
2894     switch(factor) {
2895     case 1: fi=0; break;
2896     case 2: fi=1; break;
2897     case 4: fi=2; break;
2898     case 8: fi=3; break;
2899     default: abort();
2900     }
2901     emit_byte(0x39);
2902     emit_byte(0x04+8*d);
2903     emit_byte(5+8*index+0x40*fi);
2904     emit_long(offset);
2905     }
2906     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2907    
2908     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2909     {
2910     emit_byte(0x31);
2911     emit_byte(0xc0+8*s+d);
2912     }
2913     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2914    
2915     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2916     {
2917     emit_byte(0x66);
2918     emit_byte(0x31);
2919     emit_byte(0xc0+8*s+d);
2920     }
2921     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2922    
2923     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2924     {
2925     emit_byte(0x30);
2926     emit_byte(0xc0+8*s+d);
2927     }
2928     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2929    
2930     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2931     {
2932     if (optimize_imm8 && isbyte(s)) {
2933     emit_byte(0x83);
2934     emit_byte(0x2d);
2935     emit_long(d);
2936     emit_byte(s);
2937     }
2938     else {
2939     emit_byte(0x81);
2940     emit_byte(0x2d);
2941     emit_long(d);
2942     emit_long(s);
2943     }
2944     }
2945     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2946    
2947     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2948     {
2949     if (optimize_imm8 && isbyte(s)) {
2950     emit_byte(0x83);
2951     emit_byte(0x3d);
2952     emit_long(d);
2953     emit_byte(s);
2954     }
2955     else {
2956     emit_byte(0x81);
2957     emit_byte(0x3d);
2958     emit_long(d);
2959     emit_long(s);
2960     }
2961     }
2962     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2963    
2964     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2965     {
2966     emit_byte(0x87);
2967     emit_byte(0xc0+8*r1+r2);
2968     }
2969     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2970    
2971     /*************************************************************************
2972     * FIXME: mem access modes probably wrong *
2973     *************************************************************************/
2974    
2975     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2976     {
2977     emit_byte(0x9c);
2978     }
2979     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2980    
2981     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2982     {
2983     emit_byte(0x9d);
2984     }
2985     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2986 gbeauche 1.13
2987 gbeauche 1.34 /* Generate floating-point instructions */
2988     static inline void x86_fadd_m(MEMR s)
2989     {
2990     emit_byte(0xdc);
2991     emit_byte(0x05);
2992     emit_long(s);
2993     }
2994    
2995 gbeauche 1.13 #endif
2996 gbeauche 1.1
2997     /*************************************************************************
2998     * Unoptimizable stuff --- jump *
2999     *************************************************************************/
3000    
3001     static __inline__ void raw_call_r(R4 r)
3002     {
3003 gbeauche 1.20 #if USE_NEW_RTASM
3004     CALLsr(r);
3005     #else
3006 gbeauche 1.1 emit_byte(0xff);
3007     emit_byte(0xd0+r);
3008 gbeauche 1.20 #endif
3009 gbeauche 1.5 }
3010    
3011     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3012     {
3013 gbeauche 1.20 #if USE_NEW_RTASM
3014     CALLsm(base, X86_NOREG, r, m);
3015     #else
3016 gbeauche 1.5 int mu;
3017     switch(m) {
3018     case 1: mu=0; break;
3019     case 2: mu=1; break;
3020     case 4: mu=2; break;
3021     case 8: mu=3; break;
3022     default: abort();
3023     }
3024     emit_byte(0xff);
3025     emit_byte(0x14);
3026     emit_byte(0x05+8*r+0x40*mu);
3027     emit_long(base);
3028 gbeauche 1.20 #endif
3029 gbeauche 1.1 }
3030    
3031     static __inline__ void raw_jmp_r(R4 r)
3032     {
3033 gbeauche 1.20 #if USE_NEW_RTASM
3034     JMPsr(r);
3035     #else
3036 gbeauche 1.1 emit_byte(0xff);
3037     emit_byte(0xe0+r);
3038 gbeauche 1.20 #endif
3039 gbeauche 1.1 }
3040    
3041     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3042     {
3043 gbeauche 1.20 #if USE_NEW_RTASM
3044     JMPsm(base, X86_NOREG, r, m);
3045     #else
3046 gbeauche 1.1 int mu;
3047     switch(m) {
3048     case 1: mu=0; break;
3049     case 2: mu=1; break;
3050     case 4: mu=2; break;
3051     case 8: mu=3; break;
3052     default: abort();
3053     }
3054     emit_byte(0xff);
3055     emit_byte(0x24);
3056     emit_byte(0x05+8*r+0x40*mu);
3057     emit_long(base);
3058 gbeauche 1.20 #endif
3059 gbeauche 1.1 }
3060    
3061     static __inline__ void raw_jmp_m(uae_u32 base)
3062     {
3063     emit_byte(0xff);
3064     emit_byte(0x25);
3065     emit_long(base);
3066     }
3067    
3068    
3069     static __inline__ void raw_call(uae_u32 t)
3070     {
3071 gbeauche 1.20 #if USE_NEW_RTASM
3072     CALLm(t);
3073     #else
3074 gbeauche 1.1 emit_byte(0xe8);
3075     emit_long(t-(uae_u32)target-4);
3076 gbeauche 1.20 #endif
3077 gbeauche 1.1 }
3078    
3079     static __inline__ void raw_jmp(uae_u32 t)
3080     {
3081 gbeauche 1.20 #if USE_NEW_RTASM
3082     JMPm(t);
3083     #else
3084 gbeauche 1.1 emit_byte(0xe9);
3085     emit_long(t-(uae_u32)target-4);
3086 gbeauche 1.20 #endif
3087 gbeauche 1.1 }
3088    
3089     static __inline__ void raw_jl(uae_u32 t)
3090     {
3091     emit_byte(0x0f);
3092     emit_byte(0x8c);
3093 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3094 gbeauche 1.1 }
3095    
3096     static __inline__ void raw_jz(uae_u32 t)
3097     {
3098     emit_byte(0x0f);
3099     emit_byte(0x84);
3100 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3101 gbeauche 1.1 }
3102    
3103     static __inline__ void raw_jnz(uae_u32 t)
3104     {
3105     emit_byte(0x0f);
3106     emit_byte(0x85);
3107 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3108 gbeauche 1.1 }
3109    
3110     static __inline__ void raw_jnz_l_oponly(void)
3111     {
3112     emit_byte(0x0f);
3113     emit_byte(0x85);
3114     }
3115    
3116     static __inline__ void raw_jcc_l_oponly(int cc)
3117     {
3118     emit_byte(0x0f);
3119     emit_byte(0x80+cc);
3120     }
3121    
3122     static __inline__ void raw_jnz_b_oponly(void)
3123     {
3124     emit_byte(0x75);
3125     }
3126    
3127     static __inline__ void raw_jz_b_oponly(void)
3128     {
3129     emit_byte(0x74);
3130     }
3131    
3132     static __inline__ void raw_jcc_b_oponly(int cc)
3133     {
3134     emit_byte(0x70+cc);
3135     }
3136    
3137     static __inline__ void raw_jmp_l_oponly(void)
3138     {
3139     emit_byte(0xe9);
3140     }
3141    
3142     static __inline__ void raw_jmp_b_oponly(void)
3143     {
3144     emit_byte(0xeb);
3145     }
3146    
3147     static __inline__ void raw_ret(void)
3148     {
3149     emit_byte(0xc3);
3150     }
3151    
3152     static __inline__ void raw_nop(void)
3153     {
3154     emit_byte(0x90);
3155     }
3156    
3157 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3158     {
3159     /* Source: GNU Binutils 2.12.90.0.15 */
3160     /* Various efficient no-op patterns for aligning code labels.
3161     Note: Don't try to assemble the instructions in the comments.
3162     0L and 0w are not legal. */
3163     static const uae_u8 f32_1[] =
3164     {0x90}; /* nop */
3165     static const uae_u8 f32_2[] =
3166     {0x89,0xf6}; /* movl %esi,%esi */
3167     static const uae_u8 f32_3[] =
3168     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3169     static const uae_u8 f32_4[] =
3170     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3171     static const uae_u8 f32_5[] =
3172     {0x90, /* nop */
3173     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3174     static const uae_u8 f32_6[] =
3175     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3176     static const uae_u8 f32_7[] =
3177     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3178     static const uae_u8 f32_8[] =
3179     {0x90, /* nop */
3180     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3181     static const uae_u8 f32_9[] =
3182     {0x89,0xf6, /* movl %esi,%esi */
3183     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3184     static const uae_u8 f32_10[] =
3185     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3186     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3187     static const uae_u8 f32_11[] =
3188     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3189     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3190     static const uae_u8 f32_12[] =
3191     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3192     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3193     static const uae_u8 f32_13[] =
3194     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3195     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3196     static const uae_u8 f32_14[] =
3197     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3198     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3199     static const uae_u8 f32_15[] =
3200     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3201     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3202     static const uae_u8 f32_16[] =
3203     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3204     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3205     static const uae_u8 *const f32_patt[] = {
3206     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3207     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3208     };
3209 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3210 gbeauche 1.8
3211 gbeauche 1.21 #if defined(__x86_64__)
3212     /* The recommended way to pad 64bit code is to use NOPs preceded by
3213     maximally four 0x66 prefixes. Balance the size of nops. */
3214     if (nbytes == 0)
3215     return;
3216    
3217     int i;
3218     int nnops = (nbytes + 3) / 4;
3219     int len = nbytes / nnops;
3220     int remains = nbytes - nnops * len;
3221    
3222     for (i = 0; i < remains; i++) {
3223     emit_block(prefixes, len);
3224     raw_nop();
3225     }
3226     for (; i < nnops; i++) {
3227     emit_block(prefixes, len - 1);
3228     raw_nop();
3229     }
3230     #else
3231 gbeauche 1.8 int nloops = nbytes / 16;
3232     while (nloops-- > 0)
3233     emit_block(f32_16, sizeof(f32_16));
3234    
3235     nbytes %= 16;
3236     if (nbytes)
3237     emit_block(f32_patt[nbytes - 1], nbytes);
3238 gbeauche 1.21 #endif
3239 gbeauche 1.8 }
3240    
3241 gbeauche 1.1
3242     /*************************************************************************
3243     * Flag handling, to and fro UAE flag register *
3244     *************************************************************************/
3245    
3246     #ifdef SAHF_SETO_PROFITABLE
3247    
3248     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3249     static __inline__ void raw_flags_to_reg(int r)
3250     {
3251     raw_lahf(0); /* Most flags in AH */
3252     //raw_setcc(r,0); /* V flag in AL */
3253 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3254 gbeauche 1.1
3255     #if 1 /* Let's avoid those nasty partial register stalls */
3256 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3257 gbeauche 1.33 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3258 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3259     live.state[FLAGTMP].status=INMEM;
3260     live.state[FLAGTMP].realreg=-1;
3261     /* We just "evicted" FLAGTMP. */
3262     if (live.nat[r].nholds!=1) {
3263     /* Huh? */
3264     abort();
3265     }
3266     live.nat[r].nholds=0;
3267     #endif
3268     }
3269    
3270     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3271     static __inline__ void raw_reg_to_flags(int r)
3272     {
3273     raw_cmp_b_ri(r,-127); /* set V */
3274     raw_sahf(0);
3275     }
3276    
3277 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3278     static __inline__ void raw_flags_set_zero(int s, int tmp)
3279     {
3280     raw_mov_l_rr(tmp,s);
3281     raw_lahf(s); /* flags into ah */
3282     raw_and_l_ri(s,0xffffbfff);
3283     raw_and_l_ri(tmp,0x00004000);
3284     raw_xor_l_ri(tmp,0x00004000);
3285     raw_or_l(s,tmp);
3286     raw_sahf(s);
3287     }
3288    
3289 gbeauche 1.1 #else
3290    
3291     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3292     static __inline__ void raw_flags_to_reg(int r)
3293     {
3294     raw_pushfl();
3295     raw_pop_l_r(r);
3296 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3297 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3298     live.state[FLAGTMP].status=INMEM;
3299     live.state[FLAGTMP].realreg=-1;
3300     /* We just "evicted" FLAGTMP. */
3301     if (live.nat[r].nholds!=1) {
3302     /* Huh? */
3303     abort();
3304     }
3305     live.nat[r].nholds=0;
3306     }
3307    
3308     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3309     static __inline__ void raw_reg_to_flags(int r)
3310     {
3311     raw_push_l_r(r);
3312     raw_popfl();
3313     }
3314    
3315 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3316     static __inline__ void raw_flags_set_zero(int s, int tmp)
3317     {
3318     raw_mov_l_rr(tmp,s);
3319     raw_pushfl();
3320     raw_pop_l_r(s);
3321     raw_and_l_ri(s,0xffffffbf);
3322     raw_and_l_ri(tmp,0x00000040);
3323     raw_xor_l_ri(tmp,0x00000040);
3324     raw_or_l(s,tmp);
3325     raw_push_l_r(s);
3326     raw_popfl();
3327     }
3328 gbeauche 1.1 #endif
3329    
3330     /* Apparently, there are enough instructions between flag store and
3331     flag reload to avoid the partial memory stall */
3332     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3333     {
3334     #if 1
3335 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3336 gbeauche 1.1 #else
3337 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3338     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3339 gbeauche 1.1 #endif
3340     }
3341    
3342     /* FLAGX is byte sized, and we *do* write it at that size */
3343     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3344     {
3345     if (live.nat[target].canbyte)
3346 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3347 gbeauche 1.1 else if (live.nat[target].canword)
3348 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3349 gbeauche 1.1 else
3350 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3351 gbeauche 1.1 }
3352    
3353 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3354     {
3355     if (off) raw_sub_l_ri(ESP_INDEX,off);
3356     }
3357    
3358 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3359     {
3360 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3361 gbeauche 1.1 }
3362    
3363     /*************************************************************************
3364     * Handling mistaken direct memory access *
3365     *************************************************************************/
3366    
3367     // gb-- I don't need that part for JIT Basilisk II
3368     #if defined(NATMEM_OFFSET) && 0
3369     #include <asm/sigcontext.h>
3370     #include <signal.h>
3371    
3372     #define SIG_READ 1
3373     #define SIG_WRITE 2
3374    
3375     static int in_handler=0;
3376     static uae_u8 veccode[256];
3377    
3378     static void vec(int x, struct sigcontext sc)
3379     {
3380     uae_u8* i=(uae_u8*)sc.eip;
3381     uae_u32 addr=sc.cr2;
3382     int r=-1;
3383     int size=4;
3384     int dir=-1;
3385     int len=0;
3386     int j;
3387    
3388     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3389     if (!canbang)
3390     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3391     if (in_handler)
3392     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3393    
3394     if (canbang && i>=compiled_code && i<=current_compile_p) {
3395     if (*i==0x66) {
3396     i++;
3397     size=2;
3398     len++;
3399     }
3400    
3401     switch(i[0]) {
3402     case 0x8a:
3403     if ((i[1]&0xc0)==0x80) {
3404     r=(i[1]>>3)&7;
3405     dir=SIG_READ;
3406     size=1;
3407     len+=6;
3408     break;
3409     }
3410     break;
3411     case 0x88:
3412     if ((i[1]&0xc0)==0x80) {
3413     r=(i[1]>>3)&7;
3414     dir=SIG_WRITE;
3415     size=1;
3416     len+=6;
3417     break;
3418     }
3419     break;
3420     case 0x8b:
3421     if ((i[1]&0xc0)==0x80) {
3422     r=(i[1]>>3)&7;
3423     dir=SIG_READ;
3424     len+=6;
3425     break;
3426     }
3427     if ((i[1]&0xc0)==0x40) {
3428     r=(i[1]>>3)&7;
3429     dir=SIG_READ;
3430     len+=3;
3431     break;
3432     }
3433     break;
3434     case 0x89:
3435     if ((i[1]&0xc0)==0x80) {
3436     r=(i[1]>>3)&7;
3437     dir=SIG_WRITE;
3438     len+=6;
3439     break;
3440     }
3441     if ((i[1]&0xc0)==0x40) {
3442     r=(i[1]>>3)&7;
3443     dir=SIG_WRITE;
3444     len+=3;
3445     break;
3446     }
3447     break;
3448     }
3449     }
3450    
3451     if (r!=-1) {
3452     void* pr=NULL;
3453     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3454    
3455     switch(r) {
3456     case 0: pr=&(sc.eax); break;
3457     case 1: pr=&(sc.ecx); break;
3458     case 2: pr=&(sc.edx); break;
3459     case 3: pr=&(sc.ebx); break;
3460     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3461     case 5: pr=(size>1)?
3462     (void*)(&(sc.ebp)):
3463     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3464     case 6: pr=(size>1)?
3465     (void*)(&(sc.esi)):
3466     (void*)(((uae_u8*)&(sc.edx))+1); break;
3467     case 7: pr=(size>1)?
3468     (void*)(&(sc.edi)):
3469     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3470     default: abort();
3471     }
3472     if (pr) {
3473     blockinfo* bi;
3474    
3475     if (currprefs.comp_oldsegv) {
3476     addr-=NATMEM_OFFSET;
3477    
3478     if ((addr>=0x10000000 && addr<0x40000000) ||
3479     (addr>=0x50000000)) {
3480     write_log("Suspicious address in %x SEGV handler.\n",addr);
3481     }
3482     if (dir==SIG_READ) {
3483     switch(size) {
3484     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3485     case 2: *((uae_u16*)pr)=get_word(addr); break;
3486     case 4: *((uae_u32*)pr)=get_long(addr); break;
3487     default: abort();
3488     }
3489     }
3490     else { /* write */
3491     switch(size) {
3492     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3493     case 2: put_word(addr,*((uae_u16*)pr)); break;
3494     case 4: put_long(addr,*((uae_u32*)pr)); break;
3495     default: abort();
3496     }
3497     }
3498     write_log("Handled one access!\n");
3499     fflush(stdout);
3500     segvcount++;
3501     sc.eip+=len;
3502     }
3503     else {
3504     void* tmp=target;
3505     int i;
3506     uae_u8 vecbuf[5];
3507    
3508     addr-=NATMEM_OFFSET;
3509    
3510     if ((addr>=0x10000000 && addr<0x40000000) ||
3511     (addr>=0x50000000)) {
3512     write_log("Suspicious address in %x SEGV handler.\n",addr);
3513     }
3514    
3515     target=(uae_u8*)sc.eip;
3516     for (i=0;i<5;i++)
3517     vecbuf[i]=target[i];
3518     emit_byte(0xe9);
3519 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3520 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3521    
3522     write_log("Handled one access!\n");
3523     fflush(stdout);
3524     segvcount++;
3525    
3526     target=veccode;
3527    
3528     if (dir==SIG_READ) {
3529     switch(size) {
3530     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3531     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3532     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3533     default: abort();
3534     }
3535     }
3536     else { /* write */
3537     switch(size) {
3538     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3539     case 2: put_word(addr,*((uae_u16*)pr)); break;
3540     case 4: put_long(addr,*((uae_u32*)pr)); break;
3541     default: abort();
3542     }
3543     }
3544     for (i=0;i<5;i++)
3545     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3546 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3547 gbeauche 1.1 emit_byte(0xe9);
3548 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3549 gbeauche 1.1 in_handler=1;
3550     target=tmp;
3551     }
3552     bi=active;
3553     while (bi) {
3554     if (bi->handler &&
3555     (uae_u8*)bi->direct_handler<=i &&
3556     (uae_u8*)bi->nexthandler>i) {
3557     write_log("deleted trigger (%p<%p<%p) %p\n",
3558     bi->handler,
3559     i,
3560     bi->nexthandler,
3561     bi->pc_p);
3562     invalidate_block(bi);
3563     raise_in_cl_list(bi);
3564     set_special(0);
3565     return;
3566     }
3567     bi=bi->next;
3568     }
3569     /* Not found in the active list. Might be a rom routine that
3570     is in the dormant list */
3571     bi=dormant;
3572     while (bi) {
3573     if (bi->handler &&
3574     (uae_u8*)bi->direct_handler<=i &&
3575     (uae_u8*)bi->nexthandler>i) {
3576     write_log("deleted trigger (%p<%p<%p) %p\n",
3577     bi->handler,
3578     i,
3579     bi->nexthandler,
3580     bi->pc_p);
3581     invalidate_block(bi);
3582     raise_in_cl_list(bi);
3583     set_special(0);
3584     return;
3585     }
3586     bi=bi->next;
3587     }
3588     write_log("Huh? Could not find trigger!\n");
3589     return;
3590     }
3591     }
3592     write_log("Can't handle access!\n");
3593     for (j=0;j<10;j++) {
3594     write_log("instruction byte %2d is %02x\n",j,i[j]);
3595     }
3596     write_log("Please send the above info (starting at \"fault address\") to\n"
3597     "bmeyer@csse.monash.edu.au\n"
3598     "This shouldn't happen ;-)\n");
3599     fflush(stdout);
3600     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3601     }
3602     #endif
3603    
3604    
3605     /*************************************************************************
3606     * Checking for CPU features *
3607     *************************************************************************/
3608    
3609 gbeauche 1.3 struct cpuinfo_x86 {
3610     uae_u8 x86; // CPU family
3611     uae_u8 x86_vendor; // CPU vendor
3612     uae_u8 x86_processor; // CPU canonical processor type
3613     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3614     uae_u32 x86_hwcap;
3615     uae_u8 x86_model;
3616     uae_u8 x86_mask;
3617     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3618     char x86_vendor_id[16];
3619     };
3620     struct cpuinfo_x86 cpuinfo;
3621    
/* CPU vendors, stored in cpuinfo_x86::x86_vendor */
enum {
	X86_VENDOR_INTEL		= 0,
	X86_VENDOR_CYRIX		= 1,
	X86_VENDOR_AMD			= 2,
	X86_VENDOR_UMC			= 3,
	X86_VENDOR_NEXGEN		= 4,
	X86_VENDOR_CENTAUR		= 5,
	X86_VENDOR_RISE			= 6,
	X86_VENDOR_TRANSMETA	= 7,
	X86_VENDOR_NSC			= 8,
	X86_VENDOR_UNKNOWN		= 0xff	/* vendor string not recognized */
};
3634    
/* Canonical processor types, stored in cpuinfo_x86::x86_processor.  The
   values index x86_processor_string_table[] and x86_alignments[], so the
   order here must match both tables. */
enum {
	X86_PROCESSOR_I386,			/* 80386 */
	X86_PROCESSOR_I486,			/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM,
	X86_PROCESSOR_PENTIUMPRO,
	X86_PROCESSOR_K6,
	X86_PROCESSOR_ATHLON,
	X86_PROCESSOR_PENTIUM4,
	X86_PROCESSOR_X86_64,
	X86_PROCESSOR_max			/* number of entries; also "not determined yet" */
};
3646    
3647     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3648     "80386",
3649     "80486",
3650     "Pentium",
3651     "PentiumPro",
3652     "K6",
3653     "Athlon",
3654 gbeauche 1.16 "Pentium4",
3655 gbeauche 1.28 "x86-64"
3656 gbeauche 1.3 };
3657    
3658     static struct ptt {
3659     const int align_loop;
3660     const int align_loop_max_skip;
3661     const int align_jump;
3662     const int align_jump_max_skip;
3663     const int align_func;
3664     }
3665     x86_alignments[X86_PROCESSOR_max] = {
3666     { 4, 3, 4, 3, 4 },
3667     { 16, 15, 16, 15, 16 },
3668     { 16, 7, 16, 7, 16 },
3669     { 16, 15, 16, 7, 16 },
3670     { 32, 7, 32, 7, 32 },
3671 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3672 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3673     { 16, 7, 16, 7, 16 }
3674 gbeauche 1.3 };
3675 gbeauche 1.1
3676 gbeauche 1.3 static void
3677     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3678 gbeauche 1.1 {
3679 gbeauche 1.3 char *v = c->x86_vendor_id;
3680    
3681     if (!strcmp(v, "GenuineIntel"))
3682     c->x86_vendor = X86_VENDOR_INTEL;
3683     else if (!strcmp(v, "AuthenticAMD"))
3684     c->x86_vendor = X86_VENDOR_AMD;
3685     else if (!strcmp(v, "CyrixInstead"))
3686     c->x86_vendor = X86_VENDOR_CYRIX;
3687     else if (!strcmp(v, "Geode by NSC"))
3688     c->x86_vendor = X86_VENDOR_NSC;
3689     else if (!strcmp(v, "UMC UMC UMC "))
3690     c->x86_vendor = X86_VENDOR_UMC;
3691     else if (!strcmp(v, "CentaurHauls"))
3692     c->x86_vendor = X86_VENDOR_CENTAUR;
3693     else if (!strcmp(v, "NexGenDriven"))
3694     c->x86_vendor = X86_VENDOR_NEXGEN;
3695     else if (!strcmp(v, "RiseRiseRise"))
3696     c->x86_vendor = X86_VENDOR_RISE;
3697     else if (!strcmp(v, "GenuineTMx86") ||
3698     !strcmp(v, "TransmetaCPU"))
3699     c->x86_vendor = X86_VENDOR_TRANSMETA;
3700     else
3701     c->x86_vendor = X86_VENDOR_UNKNOWN;
3702     }
3703 gbeauche 1.1
3704 gbeauche 1.3 static void
3705     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3706     {
3707 gbeauche 1.27 const int CPUID_SPACE = 4096;
3708     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3709     if (cpuid_space == VM_MAP_FAILED)
3710     abort();
3711     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3712    
3713 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3714 gbeauche 1.3 uae_u8* tmp=get_target();
3715 gbeauche 1.1
3716 gbeauche 1.20 s_op = op;
3717 gbeauche 1.3 set_target(cpuid_space);
3718     raw_push_l_r(0); /* eax */
3719     raw_push_l_r(1); /* ecx */
3720     raw_push_l_r(2); /* edx */
3721     raw_push_l_r(3); /* ebx */
3722 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3723 gbeauche 1.3 raw_cpuid(0);
3724 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3725     raw_mov_l_mr((uintptr)&s_ebx,3);
3726     raw_mov_l_mr((uintptr)&s_ecx,1);
3727     raw_mov_l_mr((uintptr)&s_edx,2);
3728 gbeauche 1.3 raw_pop_l_r(3);
3729     raw_pop_l_r(2);
3730     raw_pop_l_r(1);
3731     raw_pop_l_r(0);
3732     raw_ret();
3733     set_target(tmp);
3734 gbeauche 1.1
3735 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3736 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3737     if (ebx != NULL) *ebx = s_ebx;
3738     if (ecx != NULL) *ecx = s_ecx;
3739     if (edx != NULL) *edx = s_edx;
3740 gbeauche 1.27
3741     vm_release(cpuid_space, CPUID_SPACE);
3742 gbeauche 1.1 }
3743    
3744 gbeauche 1.3 static void
3745     raw_init_cpu(void)
3746 gbeauche 1.1 {
3747 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3748    
3749     /* Defaults */
3750 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3751 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3752     c->cpuid_level = -1; /* CPUID not detected */
3753     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3754     c->x86_vendor_id[0] = '\0'; /* Unset */
3755     c->x86_hwcap = 0;
3756    
3757     /* Get vendor name */
3758     c->x86_vendor_id[12] = '\0';
3759     cpuid(0x00000000,
3760     (uae_u32 *)&c->cpuid_level,
3761     (uae_u32 *)&c->x86_vendor_id[0],
3762     (uae_u32 *)&c->x86_vendor_id[8],
3763     (uae_u32 *)&c->x86_vendor_id[4]);
3764     x86_get_cpu_vendor(c);
3765    
3766     /* Intel-defined flags: level 0x00000001 */
3767     c->x86_brand_id = 0;
3768     if ( c->cpuid_level >= 0x00000001 ) {
3769     uae_u32 tfms, brand_id;
3770     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3771     c->x86 = (tfms >> 8) & 15;
3772 gbeauche 1.29 if (c->x86 == 0xf)
3773     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3774 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3775 gbeauche 1.29 if (c->x86_model == 0xf)
3776     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3777 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3778     c->x86_mask = tfms & 15;
3779     } else {
3780     /* Have CPUID level 0 only - unheard of */
3781     c->x86 = 4;
3782     }
3783    
3784 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3785     uae_u32 xlvl;
3786     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3787     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3788     if ( xlvl >= 0x80000001 ) {
3789 gbeauche 1.28 uae_u32 features, extra_features;
3790     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3791 gbeauche 1.16 if (features & (1 << 29)) {
3792     /* Assume x86-64 if long mode is supported */
3793 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3794 gbeauche 1.16 }
3795 gbeauche 1.28 if (extra_features & (1 << 0))
3796     have_lahf_lm = true;
3797 gbeauche 1.16 }
3798     }
3799    
3800 gbeauche 1.3 /* Canonicalize processor ID */
3801     switch (c->x86) {
3802     case 3:
3803     c->x86_processor = X86_PROCESSOR_I386;
3804     break;
3805     case 4:
3806     c->x86_processor = X86_PROCESSOR_I486;
3807     break;
3808     case 5:
3809     if (c->x86_vendor == X86_VENDOR_AMD)
3810     c->x86_processor = X86_PROCESSOR_K6;
3811     else
3812     c->x86_processor = X86_PROCESSOR_PENTIUM;
3813     break;
3814     case 6:
3815     if (c->x86_vendor == X86_VENDOR_AMD)
3816     c->x86_processor = X86_PROCESSOR_ATHLON;
3817     else
3818     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3819     break;
3820     case 15:
3821 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3822     switch (c->x86_vendor) {
3823     case X86_VENDOR_INTEL:
3824     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3825     break;
3826     case X86_VENDOR_AMD:
3827     /* Assume a 32-bit Athlon processor if not in long mode */
3828     c->x86_processor = X86_PROCESSOR_ATHLON;
3829     break;
3830     }
3831     }
3832     break;
3833 gbeauche 1.3 }
3834     if (c->x86_processor == X86_PROCESSOR_max) {
3835 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3836     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3837 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3838     fprintf(stderr, " Model : %d\n", c->x86_model);
3839     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3840 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3841 gbeauche 1.3 if (c->x86_brand_id)
3842     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3843     }
3844    
3845     /* Have CMOV support? */
3846 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3847 gbeauche 1.3
3848     /* Can the host CPU suffer from partial register stalls? */
3849     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3850     #if 1
3851     /* It appears that partial register writes are a bad idea even on
3852 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3853     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3854 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3855     have_rat_stall = true;
3856 gbeauche 1.1 #endif
3857 gbeauche 1.3
3858     /* Alignments */
3859     if (tune_alignment) {
3860     align_loops = x86_alignments[c->x86_processor].align_loop;
3861     align_jumps = x86_alignments[c->x86_processor].align_jump;
3862     }
3863    
3864     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3865     c->cpuid_level, c->x86_vendor_id,
3866     x86_processor_string_table[c->x86_processor]);
3867 gbeauche 1.1 }
3868    
3869 gbeauche 1.10 static bool target_check_bsf(void)
3870     {
3871     bool mismatch = false;
3872     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3873     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3874     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3875     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3876     for (int value = -1; value <= 1; value++) {
3877 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3878     unsigned long tmp = value;
3879 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3880 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3881 gbeauche 1.10 int OF = (flags >> 11) & 1;
3882     int SF = (flags >> 7) & 1;
3883     int ZF = (flags >> 6) & 1;
3884     int CF = flags & 1;
3885     tmp = (value == 0);
3886     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3887     mismatch = true;
3888     }
3889     }}}}
3890     if (mismatch)
3891     write_log("Target CPU defines all flags on BSF instruction\n");
3892     return !mismatch;
3893     }
3894    
3895 gbeauche 1.1
3896     /*************************************************************************
3897     * FPU stuff *
3898     *************************************************************************/
3899    
3900    
3901     static __inline__ void raw_fp_init(void)
3902     {
3903     int i;
3904    
3905     for (i=0;i<N_FREGS;i++)
3906     live.spos[i]=-2;
3907     live.tos=-1; /* Stack is empty */
3908     }
3909    
3910     static __inline__ void raw_fp_cleanup_drop(void)
3911     {
3912     #if 0
3913     /* using FINIT instead of popping all the entries.
3914     Seems to have side effects --- there is display corruption in
3915     Quake when this is used */
3916     if (live.tos>1) {
3917     emit_byte(0x9b);
3918     emit_byte(0xdb);
3919     emit_byte(0xe3);
3920     live.tos=-1;
3921     }
3922     #endif
3923     while (live.tos>=1) {
3924     emit_byte(0xde);
3925     emit_byte(0xd9);
3926     live.tos-=2;
3927     }
3928     while (live.tos>=0) {
3929     emit_byte(0xdd);
3930     emit_byte(0xd8);
3931     live.tos--;
3932     }
3933     raw_fp_init();
3934     }
3935    
3936     static __inline__ void make_tos(int r)
3937     {
3938     int p,q;
3939    
3940     if (live.spos[r]<0) { /* Register not yet on stack */
3941     emit_byte(0xd9);
3942     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3943     live.tos++;
3944     live.spos[r]=live.tos;
3945     live.onstack[live.tos]=r;
3946     return;
3947     }
3948     /* Register is on stack */
3949     if (live.tos==live.spos[r])
3950     return;
3951     p=live.spos[r];
3952     q=live.onstack[live.tos];
3953    
3954     emit_byte(0xd9);
3955     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3956     live.onstack[live.tos]=r;
3957     live.spos[r]=live.tos;
3958     live.onstack[p]=q;
3959     live.spos[q]=p;
3960     }
3961    
3962     static __inline__ void make_tos2(int r, int r2)
3963     {
3964     int q;
3965    
3966     make_tos(r2); /* Put the reg that's supposed to end up in position2
3967     on top */
3968    
3969     if (live.spos[r]<0) { /* Register not yet on stack */
3970     make_tos(r); /* This will extend the stack */
3971     return;
3972     }
3973     /* Register is on stack */
3974     emit_byte(0xd9);
3975     emit_byte(0xc9); /* Move r2 into position 2 */
3976    
3977     q=live.onstack[live.tos-1];
3978     live.onstack[live.tos]=q;
3979     live.spos[q]=live.tos;
3980     live.onstack[live.tos-1]=r2;
3981     live.spos[r2]=live.tos-1;
3982    
3983     make_tos(r); /* And r into 1 */
3984     }
3985    
3986     static __inline__ int stackpos(int r)
3987     {
3988     if (live.spos[r]<0)
3989     abort();
3990     if (live.tos<live.spos[r]) {
3991     printf("Looking for spos for fnreg %d\n",r);
3992     abort();
3993     }
3994     return live.tos-live.spos[r];
3995     }
3996    
3997     static __inline__ void usereg(int r)
3998     {
3999     if (live.spos[r]<0)
4000     make_tos(r);
4001     }
4002    
4003     /* This is called with one FP value in a reg *above* tos, which it will
4004     pop off the stack if necessary */
4005     static __inline__ void tos_make(int r)
4006     {
4007     if (live.spos[r]<0) {
4008     live.tos++;
4009     live.spos[r]=live.tos;
4010     live.onstack[live.tos]=r;
4011     return;
4012     }
4013     emit_byte(0xdd);
4014     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
4015     and pop it*/
4016     }
4017 gbeauche 1.23
4018     /* FP helper functions */
4019     #if USE_NEW_RTASM
4020     #define DEFINE_OP(NAME, GEN) \
4021     static inline void raw_##NAME(uint32 m) \
4022     { \
4023     GEN(m, X86_NOREG, X86_NOREG, 1); \
4024     }
4025     DEFINE_OP(fstl, FSTLm);
4026     DEFINE_OP(fstpl, FSTPLm);
4027     DEFINE_OP(fldl, FLDLm);
4028     DEFINE_OP(fildl, FILDLm);
4029     DEFINE_OP(fistl, FISTLm);
4030     DEFINE_OP(flds, FLDSm);
4031     DEFINE_OP(fsts, FSTSm);
4032     DEFINE_OP(fstpt, FSTPTm);
4033     DEFINE_OP(fldt, FLDTm);
4034     #else
4035     #define DEFINE_OP(NAME, OP1, OP2) \
4036     static inline void raw_##NAME(uint32 m) \
4037     { \
4038     emit_byte(OP1); \
4039     emit_byte(OP2); \
4040     emit_long(m); \
4041     }
4042     DEFINE_OP(fstl, 0xdd, 0x15);
4043     DEFINE_OP(fstpl, 0xdd, 0x1d);
4044     DEFINE_OP(fldl, 0xdd, 0x05);
4045     DEFINE_OP(fildl, 0xdb, 0x05);
4046     DEFINE_OP(fistl, 0xdb, 0x15);
4047     DEFINE_OP(flds, 0xd9, 0x05);
4048     DEFINE_OP(fsts, 0xd9, 0x15);
4049     DEFINE_OP(fstpt, 0xdb, 0x3d);
4050     DEFINE_OP(fldt, 0xdb, 0x2d);
4051     #endif
4052     #undef DEFINE_OP
4053    
4054 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4055     {
4056     make_tos(r);
4057 gbeauche 1.23 raw_fstl(m);
4058 gbeauche 1.1 }
4059     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4060    
4061     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4062     {
4063     make_tos(r);
4064 gbeauche 1.23 raw_fstpl(m);
4065 gbeauche 1.1 live.onstack[live.tos]=-1;
4066     live.tos--;
4067     live.spos[r]=-2;
4068     }
4069     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4070    
4071     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4072     {
4073 gbeauche 1.23 raw_fldl(m);
4074 gbeauche 1.1 tos_make(r);
4075     }
4076     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4077    
4078     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4079     {
4080 gbeauche 1.23 raw_fildl(m);
4081 gbeauche 1.1 tos_make(r);
4082     }
4083     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4084    
4085     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4086     {
4087     make_tos(r);
4088 gbeauche 1.23 raw_fistl(m);
4089 gbeauche 1.1 }
4090     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4091    
4092     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4093     {
4094 gbeauche 1.23 raw_flds(m);
4095 gbeauche 1.1 tos_make(r);
4096     }
4097     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4098    
4099     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4100     {
4101     make_tos(r);
4102 gbeauche 1.23 raw_fsts(m);
4103 gbeauche 1.1 }
4104     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4105    
4106     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4107     {
4108     int rs;
4109    
4110     /* Stupid x87 can't write a long double to mem without popping the
4111     stack! */
4112     usereg(r);
4113     rs=stackpos(r);
4114     emit_byte(0xd9); /* Get a copy to the top of stack */
4115     emit_byte(0xc0+rs);
4116    
4117 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4118 gbeauche 1.1 }
4119     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4120    
4121     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4122     {
4123     int rs;
4124    
4125     make_tos(r);
4126 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4127 gbeauche 1.1 live.onstack[live.tos]=-1;
4128     live.tos--;
4129     live.spos[r]=-2;
4130     }
4131     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4132    
4133     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4134     {
4135 gbeauche 1.23 raw_fldt(m);
4136 gbeauche 1.1 tos_make(r);
4137     }
4138     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4139    
4140     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4141     {
4142     emit_byte(0xd9);
4143     emit_byte(0xeb);
4144     tos_make(r);
4145     }
4146     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4147    
4148     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4149     {
4150     emit_byte(0xd9);
4151     emit_byte(0xec);
4152     tos_make(r);
4153     }
4154     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4155    
4156     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4157     {
4158     emit_byte(0xd9);
4159     emit_byte(0xea);
4160     tos_make(r);
4161     }
4162     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4163    
4164     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4165     {
4166     emit_byte(0xd9);
4167     emit_byte(0xed);
4168     tos_make(r);
4169     }
4170     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4171    
4172     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4173     {
4174     emit_byte(0xd9);
4175     emit_byte(0xe8);
4176     tos_make(r);
4177     }
4178     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4179    
4180     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4181     {
4182     emit_byte(0xd9);
4183     emit_byte(0xee);
4184     tos_make(r);
4185     }
4186     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4187    
4188     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4189     {
4190     int ds;
4191    
4192     usereg(s);
4193     ds=stackpos(s);
4194     if (ds==0 && live.spos[d]>=0) {
4195     /* source is on top of stack, and we already have the dest */
4196     int dd=stackpos(d);
4197     emit_byte(0xdd);
4198     emit_byte(0xd0+dd);
4199     }
4200     else {
4201     emit_byte(0xd9);
4202     emit_byte(0xc0+ds); /* duplicate source on tos */
4203     tos_make(d); /* store to destination, pop if necessary */
4204     }
4205     }
4206     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4207    
4208     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4209     {
4210     emit_byte(0xd9);
4211     emit_byte(0xa8+index);
4212     emit_long(base);
4213     }
4214     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4215    
4216    
4217     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4218     {
4219     int ds;
4220    
4221     if (d!=s) {
4222     usereg(s);
4223     ds=stackpos(s);
4224     emit_byte(0xd9);
4225     emit_byte(0xc0+ds); /* duplicate source */
4226     emit_byte(0xd9);
4227     emit_byte(0xfa); /* take square root */
4228     tos_make(d); /* store to destination */
4229     }
4230     else {
4231     make_tos(d);
4232     emit_byte(0xd9);
4233     emit_byte(0xfa); /* take square root */
4234     }
4235     }
4236     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4237    
4238     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4239     {
4240     int ds;
4241    
4242     if (d!=s) {
4243     usereg(s);
4244     ds=stackpos(s);
4245     emit_byte(0xd9);
4246     emit_byte(0xc0+ds); /* duplicate source */
4247     emit_byte(0xd9);
4248     emit_byte(0xe1); /* take fabs */
4249     tos_make(d); /* store to destination */
4250     }
4251     else {
4252     make_tos(d);
4253     emit_byte(0xd9);
4254     emit_byte(0xe1); /* take fabs */
4255     }
4256     }
4257     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4258    
4259     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4260     {
4261     int ds;
4262    
4263     if (d!=s) {
4264     usereg(s);
4265     ds=stackpos(s);
4266     emit_byte(0xd9);
4267     emit_byte(0xc0+ds); /* duplicate source */
4268     emit_byte(0xd9);
4269     emit_byte(0xfc); /* take frndint */
4270     tos_make(d); /* store to destination */
4271     }
4272     else {
4273     make_tos(d);
4274     emit_byte(0xd9);
4275     emit_byte(0xfc); /* take frndint */
4276     }
4277     }
4278     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4279    
4280     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4281     {
4282     int ds;
4283    
4284     if (d!=s) {
4285     usereg(s);
4286     ds=stackpos(s);
4287     emit_byte(0xd9);
4288     emit_byte(0xc0+ds); /* duplicate source */
4289     emit_byte(0xd9);
4290     emit_byte(0xff); /* take cos */
4291     tos_make(d); /* store to destination */
4292     }
4293     else {
4294     make_tos(d);
4295     emit_byte(0xd9);
4296     emit_byte(0xff); /* take cos */
4297     }
4298     }
4299     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4300    
4301     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4302     {
4303     int ds;
4304    
4305     if (d!=s) {
4306     usereg(s);
4307     ds=stackpos(s);
4308     emit_byte(0xd9);
4309     emit_byte(0xc0+ds); /* duplicate source */
4310     emit_byte(0xd9);
4311     emit_byte(0xfe); /* take sin */
4312     tos_make(d); /* store to destination */
4313     }
4314     else {
4315     make_tos(d);
4316     emit_byte(0xd9);
4317     emit_byte(0xfe); /* take sin */
4318     }
4319     }
4320     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4321    
4322 gbeauche 1.34 static const double one=1;
4323 gbeauche 1.1 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4324     {
4325     int ds;
4326    
4327     usereg(s);
4328     ds=stackpos(s);
4329     emit_byte(0xd9);
4330     emit_byte(0xc0+ds); /* duplicate source */
4331    
4332     emit_byte(0xd9);
4333     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4334     emit_byte(0xd9);
4335     emit_byte(0xfc); /* rndint */
4336     emit_byte(0xd9);
4337     emit_byte(0xc9); /* swap top two elements */
4338     emit_byte(0xd8);
4339     emit_byte(0xe1); /* subtract rounded from original */
4340     emit_byte(0xd9);
4341     emit_byte(0xf0); /* f2xm1 */
4342 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4343 gbeauche 1.1 emit_byte(0xd9);
4344     emit_byte(0xfd); /* and scale it */
4345     emit_byte(0xdd);
4346     emit_byte(0xd9); /* take he rounded value off */
4347     tos_make(d); /* store to destination */
4348     }
4349     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4350    
4351     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4352     {
4353     int ds;
4354    
4355     usereg(s);
4356     ds=stackpos(s);
4357     emit_byte(0xd9);
4358     emit_byte(0xc0+ds); /* duplicate source */
4359     emit_byte(0xd9);
4360     emit_byte(0xea); /* fldl2e */
4361     emit_byte(0xde);
4362     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4363    
4364     emit_byte(0xd9);
4365     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4366     emit_byte(0xd9);
4367     emit_byte(0xfc); /* rndint */
4368     emit_byte(0xd9);
4369     emit_byte(0xc9); /* swap top two elements */
4370     emit_byte(0xd8);
4371     emit_byte(0xe1); /* subtract rounded from original */
4372     emit_byte(0xd9);
4373     emit_byte(0xf0); /* f2xm1 */
4374 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4375 gbeauche 1.1 emit_byte(0xd9);
4376     emit_byte(0xfd); /* and scale it */
4377     emit_byte(0xdd);
4378     emit_byte(0xd9); /* take he rounded value off */
4379     tos_make(d); /* store to destination */
4380     }
4381     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4382    
4383     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4384     {
4385     int ds;
4386    
4387     usereg(s);
4388     ds=stackpos(s);
4389     emit_byte(0xd9);
4390     emit_byte(0xc0+ds); /* duplicate source */
4391     emit_byte(0xd9);
4392     emit_byte(0xe8); /* push '1' */
4393     emit_byte(0xd9);
4394     emit_byte(0xc9); /* swap top two */
4395     emit_byte(0xd9);
4396     emit_byte(0xf1); /* take 1*log2(x) */
4397     tos_make(d); /* store to destination */
4398     }
4399     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4400    
4401    
4402     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4403     {
4404     int ds;
4405    
4406     if (d!=s) {
4407     usereg(s);
4408     ds=stackpos(s);
4409     emit_byte(0xd9);
4410     emit_byte(0xc0+ds); /* duplicate source */
4411     emit_byte(0xd9);
4412     emit_byte(0xe0); /* take fchs */
4413     tos_make(d); /* store to destination */
4414     }
4415     else {
4416     make_tos(d);
4417     emit_byte(0xd9);
4418     emit_byte(0xe0); /* take fchs */
4419     }
4420     }
4421     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4422    
4423     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4424     {
4425     int ds;
4426    
4427     usereg(s);
4428     usereg(d);
4429    
4430     if (live.spos[s]==live.tos) {
4431     /* Source is on top of stack */
4432     ds=stackpos(d);
4433     emit_byte(0xdc);
4434     emit_byte(0xc0+ds); /* add source to dest*/
4435     }
4436     else {
4437     make_tos(d);
4438     ds=stackpos(s);
4439    
4440     emit_byte(0xd8);
4441     emit_byte(0xc0+ds); /* add source to dest*/
4442     }
4443     }
4444     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4445    
4446     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4447     {
4448     int ds;
4449    
4450     usereg(s);
4451     usereg(d);
4452    
4453     if (live.spos[s]==live.tos) {
4454     /* Source is on top of stack */
4455     ds=stackpos(d);
4456     emit_byte(0xdc);
4457     emit_byte(0xe8+ds); /* sub source from dest*/
4458     }
4459     else {
4460     make_tos(d);
4461     ds=stackpos(s);
4462    
4463     emit_byte(0xd8);
4464     emit_byte(0xe0+ds); /* sub src from dest */
4465     }
4466     }
4467     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4468    
4469     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4470     {
4471     int ds;
4472    
4473     usereg(s);
4474     usereg(d);
4475    
4476     make_tos(d);
4477     ds=stackpos(s);
4478    
4479     emit_byte(0xdd);
4480     emit_byte(0xe0+ds); /* cmp dest with source*/
4481     }
4482     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4483    
4484     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4485     {
4486     int ds;
4487    
4488     usereg(s);
4489     usereg(d);
4490    
4491     if (live.spos[s]==live.tos) {
4492     /* Source is on top of stack */
4493     ds=stackpos(d);
4494     emit_byte(0xdc);
4495     emit_byte(0xc8+ds); /* mul dest by source*/
4496     }
4497     else {
4498     make_tos(d);
4499     ds=stackpos(s);
4500    
4501     emit_byte(0xd8);
4502     emit_byte(0xc8+ds); /* mul dest by source*/
4503     }
4504     }
4505     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4506    
4507     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4508     {
4509     int ds;
4510    
4511     usereg(s);
4512     usereg(d);
4513    
4514     if (live.spos[s]==live.tos) {
4515     /* Source is on top of stack */
4516     ds=stackpos(d);
4517     emit_byte(0xdc);
4518     emit_byte(0xf8+ds); /* div dest by source */
4519     }
4520     else {
4521     make_tos(d);
4522     ds=stackpos(s);
4523    
4524     emit_byte(0xd8);
4525     emit_byte(0xf0+ds); /* div dest by source*/
4526     }
4527     }
4528     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4529    
4530     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4531     {
4532     int ds;
4533    
4534     usereg(s);
4535     usereg(d);
4536    
4537     make_tos2(d,s);
4538     ds=stackpos(s);
4539    
4540     if (ds!=1) {
4541     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4542     abort();
4543     }
4544     emit_byte(0xd9);
4545     emit_byte(0xf8); /* take rem from dest by source */
4546     }
4547     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4548    
4549     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4550     {
4551     int ds;
4552    
4553     usereg(s);
4554     usereg(d);
4555    
4556     make_tos2(d,s);
4557     ds=stackpos(s);
4558    
4559     if (ds!=1) {
4560     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4561     abort();
4562     }
4563     emit_byte(0xd9);
4564     emit_byte(0xf5); /* take rem1 from dest by source */
4565     }
4566     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4567    
4568    
4569     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4570     {
4571     make_tos(r);
4572     emit_byte(0xd9); /* ftst */
4573     emit_byte(0xe4);
4574     }
4575     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4576    
4577     /* %eax register is clobbered if target processor doesn't support fucomi */
4578     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4579     #define FFLAG_NREG EAX_INDEX
4580    
4581     static __inline__ void raw_fflags_into_flags(int r)
4582     {
4583     int p;
4584    
4585     usereg(r);
4586     p=stackpos(r);
4587    
4588     emit_byte(0xd9);
4589     emit_byte(0xee); /* Push 0 */
4590     emit_byte(0xd9);
4591     emit_byte(0xc9+p); /* swap top two around */
4592     if (have_cmov) {
4593     // gb-- fucomi is for P6 cores only, not K6-2 then...
4594     emit_byte(0xdb);
4595     emit_byte(0xe9+p); /* fucomi them */
4596     }
4597     else {
4598     emit_byte(0xdd);
4599     emit_byte(0xe1+p); /* fucom them */
4600     emit_byte(0x9b);
4601     emit_byte(0xdf);
4602     emit_byte(0xe0); /* fstsw ax */
4603     raw_sahf(0); /* sahf */
4604     }
4605     emit_byte(0xdd);
4606     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4607     }