ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.36
Committed: 2007-01-14T12:23:29Z (17 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.35: +127 -31 lines
Log Message:
Use SAHF_SETO_PROFITABLE wherever possible on x86-64, it's faster. This can't
be the default because some very ancient CPUs don't support LAHF in long mode

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numeric indices used to name the native general-purpose registers. */
#define EAX_INDEX 0
#define ECX_INDEX 1
#define EDX_INDEX 2
#define EBX_INDEX 3
#define ESP_INDEX 4
#define EBP_INDEX 5
#define ESI_INDEX 6
#define EDI_INDEX 7
#ifdef __x86_64__
/* Additional registers that only exist when targeting x86-64. */
#define R8_INDEX 8
#define R9_INDEX 9
#define R10_INDEX 10
#define R11_INDEX 11
#define R12_INDEX 12
#define R13_INDEX 13
#define R14_INDEX 14
#define R15_INDEX 15
#endif
/* Indices of the high-byte registers.
   XXX this has to match X86_Reg8H_Base + 4 */
#define AH_INDEX (0x10+4+EAX_INDEX)
#define CH_INDEX (0x10+4+ECX_INDEX)
#define DH_INDEX (0x10+4+EDX_INDEX)
#define BH_INDEX (0x10+4+EBX_INDEX)
60 gbeauche 1.1
/* Register holding the integer return value of a subroutine. */
#define REG_RESULT EAX_INDEX

/* Registers carrying the first and second subroutine argument. */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* MSVC _fastcall: parameters travel in ECX and EDX. */
#define REG_PAR1 ECX_INDEX
#define REG_PAR2 EDX_INDEX
#elif defined(__x86_64__)
#define REG_PAR1 EDI_INDEX
#define REG_PAR2 ESI_INDEX
#else
#define REG_PAR1 EAX_INDEX
#define REG_PAR2 EDX_INDEX
#endif

/* Register used for preloading regs.pc_p. */
#define REG_PC_PRE EAX_INDEX
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): identical to REG_PC_PRE in this configuration -- confirm
   that this is intended. */
#define REG_PC_TMP EAX_INDEX
#else
/* Another register, distinct from REG_PC_PRE. */
#define REG_PC_TMP ECX_INDEX
#endif

/* Register that can be used for the shift count; -1 if any reg will do. */
#define SHIFTCOUNT_NREG ECX_INDEX
/* After a 32x32 multiply, %eax holds the low 32 bits ... */
#define MUL_NREG1 EAX_INDEX
/* ... and %edx holds the high 32 bits. */
#define MUL_NREG2 EDX_INDEX

#define STACK_ALIGN 16
#define STACK_OFFSET sizeof(void *)
91    
92 gbeauche 1.1 uae_s8 always_used[]={4,-1};
93 gbeauche 1.20 #if defined(__x86_64__)
94     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96     #else
97 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
98     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 gbeauche 1.20 #endif
100 gbeauche 1.1
101 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
102     /* Make sure interpretive core does not use cpuopti */
103     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 gbeauche 1.20 #error FIXME: code not ready
105 gbeauche 1.17 #else
106 gbeauche 1.1 /* cpuopti mutate instruction handlers to assume registers are saved
107     by the caller */
108 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 gbeauche 1.17 #endif
110 gbeauche 1.1
111     /* This *should* be the same as call_saved. But:
112     - We might not really know which registers are saved, and which aren't,
113     so we need to preserve some, but don't want to rely on everyone else
114     also saving those registers
115     - Special registers (such like the stack pointer) should not be "preserved"
116     by pushing, even though they are "saved" across function calls
117     */
118 gbeauche 1.21 #if defined(__x86_64__)
119 gbeauche 1.32 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
121     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 gbeauche 1.21 #else
123 gbeauche 1.32 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 gbeauche 1.21 #endif
126 gbeauche 1.1
/* Whether classes of instructions do or don't clobber the native flags */

/* Instruction classes whose CLOBBER_ hook expands to nothing. */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE32
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE32
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW32
#define CLOBBER_SETCC

/* Instruction classes whose CLOBBER_ hook calls clobber_flags(). */
#define CLOBBER_SUB clobber_flags()
#define CLOBBER_SBB clobber_flags()
#define CLOBBER_CMP clobber_flags()
#define CLOBBER_ADD clobber_flags()
#define CLOBBER_ADC clobber_flags()
#define CLOBBER_AND clobber_flags()
#define CLOBBER_OR clobber_flags()
#define CLOBBER_XOR clobber_flags()
#define CLOBBER_ROL clobber_flags()
#define CLOBBER_ROR clobber_flags()
#define CLOBBER_SHLL clobber_flags()
#define CLOBBER_SHRL clobber_flags()
#define CLOBBER_SHRA clobber_flags()
#define CLOBBER_TEST clobber_flags()
#define CLOBBER_SW16 clobber_flags()
#define CLOBBER_MUL clobber_flags()
#define CLOBBER_BT clobber_flags()
#define CLOBBER_BSF clobber_flags()
162    
/* FIXME: disabled until that's proofread. */
/* The new run-time assembler is only switched on here for x86-64 builds. */
#ifdef __x86_64__
#define USE_NEW_RTASM 1
#endif
167    
168     #if USE_NEW_RTASM
169 gbeauche 1.13
170     #if defined(__x86_64__)
171     #define X86_TARGET_64BIT 1
172 gbeauche 1.35 /* The address override prefix causes a 5 cycles penalty on Intel Core
173     processors. Another solution would be to decompose the load in an LEA,
174     MOV (to zero-extend), MOV (from memory): is it better? */
175     #define ADDR32 x86_emit_byte(0x67),
176     #else
177     #define ADDR32 /**/
178 gbeauche 1.13 #endif
179     #define X86_FLAT_REGISTERS 0
180 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
181     #define X86_OPTIMIZE_ROTSHI 1
182 gbeauche 1.13 #include "codegen_x86.h"
183    
184     #define x86_emit_byte(B) emit_byte(B)
185     #define x86_emit_word(W) emit_word(W)
186     #define x86_emit_long(L) emit_long(L)
187 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
188 gbeauche 1.13 #define x86_get_target() get_target()
189     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
190    
/*
 * Report a code generation failure on stderr and terminate via abort().
 *
 *   msg      - description of the failure
 *   file     - source file of the reporting site
 *   line     - source line of the reporting site
 *   function - name of the reporting function
 *
 * Does not return.
 */
static void jit_fail(const char *msg, const char *file, int line, const char *function)
{
	fprintf(stderr,
		"JIT failure in function %s from file %s at line %d: %s\n",
		function,
		file,
		line,
		msg);
	abort();
}
197    
198     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
199     {
200 gbeauche 1.20 #if defined(__x86_64__)
201     PUSHQr(r);
202     #else
203 gbeauche 1.13 PUSHLr(r);
204 gbeauche 1.20 #endif
205 gbeauche 1.13 }
206     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
207    
208     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
209     {
210 gbeauche 1.20 #if defined(__x86_64__)
211     POPQr(r);
212     #else
213 gbeauche 1.13 POPLr(r);
214 gbeauche 1.20 #endif
215 gbeauche 1.13 }
216     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
217    
218 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
219     {
220     #if defined(__x86_64__)
221     POPQm(d, X86_NOREG, X86_NOREG, 1);
222     #else
223     POPLm(d, X86_NOREG, X86_NOREG, 1);
224     #endif
225     }
226     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
227    
228 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
229     {
230     BTLir(i, r);
231     }
232     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
233    
234     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
235     {
236     BTLrr(b, r);
237     }
238     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
239    
240     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
241     {
242     BTCLir(i, r);
243     }
244     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
245    
246     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
247     {
248     BTCLrr(b, r);
249     }
250     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
251    
252     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
253     {
254     BTRLir(i, r);
255     }
256     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
257    
258     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
259     {
260     BTRLrr(b, r);
261     }
262     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
263    
264     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
265     {
266     BTSLir(i, r);
267     }
268     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
269    
270     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
271     {
272     BTSLrr(b, r);
273     }
274     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
275    
276     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
277     {
278     SUBWir(i, d);
279     }
280     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
281    
282     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
283     {
284     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
285     }
286     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
287    
288     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
289     {
290     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
291     }
292     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
293    
294     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
295     {
296     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
297     }
298     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
299    
300     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
301     {
302     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
303     }
304     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
305    
306     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
307     {
308     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
309     }
310     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
311    
312     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
313     {
314     ROLBir(i, r);
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317    
318     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319     {
320     ROLWir(i, r);
321     }
322     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
323    
324     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
325     {
326     ROLLir(i, r);
327     }
328     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
329    
330     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
331     {
332     ROLLrr(r, d);
333     }
334     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
335    
336     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
337     {
338     ROLWrr(r, d);
339     }
340     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
341    
342     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
343     {
344     ROLBrr(r, d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
347    
348     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
349     {
350     SHLLrr(r, d);
351     }
352     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
353    
354     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
355     {
356     SHLWrr(r, d);
357     }
358     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
359    
360     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
361     {
362     SHLBrr(r, d);
363     }
364     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
365    
366     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
367     {
368     RORBir(i, r);
369     }
370     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
371    
372     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
373     {
374     RORWir(i, r);
375     }
376     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
377    
378     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
379     {
380     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
381     }
382     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
383    
384     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
385     {
386     RORLir(i, r);
387     }
388     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
389    
390     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
391     {
392     RORLrr(r, d);
393     }
394     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
395    
396     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
397     {
398     RORWrr(r, d);
399     }
400     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
401    
402     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
403     {
404     RORBrr(r, d);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
407    
408     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
409     {
410     SHRLrr(r, d);
411     }
412     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
413    
414     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
415     {
416     SHRWrr(r, d);
417     }
418     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
419    
420     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
421     {
422     SHRBrr(r, d);
423     }
424     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
425    
426     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
427     {
428 gbeauche 1.14 SARLrr(r, d);
429 gbeauche 1.13 }
430     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
431    
432     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
433     {
434 gbeauche 1.14 SARWrr(r, d);
435 gbeauche 1.13 }
436     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
437    
438     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
439     {
440 gbeauche 1.14 SARBrr(r, d);
441 gbeauche 1.13 }
442     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
443    
444     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
445     {
446     SHLLir(i, r);
447     }
448     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
449    
450     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
451     {
452     SHLWir(i, r);
453     }
454     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
455    
456     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
457     {
458     SHLBir(i, r);
459     }
460     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
461    
462     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
463     {
464     SHRLir(i, r);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
469     {
470     SHRWir(i, r);
471     }
472     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
473    
474     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
475     {
476     SHRBir(i, r);
477     }
478     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
479    
480     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
481     {
482 gbeauche 1.14 SARLir(i, r);
483 gbeauche 1.13 }
484     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
485    
486     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
487     {
488 gbeauche 1.14 SARWir(i, r);
489 gbeauche 1.13 }
490     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
491    
492     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
493     {
494 gbeauche 1.14 SARBir(i, r);
495 gbeauche 1.13 }
496     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
497    
498     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
499     {
500     SAHF();
501     }
502     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
503    
504     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
505     {
506     CPUID();
507     }
508     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
509    
510     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
511     {
512     LAHF();
513     }
514     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
515    
516     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
517     {
518     SETCCir(cc, d);
519     }
520     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
521    
522     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
523     {
524     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
525     }
526     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
527    
528     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
529     {
530 gbeauche 1.15 if (have_cmov)
531     CMOVLrr(cc, s, d);
532     else { /* replacement using branch and mov */
533     #if defined(__x86_64__)
534     write_log("x86-64 implementations are bound to have CMOV!\n");
535     abort();
536     #endif
537     JCCSii(cc^1, 2);
538     MOVLrr(s, d);
539     }
540 gbeauche 1.13 }
541     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
542    
543     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
544     {
545     BSFLrr(s, d);
546     }
547     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
548    
549 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
550     {
551     MOVSLQrr(s, d);
552     }
553     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
554    
555 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
556     {
557     MOVSWLrr(s, d);
558     }
559     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
560    
561     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
562     {
563     MOVSBLrr(s, d);
564     }
565     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
566    
567     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
568     {
569     MOVZWLrr(s, d);
570     }
571     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
572    
573     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
574     {
575     MOVZBLrr(s, d);
576     }
577     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
578    
579     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
580     {
581 gbeauche 1.14 IMULLrr(s, d);
582 gbeauche 1.13 }
583     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
584    
585     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
586     {
587 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
588     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
589 gbeauche 1.13 abort();
590 gbeauche 1.14 }
591     IMULLr(s);
592 gbeauche 1.13 }
593     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
594    
595     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
596     {
597 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
598     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
599 gbeauche 1.13 abort();
600 gbeauche 1.14 }
601     MULLr(s);
602 gbeauche 1.13 }
603     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
604    
605     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
606     {
607 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
608 gbeauche 1.13 }
609     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
610    
611     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
612     {
613     MOVBrr(s, d);
614     }
615     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
616    
617     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
618     {
619     MOVWrr(s, d);
620     }
621     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
622    
623     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
624     {
625 gbeauche 1.35 ADDR32 MOVLmr(0, baser, index, factor, d);
626 gbeauche 1.13 }
627     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
628    
629     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
630     {
631 gbeauche 1.35 ADDR32 MOVWmr(0, baser, index, factor, d);
632 gbeauche 1.13 }
633     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
634    
635     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
636     {
637 gbeauche 1.35 ADDR32 MOVBmr(0, baser, index, factor, d);
638 gbeauche 1.13 }
639     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
640    
641     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
642     {
643 gbeauche 1.35 ADDR32 MOVLrm(s, 0, baser, index, factor);
644 gbeauche 1.13 }
645     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
646    
647     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
648     {
649 gbeauche 1.35 ADDR32 MOVWrm(s, 0, baser, index, factor);
650 gbeauche 1.13 }
651     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
652    
653     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
654     {
655 gbeauche 1.35 ADDR32 MOVBrm(s, 0, baser, index, factor);
656 gbeauche 1.13 }
657     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
658    
659     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
660     {
661 gbeauche 1.35 ADDR32 MOVLrm(s, base, baser, index, factor);
662 gbeauche 1.13 }
663     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
664    
665     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
666     {
667 gbeauche 1.35 ADDR32 MOVWrm(s, base, baser, index, factor);
668 gbeauche 1.13 }
669     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
670    
671     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
672     {
673 gbeauche 1.35 ADDR32 MOVBrm(s, base, baser, index, factor);
674 gbeauche 1.13 }
675     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
676    
677     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
678     {
679 gbeauche 1.35 ADDR32 MOVLmr(base, baser, index, factor, d);
680 gbeauche 1.13 }
681     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
682    
683     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
684     {
685 gbeauche 1.35 ADDR32 MOVWmr(base, baser, index, factor, d);
686 gbeauche 1.13 }
687     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
688    
689     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
690     {
691 gbeauche 1.35 ADDR32 MOVBmr(base, baser, index, factor, d);
692 gbeauche 1.13 }
693     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
694    
695     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
696     {
697 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
698 gbeauche 1.13 }
699     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
700    
701     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
702     {
703 gbeauche 1.15 if (have_cmov)
704 gbeauche 1.35 ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
705 gbeauche 1.15 else { /* replacement using branch and mov */
706     #if defined(__x86_64__)
707     write_log("x86-64 implementations are bound to have CMOV!\n");
708     abort();
709     #endif
710     JCCSii(cond^1, 7);
711 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
712 gbeauche 1.15 }
713 gbeauche 1.13 }
714     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
715    
716     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
717     {
718 gbeauche 1.15 if (have_cmov)
719     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
720     else { /* replacement using branch and mov */
721     #if defined(__x86_64__)
722     write_log("x86-64 implementations are bound to have CMOV!\n");
723     abort();
724     #endif
725     JCCSii(cond^1, 6);
726     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
727     }
728 gbeauche 1.13 }
729     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
730    
731     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
732     {
733 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
734 gbeauche 1.13 }
735     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
736    
737     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
738     {
739 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
740 gbeauche 1.13 }
741     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
742    
743     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
744     {
745 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
746 gbeauche 1.13 }
747     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
748    
749     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
750     {
751 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
752 gbeauche 1.13 }
753     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
754    
755     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
756     {
757 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
758 gbeauche 1.13 }
759     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
760    
761     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
762     {
763 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
764 gbeauche 1.13 }
765     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
766    
767     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
768     {
769 gbeauche 1.35 ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
770 gbeauche 1.13 }
771     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
772    
773     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
774     {
775 gbeauche 1.35 ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
776 gbeauche 1.13 }
777     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
778    
779     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
780     {
781 gbeauche 1.35 ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
782 gbeauche 1.13 }
783     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
784    
785     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
786     {
787 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
788 gbeauche 1.13 }
789     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
790    
791     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
792     {
793 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
794 gbeauche 1.13 }
795     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
796    
797     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
798     {
799 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
800 gbeauche 1.13 }
801     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
802    
803     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
804     {
805     LEALmr(offset, s, X86_NOREG, 1, d);
806     }
807     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
808    
809     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
810     {
811     LEALmr(offset, s, index, factor, d);
812     }
813     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
814    
815     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
816     {
817     LEALmr(0, s, index, factor, d);
818     }
819     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
820    
821 gbeauche 1.36 LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
822     {
823     LEALmr(0, X86_NOREG, index, factor, d);
824     }
825     LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
826    
827 gbeauche 1.13 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
828     {
829 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
830 gbeauche 1.13 }
831     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
832    
833     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
834     {
835 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
836 gbeauche 1.13 }
837     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
838    
839     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
840     {
841 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
842 gbeauche 1.13 }
843     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
844    
845     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
846     {
847     BSWAPLr(r);
848     }
849     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
850    
851     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
852     {
853     ROLWir(8, r);
854     }
855     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
856    
857     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
858     {
859     MOVLrr(s, d);
860     }
861     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
862    
863     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
864     {
865     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
866     }
867     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
868    
869     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
870     {
871     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
872     }
873     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
874    
875     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
876     {
877     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
878     }
879     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
880    
881     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
882     {
883     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
884     }
885     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
886    
887     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
888     {
889     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
890     }
891     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
892    
893     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
894     {
895     MOVLir(s, d);
896     }
897     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
898    
899     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
900     {
901     MOVWir(s, d);
902     }
903     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
904    
905     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
906     {
907     MOVBir(s, d);
908     }
909     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
910    
911     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
912     {
913     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
914     }
915     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
916    
917     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
918     {
919     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
920     }
921     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
922    
923     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
924     {
925     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
926     }
927     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
928    
929     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
930     {
931     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
932     }
933     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
934    
935     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
936     {
937     TESTLir(i, d);
938     }
939     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
940    
941     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
942     {
943     TESTLrr(s, d);
944     }
945     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
946    
947     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
948     {
949     TESTWrr(s, d);
950     }
951     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
952    
953     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
954     {
955     TESTBrr(s, d);
956     }
957     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
958    
959 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
960     {
961     XORLir(i, d);
962     }
963     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
964    
965 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
966     {
967     ANDLir(i, d);
968     }
969     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
970    
971     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
972     {
973     ANDWir(i, d);
974     }
975     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
976    
977     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
978     {
979     ANDLrr(s, d);
980     }
981     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
982    
983     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
984     {
985     ANDWrr(s, d);
986     }
987     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
988    
989     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
990     {
991     ANDBrr(s, d);
992     }
993     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
994    
995     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
996     {
997     ORLir(i, d);
998     }
999     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1000    
1001     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1002     {
1003     ORLrr(s, d);
1004     }
1005     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1006    
1007     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1008     {
1009     ORWrr(s, d);
1010     }
1011     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1012    
1013     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1014     {
1015     ORBrr(s, d);
1016     }
1017     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1018    
1019     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1020     {
1021     ADCLrr(s, d);
1022     }
1023     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1024    
1025     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1026     {
1027     ADCWrr(s, d);
1028     }
1029     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1030    
1031     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1032     {
1033     ADCBrr(s, d);
1034     }
1035     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1036    
1037     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1038     {
1039     ADDLrr(s, d);
1040     }
1041     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1042    
1043     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1044     {
1045     ADDWrr(s, d);
1046     }
1047     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1048    
1049     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1050     {
1051     ADDBrr(s, d);
1052     }
1053     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1054    
1055     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1056     {
1057     SUBLir(i, d);
1058     }
1059     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1060    
1061     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1062     {
1063     SUBBir(i, d);
1064     }
1065     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1066    
1067     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1068     {
1069     ADDLir(i, d);
1070     }
1071     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1072    
1073     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1074     {
1075     ADDWir(i, d);
1076     }
1077     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1078    
1079     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1080     {
1081     ADDBir(i, d);
1082     }
1083     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1084    
1085     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1086     {
1087     SBBLrr(s, d);
1088     }
1089     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1090    
1091     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1092     {
1093     SBBWrr(s, d);
1094     }
1095     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1096    
1097     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1098     {
1099     SBBBrr(s, d);
1100     }
1101     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1102    
1103     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1104     {
1105     SUBLrr(s, d);
1106     }
1107     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1108    
1109     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1110     {
1111     SUBWrr(s, d);
1112     }
1113     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1114    
1115     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1116     {
1117     SUBBrr(s, d);
1118     }
1119     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1120    
1121     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1122     {
1123     CMPLrr(s, d);
1124     }
1125     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1126    
1127     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1128     {
1129     CMPLir(i, r);
1130     }
1131     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1132    
1133     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1134     {
1135     CMPWrr(s, d);
1136     }
1137     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1138    
1139     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1140     {
1141     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1142     }
1143     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1144    
1145     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1146     {
1147     CMPBir(i, d);
1148     }
1149     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1150    
1151     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1152     {
1153     CMPBrr(s, d);
1154     }
1155     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1156    
1157     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1158     {
1159 gbeauche 1.35 ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
1160 gbeauche 1.13 }
1161     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1162    
1163     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1164     {
1165     XORLrr(s, d);
1166     }
1167     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1168    
1169     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1170     {
1171     XORWrr(s, d);
1172     }
1173     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1174    
1175     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1176     {
1177     XORBrr(s, d);
1178     }
1179     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1180    
1181     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1182     {
1183     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1184     }
1185     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1186    
1187     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1188     {
1189     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1190     }
1191     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1192    
1193     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1194     {
1195     XCHGLrr(r2, r1);
1196     }
1197     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1198    
1199 gbeauche 1.36 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1200     {
1201     XCHGBrr(r2, r1);
1202     }
1203     LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1204    
1205 gbeauche 1.13 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1206     {
1207 gbeauche 1.18 PUSHF();
1208 gbeauche 1.13 }
1209     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1210    
1211     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1212     {
1213 gbeauche 1.18 POPF();
1214 gbeauche 1.13 }
1215     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1216    
1217 gbeauche 1.34 /* Generate floating-point instructions */
1218     static inline void x86_fadd_m(MEMR s)
1219     {
1220     FADDLm(s,X86_NOREG,X86_NOREG,1);
1221     }
1222    
1223 gbeauche 1.13 #else
1224    
1225 gbeauche 1.2 const bool optimize_accum = true;
1226 gbeauche 1.1 const bool optimize_imm8 = true;
1227     const bool optimize_shift_once = true;
1228    
1229     /*************************************************************************
1230     * Actual encoding of the instructions on the target CPU *
1231     *************************************************************************/
1232    
1233 gbeauche 1.2 static __inline__ int isaccum(int r)
1234     {
1235     return (r == EAX_INDEX);
1236     }
1237    
1238 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1239     {
1240     return (x>=-128 && x<=127);
1241     }
1242    
1243     static __inline__ int isword(uae_s32 x)
1244     {
1245     return (x>=-32768 && x<=32767);
1246     }
1247    
1248     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1249     {
1250     emit_byte(0x50+r);
1251     }
1252     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1253    
1254     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1255     {
1256     emit_byte(0x58+r);
1257     }
1258     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1259    
1260 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1261     {
1262     emit_byte(0x8f);
1263     emit_byte(0x05);
1264     emit_long(d);
1265     }
1266     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1267    
1268 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1269     {
1270     emit_byte(0x0f);
1271     emit_byte(0xba);
1272     emit_byte(0xe0+r);
1273     emit_byte(i);
1274     }
1275     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1276    
1277     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1278     {
1279     emit_byte(0x0f);
1280     emit_byte(0xa3);
1281     emit_byte(0xc0+8*b+r);
1282     }
1283     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1284    
1285     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1286     {
1287     emit_byte(0x0f);
1288     emit_byte(0xba);
1289     emit_byte(0xf8+r);
1290     emit_byte(i);
1291     }
1292     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1293    
1294     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1295     {
1296     emit_byte(0x0f);
1297     emit_byte(0xbb);
1298     emit_byte(0xc0+8*b+r);
1299     }
1300     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1301    
1302    
1303     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1304     {
1305     emit_byte(0x0f);
1306     emit_byte(0xba);
1307     emit_byte(0xf0+r);
1308     emit_byte(i);
1309     }
1310     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1311    
1312     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1313     {
1314     emit_byte(0x0f);
1315     emit_byte(0xb3);
1316     emit_byte(0xc0+8*b+r);
1317     }
1318     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1319    
1320     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1321     {
1322     emit_byte(0x0f);
1323     emit_byte(0xba);
1324     emit_byte(0xe8+r);
1325     emit_byte(i);
1326     }
1327     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1328    
1329     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1330     {
1331     emit_byte(0x0f);
1332     emit_byte(0xab);
1333     emit_byte(0xc0+8*b+r);
1334     }
1335     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1336    
1337     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1338     {
1339     emit_byte(0x66);
1340     if (isbyte(i)) {
1341     emit_byte(0x83);
1342     emit_byte(0xe8+d);
1343     emit_byte(i);
1344     }
1345     else {
1346 gbeauche 1.2 if (optimize_accum && isaccum(d))
1347     emit_byte(0x2d);
1348     else {
1349 gbeauche 1.1 emit_byte(0x81);
1350     emit_byte(0xe8+d);
1351 gbeauche 1.2 }
1352 gbeauche 1.1 emit_word(i);
1353     }
1354     }
1355     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1356    
1357    
1358     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1359     {
1360     emit_byte(0x8b);
1361     emit_byte(0x05+8*d);
1362     emit_long(s);
1363     }
1364     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1365    
1366     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1367     {
1368     emit_byte(0xc7);
1369     emit_byte(0x05);
1370     emit_long(d);
1371     emit_long(s);
1372     }
1373     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1374    
1375     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1376     {
1377     emit_byte(0x66);
1378     emit_byte(0xc7);
1379     emit_byte(0x05);
1380     emit_long(d);
1381     emit_word(s);
1382     }
1383     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1384    
1385     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1386     {
1387     emit_byte(0xc6);
1388     emit_byte(0x05);
1389     emit_long(d);
1390     emit_byte(s);
1391     }
1392     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1393    
1394     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1395     {
1396     if (optimize_shift_once && (i == 1)) {
1397     emit_byte(0xd0);
1398     emit_byte(0x05);
1399     emit_long(d);
1400     }
1401     else {
1402     emit_byte(0xc0);
1403     emit_byte(0x05);
1404     emit_long(d);
1405     emit_byte(i);
1406     }
1407     }
1408     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1409    
1410     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1411     {
1412     if (optimize_shift_once && (i == 1)) {
1413     emit_byte(0xd0);
1414     emit_byte(0xc0+r);
1415     }
1416     else {
1417     emit_byte(0xc0);
1418     emit_byte(0xc0+r);
1419     emit_byte(i);
1420     }
1421     }
1422     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1423    
1424     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1425     {
1426     emit_byte(0x66);
1427     emit_byte(0xc1);
1428     emit_byte(0xc0+r);
1429     emit_byte(i);
1430     }
1431     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1432    
1433     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1434     {
1435     if (optimize_shift_once && (i == 1)) {
1436     emit_byte(0xd1);
1437     emit_byte(0xc0+r);
1438     }
1439     else {
1440     emit_byte(0xc1);
1441     emit_byte(0xc0+r);
1442     emit_byte(i);
1443     }
1444     }
1445     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1446    
1447     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1448     {
1449     emit_byte(0xd3);
1450     emit_byte(0xc0+d);
1451     }
1452     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1453    
1454     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1455     {
1456     emit_byte(0x66);
1457     emit_byte(0xd3);
1458     emit_byte(0xc0+d);
1459     }
1460     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1461    
1462     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1463     {
1464     emit_byte(0xd2);
1465     emit_byte(0xc0+d);
1466     }
1467     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1468    
1469     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1470     {
1471     emit_byte(0xd3);
1472     emit_byte(0xe0+d);
1473     }
1474     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1475    
1476     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1477     {
1478     emit_byte(0x66);
1479     emit_byte(0xd3);
1480     emit_byte(0xe0+d);
1481     }
1482     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1483    
1484     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1485     {
1486     emit_byte(0xd2);
1487     emit_byte(0xe0+d);
1488     }
1489     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1490    
1491     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1492     {
1493     if (optimize_shift_once && (i == 1)) {
1494     emit_byte(0xd0);
1495     emit_byte(0xc8+r);
1496     }
1497     else {
1498     emit_byte(0xc0);
1499     emit_byte(0xc8+r);
1500     emit_byte(i);
1501     }
1502     }
1503     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1504    
1505     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1506     {
1507     emit_byte(0x66);
1508     emit_byte(0xc1);
1509     emit_byte(0xc8+r);
1510     emit_byte(i);
1511     }
1512     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1513    
1514     // gb-- used for making an fpcr value in compemu_fpp.cpp
1515     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1516     {
1517     emit_byte(0x0b);
1518     emit_byte(0x05+8*d);
1519     emit_long(s);
1520     }
1521     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1522    
1523     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1524     {
1525     if (optimize_shift_once && (i == 1)) {
1526     emit_byte(0xd1);
1527     emit_byte(0xc8+r);
1528     }
1529     else {
1530     emit_byte(0xc1);
1531     emit_byte(0xc8+r);
1532     emit_byte(i);
1533     }
1534     }
1535     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1536    
1537     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1538     {
1539     emit_byte(0xd3);
1540     emit_byte(0xc8+d);
1541     }
1542     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1543    
1544     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1545     {
1546     emit_byte(0x66);
1547     emit_byte(0xd3);
1548     emit_byte(0xc8+d);
1549     }
1550     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1551    
1552     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1553     {
1554     emit_byte(0xd2);
1555     emit_byte(0xc8+d);
1556     }
1557     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1558    
1559     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1560     {
1561     emit_byte(0xd3);
1562     emit_byte(0xe8+d);
1563     }
1564     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1565    
1566     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1567     {
1568     emit_byte(0x66);
1569     emit_byte(0xd3);
1570     emit_byte(0xe8+d);
1571     }
1572     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1573    
1574     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1575     {
1576     emit_byte(0xd2);
1577     emit_byte(0xe8+d);
1578     }
1579     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1580    
1581     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1582     {
1583     emit_byte(0xd3);
1584     emit_byte(0xf8+d);
1585     }
1586     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1587    
1588     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1589     {
1590     emit_byte(0x66);
1591     emit_byte(0xd3);
1592     emit_byte(0xf8+d);
1593     }
1594     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1595    
1596     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1597     {
1598     emit_byte(0xd2);
1599     emit_byte(0xf8+d);
1600     }
1601     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1602    
1603     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1604     {
1605     if (optimize_shift_once && (i == 1)) {
1606     emit_byte(0xd1);
1607     emit_byte(0xe0+r);
1608     }
1609     else {
1610     emit_byte(0xc1);
1611     emit_byte(0xe0+r);
1612     emit_byte(i);
1613     }
1614     }
1615     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1616    
1617     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1618     {
1619     emit_byte(0x66);
1620     emit_byte(0xc1);
1621     emit_byte(0xe0+r);
1622     emit_byte(i);
1623     }
1624     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1625    
1626     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1627     {
1628     if (optimize_shift_once && (i == 1)) {
1629     emit_byte(0xd0);
1630     emit_byte(0xe0+r);
1631     }
1632     else {
1633     emit_byte(0xc0);
1634     emit_byte(0xe0+r);
1635     emit_byte(i);
1636     }
1637     }
1638     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1639    
1640     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1641     {
1642     if (optimize_shift_once && (i == 1)) {
1643     emit_byte(0xd1);
1644     emit_byte(0xe8+r);
1645     }
1646     else {
1647     emit_byte(0xc1);
1648     emit_byte(0xe8+r);
1649     emit_byte(i);
1650     }
1651     }
1652     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1653    
1654     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1655     {
1656     emit_byte(0x66);
1657     emit_byte(0xc1);
1658     emit_byte(0xe8+r);
1659     emit_byte(i);
1660     }
1661     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1662    
1663     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1664     {
1665     if (optimize_shift_once && (i == 1)) {
1666     emit_byte(0xd0);
1667     emit_byte(0xe8+r);
1668     }
1669     else {
1670     emit_byte(0xc0);
1671     emit_byte(0xe8+r);
1672     emit_byte(i);
1673     }
1674     }
1675     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1676    
1677     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1678     {
1679     if (optimize_shift_once && (i == 1)) {
1680     emit_byte(0xd1);
1681     emit_byte(0xf8+r);
1682     }
1683     else {
1684     emit_byte(0xc1);
1685     emit_byte(0xf8+r);
1686     emit_byte(i);
1687     }
1688     }
1689     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1690    
1691     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1692     {
1693     emit_byte(0x66);
1694     emit_byte(0xc1);
1695     emit_byte(0xf8+r);
1696     emit_byte(i);
1697     }
1698     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1699    
1700     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1701     {
1702     if (optimize_shift_once && (i == 1)) {
1703     emit_byte(0xd0);
1704     emit_byte(0xf8+r);
1705     }
1706     else {
1707     emit_byte(0xc0);
1708     emit_byte(0xf8+r);
1709     emit_byte(i);
1710     }
1711     }
1712     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1713    
1714     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1715     {
1716     emit_byte(0x9e);
1717     }
1718     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1719    
1720     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1721     {
1722     emit_byte(0x0f);
1723     emit_byte(0xa2);
1724     }
1725     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1726    
1727     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1728     {
1729     emit_byte(0x9f);
1730     }
1731     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1732    
1733     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1734     {
1735     emit_byte(0x0f);
1736     emit_byte(0x90+cc);
1737     emit_byte(0xc0+d);
1738     }
1739     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1740    
1741     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1742     {
1743     emit_byte(0x0f);
1744     emit_byte(0x90+cc);
1745     emit_byte(0x05);
1746     emit_long(d);
1747     }
1748     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1749    
1750     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1751     {
1752     if (have_cmov) {
1753     emit_byte(0x0f);
1754     emit_byte(0x40+cc);
1755     emit_byte(0xc0+8*d+s);
1756     }
1757     else { /* replacement using branch and mov */
1758     int uncc=(cc^1);
1759     emit_byte(0x70+uncc);
1760     emit_byte(2); /* skip next 2 bytes if not cc=true */
1761     emit_byte(0x89);
1762     emit_byte(0xc0+8*s+d);
1763     }
1764     }
1765     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1766    
1767     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1768     {
1769     emit_byte(0x0f);
1770     emit_byte(0xbc);
1771     emit_byte(0xc0+8*d+s);
1772     }
1773     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1774    
1775     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1776     {
1777     emit_byte(0x0f);
1778     emit_byte(0xbf);
1779     emit_byte(0xc0+8*d+s);
1780     }
1781     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1782    
1783     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1784     {
1785     emit_byte(0x0f);
1786     emit_byte(0xbe);
1787     emit_byte(0xc0+8*d+s);
1788     }
1789     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1790    
1791     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1792     {
1793     emit_byte(0x0f);
1794     emit_byte(0xb7);
1795     emit_byte(0xc0+8*d+s);
1796     }
1797     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1798    
1799     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1800     {
1801     emit_byte(0x0f);
1802     emit_byte(0xb6);
1803     emit_byte(0xc0+8*d+s);
1804     }
1805     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1806    
1807     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1808     {
1809     emit_byte(0x0f);
1810     emit_byte(0xaf);
1811     emit_byte(0xc0+8*d+s);
1812     }
1813     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1814    
1815     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1816     {
1817     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1818     abort();
1819     emit_byte(0xf7);
1820     emit_byte(0xea);
1821     }
1822     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1823    
1824     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1825     {
1826     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1827     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1828     abort();
1829     }
1830     emit_byte(0xf7);
1831     emit_byte(0xe2);
1832     }
1833     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1834    
1835     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1836     {
1837     abort(); /* %^$&%^$%#^ x86! */
1838     emit_byte(0x0f);
1839     emit_byte(0xaf);
1840     emit_byte(0xc0+8*d+s);
1841     }
1842     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1843    
1844     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1845     {
1846     emit_byte(0x88);
1847     emit_byte(0xc0+8*s+d);
1848     }
1849     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1850    
1851     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1852     {
1853     emit_byte(0x66);
1854     emit_byte(0x89);
1855     emit_byte(0xc0+8*s+d);
1856     }
1857     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1858    
1859     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1860     {
1861     int isebp=(baser==5)?0x40:0;
1862     int fi;
1863    
1864     switch(factor) {
1865     case 1: fi=0; break;
1866     case 2: fi=1; break;
1867     case 4: fi=2; break;
1868     case 8: fi=3; break;
1869     default: abort();
1870     }
1871    
1872    
1873     emit_byte(0x8b);
1874     emit_byte(0x04+8*d+isebp);
1875     emit_byte(baser+8*index+0x40*fi);
1876     if (isebp)
1877     emit_byte(0x00);
1878     }
1879     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1880    
1881     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1882     {
1883     int fi;
1884     int isebp;
1885    
1886     switch(factor) {
1887     case 1: fi=0; break;
1888     case 2: fi=1; break;
1889     case 4: fi=2; break;
1890     case 8: fi=3; break;
1891     default: abort();
1892     }
1893     isebp=(baser==5)?0x40:0;
1894    
1895     emit_byte(0x66);
1896     emit_byte(0x8b);
1897     emit_byte(0x04+8*d+isebp);
1898     emit_byte(baser+8*index+0x40*fi);
1899     if (isebp)
1900     emit_byte(0x00);
1901     }
1902     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1903    
1904     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1905     {
1906     int fi;
1907     int isebp;
1908    
1909     switch(factor) {
1910     case 1: fi=0; break;
1911     case 2: fi=1; break;
1912     case 4: fi=2; break;
1913     case 8: fi=3; break;
1914     default: abort();
1915     }
1916     isebp=(baser==5)?0x40:0;
1917    
1918     emit_byte(0x8a);
1919     emit_byte(0x04+8*d+isebp);
1920     emit_byte(baser+8*index+0x40*fi);
1921     if (isebp)
1922     emit_byte(0x00);
1923     }
1924     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1925    
1926     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1927     {
1928     int fi;
1929     int isebp;
1930    
1931     switch(factor) {
1932     case 1: fi=0; break;
1933     case 2: fi=1; break;
1934     case 4: fi=2; break;
1935     case 8: fi=3; break;
1936     default: abort();
1937     }
1938    
1939    
1940     isebp=(baser==5)?0x40:0;
1941    
1942     emit_byte(0x89);
1943     emit_byte(0x04+8*s+isebp);
1944     emit_byte(baser+8*index+0x40*fi);
1945     if (isebp)
1946     emit_byte(0x00);
1947     }
1948     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1949    
1950     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1951     {
1952     int fi;
1953     int isebp;
1954    
1955     switch(factor) {
1956     case 1: fi=0; break;
1957     case 2: fi=1; break;
1958     case 4: fi=2; break;
1959     case 8: fi=3; break;
1960     default: abort();
1961     }
1962     isebp=(baser==5)?0x40:0;
1963    
1964     emit_byte(0x66);
1965     emit_byte(0x89);
1966     emit_byte(0x04+8*s+isebp);
1967     emit_byte(baser+8*index+0x40*fi);
1968     if (isebp)
1969     emit_byte(0x00);
1970     }
1971     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1972    
1973     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1974     {
1975     int fi;
1976     int isebp;
1977    
1978     switch(factor) {
1979     case 1: fi=0; break;
1980     case 2: fi=1; break;
1981     case 4: fi=2; break;
1982     case 8: fi=3; break;
1983     default: abort();
1984     }
1985     isebp=(baser==5)?0x40:0;
1986    
1987     emit_byte(0x88);
1988     emit_byte(0x04+8*s+isebp);
1989     emit_byte(baser+8*index+0x40*fi);
1990     if (isebp)
1991     emit_byte(0x00);
1992     }
1993     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1994    
1995     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1996     {
1997     int fi;
1998    
1999     switch(factor) {
2000     case 1: fi=0; break;
2001     case 2: fi=1; break;
2002     case 4: fi=2; break;
2003     case 8: fi=3; break;
2004     default: abort();
2005     }
2006    
2007     emit_byte(0x89);
2008     emit_byte(0x84+8*s);
2009     emit_byte(baser+8*index+0x40*fi);
2010     emit_long(base);
2011     }
2012     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2013    
2014     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2015     {
2016     int fi;
2017    
2018     switch(factor) {
2019     case 1: fi=0; break;
2020     case 2: fi=1; break;
2021     case 4: fi=2; break;
2022     case 8: fi=3; break;
2023     default: abort();
2024     }
2025    
2026     emit_byte(0x66);
2027     emit_byte(0x89);
2028     emit_byte(0x84+8*s);
2029     emit_byte(baser+8*index+0x40*fi);
2030     emit_long(base);
2031     }
2032     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2033    
2034     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2035     {
2036     int fi;
2037    
2038     switch(factor) {
2039     case 1: fi=0; break;
2040     case 2: fi=1; break;
2041     case 4: fi=2; break;
2042     case 8: fi=3; break;
2043     default: abort();
2044     }
2045    
2046     emit_byte(0x88);
2047     emit_byte(0x84+8*s);
2048     emit_byte(baser+8*index+0x40*fi);
2049     emit_long(base);
2050     }
2051     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2052    
2053     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2054     {
2055     int fi;
2056    
2057     switch(factor) {
2058     case 1: fi=0; break;
2059     case 2: fi=1; break;
2060     case 4: fi=2; break;
2061     case 8: fi=3; break;
2062     default: abort();
2063     }
2064    
2065     emit_byte(0x8b);
2066     emit_byte(0x84+8*d);
2067     emit_byte(baser+8*index+0x40*fi);
2068     emit_long(base);
2069     }
2070     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2071    
2072     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2073     {
2074     int fi;
2075    
2076     switch(factor) {
2077     case 1: fi=0; break;
2078     case 2: fi=1; break;
2079     case 4: fi=2; break;
2080     case 8: fi=3; break;
2081     default: abort();
2082     }
2083    
2084     emit_byte(0x66);
2085     emit_byte(0x8b);
2086     emit_byte(0x84+8*d);
2087     emit_byte(baser+8*index+0x40*fi);
2088     emit_long(base);
2089     }
2090     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2091    
2092     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2093     {
2094     int fi;
2095    
2096     switch(factor) {
2097     case 1: fi=0; break;
2098     case 2: fi=1; break;
2099     case 4: fi=2; break;
2100     case 8: fi=3; break;
2101     default: abort();
2102     }
2103    
2104     emit_byte(0x8a);
2105     emit_byte(0x84+8*d);
2106     emit_byte(baser+8*index+0x40*fi);
2107     emit_long(base);
2108     }
2109     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2110    
2111     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2112     {
2113     int fi;
2114     switch(factor) {
2115     case 1: fi=0; break;
2116     case 2: fi=1; break;
2117     case 4: fi=2; break;
2118     case 8: fi=3; break;
2119     default:
2120     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2121     abort();
2122     }
2123     emit_byte(0x8b);
2124     emit_byte(0x04+8*d);
2125     emit_byte(0x05+8*index+64*fi);
2126     emit_long(base);
2127     }
2128     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2129    
2130     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2131     {
2132     int fi;
2133     switch(factor) {
2134     case 1: fi=0; break;
2135     case 2: fi=1; break;
2136     case 4: fi=2; break;
2137     case 8: fi=3; break;
2138     default:
2139     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2140     abort();
2141     }
2142     if (have_cmov) {
2143     emit_byte(0x0f);
2144     emit_byte(0x40+cond);
2145     emit_byte(0x04+8*d);
2146     emit_byte(0x05+8*index+64*fi);
2147     emit_long(base);
2148     }
2149     else { /* replacement using branch and mov */
2150     int uncc=(cond^1);
2151     emit_byte(0x70+uncc);
2152     emit_byte(7); /* skip next 7 bytes if not cc=true */
2153     emit_byte(0x8b);
2154     emit_byte(0x04+8*d);
2155     emit_byte(0x05+8*index+64*fi);
2156     emit_long(base);
2157     }
2158     }
2159     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2160    
2161     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2162     {
2163     if (have_cmov) {
2164     emit_byte(0x0f);
2165     emit_byte(0x40+cond);
2166     emit_byte(0x05+8*d);
2167     emit_long(mem);
2168     }
2169     else { /* replacement using branch and mov */
2170     int uncc=(cond^1);
2171     emit_byte(0x70+uncc);
2172     emit_byte(6); /* skip next 6 bytes if not cc=true */
2173     emit_byte(0x8b);
2174     emit_byte(0x05+8*d);
2175     emit_long(mem);
2176     }
2177     }
2178     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2179    
2180     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2181     {
2182 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2183 gbeauche 1.1 emit_byte(0x8b);
2184     emit_byte(0x40+8*d+s);
2185     emit_byte(offset);
2186     }
2187     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2188    
2189     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2190     {
2191 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2192 gbeauche 1.1 emit_byte(0x66);
2193     emit_byte(0x8b);
2194     emit_byte(0x40+8*d+s);
2195     emit_byte(offset);
2196     }
2197     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2198    
2199     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2200     {
2201 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2202 gbeauche 1.1 emit_byte(0x8a);
2203     emit_byte(0x40+8*d+s);
2204     emit_byte(offset);
2205     }
2206     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2207    
2208     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2209     {
2210     emit_byte(0x8b);
2211     emit_byte(0x80+8*d+s);
2212     emit_long(offset);
2213     }
2214     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2215    
2216     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2217     {
2218     emit_byte(0x66);
2219     emit_byte(0x8b);
2220     emit_byte(0x80+8*d+s);
2221     emit_long(offset);
2222     }
2223     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2224    
2225     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2226     {
2227     emit_byte(0x8a);
2228     emit_byte(0x80+8*d+s);
2229     emit_long(offset);
2230     }
2231     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2232    
2233     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2234     {
2235 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2236 gbeauche 1.1 emit_byte(0xc7);
2237     emit_byte(0x40+d);
2238     emit_byte(offset);
2239     emit_long(i);
2240     }
2241     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2242    
2243     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2244     {
2245 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2246 gbeauche 1.1 emit_byte(0x66);
2247     emit_byte(0xc7);
2248     emit_byte(0x40+d);
2249     emit_byte(offset);
2250     emit_word(i);
2251     }
2252     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2253    
2254     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2255     {
2256 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2257 gbeauche 1.1 emit_byte(0xc6);
2258     emit_byte(0x40+d);
2259     emit_byte(offset);
2260     emit_byte(i);
2261     }
2262     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2263    
2264     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2265     {
2266 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2267 gbeauche 1.1 emit_byte(0x89);
2268     emit_byte(0x40+8*s+d);
2269     emit_byte(offset);
2270     }
2271     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2272    
2273     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2274     {
2275 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2276 gbeauche 1.1 emit_byte(0x66);
2277     emit_byte(0x89);
2278     emit_byte(0x40+8*s+d);
2279     emit_byte(offset);
2280     }
2281     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2282    
2283     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2284     {
2285 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2286 gbeauche 1.1 emit_byte(0x88);
2287     emit_byte(0x40+8*s+d);
2288     emit_byte(offset);
2289     }
2290     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2291    
2292     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2293     {
2294     if (optimize_imm8 && isbyte(offset)) {
2295     emit_byte(0x8d);
2296     emit_byte(0x40+8*d+s);
2297     emit_byte(offset);
2298     }
2299     else {
2300     emit_byte(0x8d);
2301     emit_byte(0x80+8*d+s);
2302     emit_long(offset);
2303     }
2304     }
2305     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2306    
2307     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2308     {
2309     int fi;
2310    
2311     switch(factor) {
2312     case 1: fi=0; break;
2313     case 2: fi=1; break;
2314     case 4: fi=2; break;
2315     case 8: fi=3; break;
2316     default: abort();
2317     }
2318    
2319     if (optimize_imm8 && isbyte(offset)) {
2320     emit_byte(0x8d);
2321     emit_byte(0x44+8*d);
2322     emit_byte(0x40*fi+8*index+s);
2323     emit_byte(offset);
2324     }
2325     else {
2326     emit_byte(0x8d);
2327     emit_byte(0x84+8*d);
2328     emit_byte(0x40*fi+8*index+s);
2329     emit_long(offset);
2330     }
2331     }
2332     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2333    
2334     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2335     {
2336     int isebp=(s==5)?0x40:0;
2337     int fi;
2338    
2339     switch(factor) {
2340     case 1: fi=0; break;
2341     case 2: fi=1; break;
2342     case 4: fi=2; break;
2343     case 8: fi=3; break;
2344     default: abort();
2345     }
2346    
2347     emit_byte(0x8d);
2348     emit_byte(0x04+8*d+isebp);
2349     emit_byte(0x40*fi+8*index+s);
2350     if (isebp)
2351     emit_byte(0);
2352     }
2353     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2354    
2355     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2356     {
2357     if (optimize_imm8 && isbyte(offset)) {
2358     emit_byte(0x89);
2359     emit_byte(0x40+8*s+d);
2360     emit_byte(offset);
2361     }
2362     else {
2363     emit_byte(0x89);
2364     emit_byte(0x80+8*s+d);
2365     emit_long(offset);
2366     }
2367     }
2368     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2369    
2370     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2371     {
2372     emit_byte(0x66);
2373     emit_byte(0x89);
2374     emit_byte(0x80+8*s+d);
2375     emit_long(offset);
2376     }
2377     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2378    
2379     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2380     {
2381     if (optimize_imm8 && isbyte(offset)) {
2382     emit_byte(0x88);
2383     emit_byte(0x40+8*s+d);
2384     emit_byte(offset);
2385     }
2386     else {
2387     emit_byte(0x88);
2388     emit_byte(0x80+8*s+d);
2389     emit_long(offset);
2390     }
2391     }
2392     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2393    
2394     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2395     {
2396     emit_byte(0x0f);
2397     emit_byte(0xc8+r);
2398     }
2399     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2400    
2401     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2402     {
2403     emit_byte(0x66);
2404     emit_byte(0xc1);
2405     emit_byte(0xc0+r);
2406     emit_byte(0x08);
2407     }
2408     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2409    
2410     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2411     {
2412     emit_byte(0x89);
2413     emit_byte(0xc0+8*s+d);
2414     }
2415     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2416    
2417     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2418     {
2419     emit_byte(0x89);
2420     emit_byte(0x05+8*s);
2421     emit_long(d);
2422     }
2423     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2424    
2425     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2426     {
2427     emit_byte(0x66);
2428     emit_byte(0x89);
2429     emit_byte(0x05+8*s);
2430     emit_long(d);
2431     }
2432     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2433    
2434     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2435     {
2436     emit_byte(0x66);
2437     emit_byte(0x8b);
2438     emit_byte(0x05+8*d);
2439     emit_long(s);
2440     }
2441     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2442    
2443     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2444     {
2445     emit_byte(0x88);
2446 gbeauche 1.33 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2447 gbeauche 1.1 emit_long(d);
2448     }
2449     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2450    
2451     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2452     {
2453     emit_byte(0x8a);
2454     emit_byte(0x05+8*d);
2455     emit_long(s);
2456     }
2457     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2458    
2459     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2460     {
2461     emit_byte(0xb8+d);
2462     emit_long(s);
2463     }
2464     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2465    
2466     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2467     {
2468     emit_byte(0x66);
2469     emit_byte(0xb8+d);
2470     emit_word(s);
2471     }
2472     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2473    
2474     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2475     {
2476     emit_byte(0xb0+d);
2477     emit_byte(s);
2478     }
2479     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2480    
2481     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2482     {
2483     emit_byte(0x81);
2484     emit_byte(0x15);
2485     emit_long(d);
2486     emit_long(s);
2487     }
2488     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2489    
2490     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2491     {
2492     if (optimize_imm8 && isbyte(s)) {
2493     emit_byte(0x83);
2494     emit_byte(0x05);
2495     emit_long(d);
2496     emit_byte(s);
2497     }
2498     else {
2499     emit_byte(0x81);
2500     emit_byte(0x05);
2501     emit_long(d);
2502     emit_long(s);
2503     }
2504     }
2505     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2506    
2507     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2508     {
2509     emit_byte(0x66);
2510     emit_byte(0x81);
2511     emit_byte(0x05);
2512     emit_long(d);
2513     emit_word(s);
2514     }
2515     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2516    
2517     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2518     {
2519     emit_byte(0x80);
2520     emit_byte(0x05);
2521     emit_long(d);
2522     emit_byte(s);
2523     }
2524     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2525    
2526     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2527     {
2528 gbeauche 1.2 if (optimize_accum && isaccum(d))
2529     emit_byte(0xa9);
2530     else {
2531 gbeauche 1.1 emit_byte(0xf7);
2532     emit_byte(0xc0+d);
2533 gbeauche 1.2 }
2534 gbeauche 1.1 emit_long(i);
2535     }
2536     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2537    
2538     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2539     {
2540     emit_byte(0x85);
2541     emit_byte(0xc0+8*s+d);
2542     }
2543     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2544    
2545     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2546     {
2547     emit_byte(0x66);
2548     emit_byte(0x85);
2549     emit_byte(0xc0+8*s+d);
2550     }
2551     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2552    
2553     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2554     {
2555     emit_byte(0x84);
2556     emit_byte(0xc0+8*s+d);
2557     }
2558     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2559    
2560 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2561     {
2562     emit_byte(0x81);
2563     emit_byte(0xf0+d);
2564     emit_long(i);
2565     }
2566     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2567    
2568 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2569     {
2570     if (optimize_imm8 && isbyte(i)) {
2571 gbeauche 1.2 emit_byte(0x83);
2572     emit_byte(0xe0+d);
2573     emit_byte(i);
2574 gbeauche 1.1 }
2575     else {
2576 gbeauche 1.2 if (optimize_accum && isaccum(d))
2577     emit_byte(0x25);
2578     else {
2579     emit_byte(0x81);
2580     emit_byte(0xe0+d);
2581     }
2582     emit_long(i);
2583 gbeauche 1.1 }
2584     }
2585     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2586    
2587     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2588     {
2589 gbeauche 1.2 emit_byte(0x66);
2590     if (optimize_imm8 && isbyte(i)) {
2591     emit_byte(0x83);
2592     emit_byte(0xe0+d);
2593     emit_byte(i);
2594     }
2595     else {
2596     if (optimize_accum && isaccum(d))
2597     emit_byte(0x25);
2598     else {
2599     emit_byte(0x81);
2600     emit_byte(0xe0+d);
2601     }
2602     emit_word(i);
2603     }
2604 gbeauche 1.1 }
2605     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2606    
2607     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2608     {
2609     emit_byte(0x21);
2610     emit_byte(0xc0+8*s+d);
2611     }
2612     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2613    
2614     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2615     {
2616     emit_byte(0x66);
2617     emit_byte(0x21);
2618     emit_byte(0xc0+8*s+d);
2619     }
2620     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2621    
2622     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2623     {
2624     emit_byte(0x20);
2625     emit_byte(0xc0+8*s+d);
2626     }
2627     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2628    
2629     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2630     {
2631     if (optimize_imm8 && isbyte(i)) {
2632     emit_byte(0x83);
2633     emit_byte(0xc8+d);
2634     emit_byte(i);
2635     }
2636     else {
2637 gbeauche 1.2 if (optimize_accum && isaccum(d))
2638     emit_byte(0x0d);
2639     else {
2640 gbeauche 1.1 emit_byte(0x81);
2641     emit_byte(0xc8+d);
2642 gbeauche 1.2 }
2643 gbeauche 1.1 emit_long(i);
2644     }
2645     }
2646     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2647    
2648     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2649     {
2650     emit_byte(0x09);
2651     emit_byte(0xc0+8*s+d);
2652     }
2653     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2654    
2655     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2656     {
2657     emit_byte(0x66);
2658     emit_byte(0x09);
2659     emit_byte(0xc0+8*s+d);
2660     }
2661     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2662    
2663     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2664     {
2665     emit_byte(0x08);
2666     emit_byte(0xc0+8*s+d);
2667     }
2668     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2669    
2670     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2671     {
2672     emit_byte(0x11);
2673     emit_byte(0xc0+8*s+d);
2674     }
2675     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2676    
2677     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2678     {
2679     emit_byte(0x66);
2680     emit_byte(0x11);
2681     emit_byte(0xc0+8*s+d);
2682     }
2683     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2684    
2685     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2686     {
2687     emit_byte(0x10);
2688     emit_byte(0xc0+8*s+d);
2689     }
2690     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2691    
2692     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2693     {
2694     emit_byte(0x01);
2695     emit_byte(0xc0+8*s+d);
2696     }
2697     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2698    
2699     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2700     {
2701     emit_byte(0x66);
2702     emit_byte(0x01);
2703     emit_byte(0xc0+8*s+d);
2704     }
2705     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2706    
2707     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2708     {
2709     emit_byte(0x00);
2710     emit_byte(0xc0+8*s+d);
2711     }
2712     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2713    
2714     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2715     {
2716     if (isbyte(i)) {
2717     emit_byte(0x83);
2718     emit_byte(0xe8+d);
2719     emit_byte(i);
2720     }
2721     else {
2722 gbeauche 1.2 if (optimize_accum && isaccum(d))
2723     emit_byte(0x2d);
2724     else {
2725 gbeauche 1.1 emit_byte(0x81);
2726     emit_byte(0xe8+d);
2727 gbeauche 1.2 }
2728 gbeauche 1.1 emit_long(i);
2729     }
2730     }
2731     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2732    
2733     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2734     {
2735 gbeauche 1.2 if (optimize_accum && isaccum(d))
2736     emit_byte(0x2c);
2737     else {
2738 gbeauche 1.1 emit_byte(0x80);
2739     emit_byte(0xe8+d);
2740 gbeauche 1.2 }
2741 gbeauche 1.1 emit_byte(i);
2742     }
2743     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2744    
2745     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2746     {
2747     if (isbyte(i)) {
2748     emit_byte(0x83);
2749     emit_byte(0xc0+d);
2750     emit_byte(i);
2751     }
2752     else {
2753 gbeauche 1.2 if (optimize_accum && isaccum(d))
2754     emit_byte(0x05);
2755     else {
2756 gbeauche 1.1 emit_byte(0x81);
2757     emit_byte(0xc0+d);
2758 gbeauche 1.2 }
2759 gbeauche 1.1 emit_long(i);
2760     }
2761     }
2762     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2763    
2764     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2765     {
2766 gbeauche 1.2 emit_byte(0x66);
2767 gbeauche 1.1 if (isbyte(i)) {
2768     emit_byte(0x83);
2769     emit_byte(0xc0+d);
2770     emit_byte(i);
2771     }
2772     else {
2773 gbeauche 1.2 if (optimize_accum && isaccum(d))
2774     emit_byte(0x05);
2775     else {
2776 gbeauche 1.1 emit_byte(0x81);
2777     emit_byte(0xc0+d);
2778 gbeauche 1.2 }
2779 gbeauche 1.1 emit_word(i);
2780     }
2781     }
2782     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2783    
2784     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2785     {
2786 gbeauche 1.2 if (optimize_accum && isaccum(d))
2787     emit_byte(0x04);
2788     else {
2789     emit_byte(0x80);
2790     emit_byte(0xc0+d);
2791     }
2792 gbeauche 1.1 emit_byte(i);
2793     }
2794     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2795    
2796     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2797     {
2798     emit_byte(0x19);
2799     emit_byte(0xc0+8*s+d);
2800     }
2801     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2802    
2803     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2804     {
2805     emit_byte(0x66);
2806     emit_byte(0x19);
2807     emit_byte(0xc0+8*s+d);
2808     }
2809     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2810    
2811     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2812     {
2813     emit_byte(0x18);
2814     emit_byte(0xc0+8*s+d);
2815     }
2816     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2817    
2818     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2819     {
2820     emit_byte(0x29);
2821     emit_byte(0xc0+8*s+d);
2822     }
2823     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2824    
2825     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2826     {
2827     emit_byte(0x66);
2828     emit_byte(0x29);
2829     emit_byte(0xc0+8*s+d);
2830     }
2831     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2832    
2833     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2834     {
2835     emit_byte(0x28);
2836     emit_byte(0xc0+8*s+d);
2837     }
2838     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2839    
2840     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2841     {
2842     emit_byte(0x39);
2843     emit_byte(0xc0+8*s+d);
2844     }
2845     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2846    
2847     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2848     {
2849     if (optimize_imm8 && isbyte(i)) {
2850     emit_byte(0x83);
2851     emit_byte(0xf8+r);
2852     emit_byte(i);
2853     }
2854     else {
2855 gbeauche 1.2 if (optimize_accum && isaccum(r))
2856     emit_byte(0x3d);
2857     else {
2858 gbeauche 1.1 emit_byte(0x81);
2859     emit_byte(0xf8+r);
2860 gbeauche 1.2 }
2861 gbeauche 1.1 emit_long(i);
2862     }
2863     }
2864     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2865    
2866     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2867     {
2868     emit_byte(0x66);
2869     emit_byte(0x39);
2870     emit_byte(0xc0+8*s+d);
2871     }
2872     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2873    
2874 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2875     {
2876     emit_byte(0x80);
2877     emit_byte(0x3d);
2878     emit_long(d);
2879     emit_byte(s);
2880     }
2881     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2882    
2883 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2884     {
2885 gbeauche 1.2 if (optimize_accum && isaccum(d))
2886     emit_byte(0x3c);
2887     else {
2888 gbeauche 1.1 emit_byte(0x80);
2889     emit_byte(0xf8+d);
2890 gbeauche 1.2 }
2891 gbeauche 1.1 emit_byte(i);
2892     }
2893     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2894    
2895     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2896     {
2897     emit_byte(0x38);
2898     emit_byte(0xc0+8*s+d);
2899     }
2900     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2901    
2902     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2903     {
2904     int fi;
2905    
2906     switch(factor) {
2907     case 1: fi=0; break;
2908     case 2: fi=1; break;
2909     case 4: fi=2; break;
2910     case 8: fi=3; break;
2911     default: abort();
2912     }
2913     emit_byte(0x39);
2914     emit_byte(0x04+8*d);
2915     emit_byte(5+8*index+0x40*fi);
2916     emit_long(offset);
2917     }
2918     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2919    
2920     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2921     {
2922     emit_byte(0x31);
2923     emit_byte(0xc0+8*s+d);
2924     }
2925     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2926    
2927     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2928     {
2929     emit_byte(0x66);
2930     emit_byte(0x31);
2931     emit_byte(0xc0+8*s+d);
2932     }
2933     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2934    
2935     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2936     {
2937     emit_byte(0x30);
2938     emit_byte(0xc0+8*s+d);
2939     }
2940     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2941    
2942     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2943     {
2944     if (optimize_imm8 && isbyte(s)) {
2945     emit_byte(0x83);
2946     emit_byte(0x2d);
2947     emit_long(d);
2948     emit_byte(s);
2949     }
2950     else {
2951     emit_byte(0x81);
2952     emit_byte(0x2d);
2953     emit_long(d);
2954     emit_long(s);
2955     }
2956     }
2957     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2958    
2959     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2960     {
2961     if (optimize_imm8 && isbyte(s)) {
2962     emit_byte(0x83);
2963     emit_byte(0x3d);
2964     emit_long(d);
2965     emit_byte(s);
2966     }
2967     else {
2968     emit_byte(0x81);
2969     emit_byte(0x3d);
2970     emit_long(d);
2971     emit_long(s);
2972     }
2973     }
2974     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2975    
2976     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2977     {
2978     emit_byte(0x87);
2979     emit_byte(0xc0+8*r1+r2);
2980     }
2981     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2982    
2983 gbeauche 1.36 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
2984     {
2985     emit_byte(0x86);
2986     emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
2987     }
2988     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2989    
2990 gbeauche 1.1 /*************************************************************************
2991     * FIXME: mem access modes probably wrong *
2992     *************************************************************************/
2993    
2994     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2995     {
2996     emit_byte(0x9c);
2997     }
2998     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2999    
3000     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
3001     {
3002     emit_byte(0x9d);
3003     }
3004     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
3005 gbeauche 1.13
3006 gbeauche 1.34 /* Generate floating-point instructions */
3007     static inline void x86_fadd_m(MEMR s)
3008     {
3009     emit_byte(0xdc);
3010     emit_byte(0x05);
3011     emit_long(s);
3012     }
3013    
3014 gbeauche 1.13 #endif
3015 gbeauche 1.1
3016     /*************************************************************************
3017     * Unoptimizable stuff --- jump *
3018     *************************************************************************/
3019    
3020     static __inline__ void raw_call_r(R4 r)
3021     {
3022 gbeauche 1.20 #if USE_NEW_RTASM
3023     CALLsr(r);
3024     #else
3025 gbeauche 1.1 emit_byte(0xff);
3026     emit_byte(0xd0+r);
3027 gbeauche 1.20 #endif
3028 gbeauche 1.5 }
3029    
3030     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3031     {
3032 gbeauche 1.20 #if USE_NEW_RTASM
3033     CALLsm(base, X86_NOREG, r, m);
3034     #else
3035 gbeauche 1.5 int mu;
3036     switch(m) {
3037     case 1: mu=0; break;
3038     case 2: mu=1; break;
3039     case 4: mu=2; break;
3040     case 8: mu=3; break;
3041     default: abort();
3042     }
3043     emit_byte(0xff);
3044     emit_byte(0x14);
3045     emit_byte(0x05+8*r+0x40*mu);
3046     emit_long(base);
3047 gbeauche 1.20 #endif
3048 gbeauche 1.1 }
3049    
3050     static __inline__ void raw_jmp_r(R4 r)
3051     {
3052 gbeauche 1.20 #if USE_NEW_RTASM
3053     JMPsr(r);
3054     #else
3055 gbeauche 1.1 emit_byte(0xff);
3056     emit_byte(0xe0+r);
3057 gbeauche 1.20 #endif
3058 gbeauche 1.1 }
3059    
3060     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3061     {
3062 gbeauche 1.20 #if USE_NEW_RTASM
3063     JMPsm(base, X86_NOREG, r, m);
3064     #else
3065 gbeauche 1.1 int mu;
3066     switch(m) {
3067     case 1: mu=0; break;
3068     case 2: mu=1; break;
3069     case 4: mu=2; break;
3070     case 8: mu=3; break;
3071     default: abort();
3072     }
3073     emit_byte(0xff);
3074     emit_byte(0x24);
3075     emit_byte(0x05+8*r+0x40*mu);
3076     emit_long(base);
3077 gbeauche 1.20 #endif
3078 gbeauche 1.1 }
3079    
3080     static __inline__ void raw_jmp_m(uae_u32 base)
3081     {
3082     emit_byte(0xff);
3083     emit_byte(0x25);
3084     emit_long(base);
3085     }
3086    
3087    
3088     static __inline__ void raw_call(uae_u32 t)
3089     {
3090 gbeauche 1.20 #if USE_NEW_RTASM
3091     CALLm(t);
3092     #else
3093 gbeauche 1.1 emit_byte(0xe8);
3094     emit_long(t-(uae_u32)target-4);
3095 gbeauche 1.20 #endif
3096 gbeauche 1.1 }
3097    
3098     static __inline__ void raw_jmp(uae_u32 t)
3099     {
3100 gbeauche 1.20 #if USE_NEW_RTASM
3101     JMPm(t);
3102     #else
3103 gbeauche 1.1 emit_byte(0xe9);
3104     emit_long(t-(uae_u32)target-4);
3105 gbeauche 1.20 #endif
3106 gbeauche 1.1 }
3107    
3108     static __inline__ void raw_jl(uae_u32 t)
3109     {
3110     emit_byte(0x0f);
3111     emit_byte(0x8c);
3112 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3113 gbeauche 1.1 }
3114    
3115     static __inline__ void raw_jz(uae_u32 t)
3116     {
3117     emit_byte(0x0f);
3118     emit_byte(0x84);
3119 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3120 gbeauche 1.1 }
3121    
3122     static __inline__ void raw_jnz(uae_u32 t)
3123     {
3124     emit_byte(0x0f);
3125     emit_byte(0x85);
3126 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3127 gbeauche 1.1 }
3128    
3129     static __inline__ void raw_jnz_l_oponly(void)
3130     {
3131     emit_byte(0x0f);
3132     emit_byte(0x85);
3133     }
3134    
3135     static __inline__ void raw_jcc_l_oponly(int cc)
3136     {
3137     emit_byte(0x0f);
3138     emit_byte(0x80+cc);
3139     }
3140    
3141     static __inline__ void raw_jnz_b_oponly(void)
3142     {
3143     emit_byte(0x75);
3144     }
3145    
3146     static __inline__ void raw_jz_b_oponly(void)
3147     {
3148     emit_byte(0x74);
3149     }
3150    
3151     static __inline__ void raw_jcc_b_oponly(int cc)
3152     {
3153     emit_byte(0x70+cc);
3154     }
3155    
3156     static __inline__ void raw_jmp_l_oponly(void)
3157     {
3158     emit_byte(0xe9);
3159     }
3160    
3161     static __inline__ void raw_jmp_b_oponly(void)
3162     {
3163     emit_byte(0xeb);
3164     }
3165    
3166     static __inline__ void raw_ret(void)
3167     {
3168     emit_byte(0xc3);
3169     }
3170    
3171     static __inline__ void raw_nop(void)
3172     {
3173     emit_byte(0x90);
3174     }
3175    
3176 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3177     {
3178     /* Source: GNU Binutils 2.12.90.0.15 */
3179     /* Various efficient no-op patterns for aligning code labels.
3180     Note: Don't try to assemble the instructions in the comments.
3181     0L and 0w are not legal. */
3182     static const uae_u8 f32_1[] =
3183     {0x90}; /* nop */
3184     static const uae_u8 f32_2[] =
3185     {0x89,0xf6}; /* movl %esi,%esi */
3186     static const uae_u8 f32_3[] =
3187     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3188     static const uae_u8 f32_4[] =
3189     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3190     static const uae_u8 f32_5[] =
3191     {0x90, /* nop */
3192     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3193     static const uae_u8 f32_6[] =
3194     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3195     static const uae_u8 f32_7[] =
3196     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3197     static const uae_u8 f32_8[] =
3198     {0x90, /* nop */
3199     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3200     static const uae_u8 f32_9[] =
3201     {0x89,0xf6, /* movl %esi,%esi */
3202     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3203     static const uae_u8 f32_10[] =
3204     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3205     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3206     static const uae_u8 f32_11[] =
3207     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3208     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3209     static const uae_u8 f32_12[] =
3210     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3211     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3212     static const uae_u8 f32_13[] =
3213     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3214     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3215     static const uae_u8 f32_14[] =
3216     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3217     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3218     static const uae_u8 f32_15[] =
3219     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3220     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3221     static const uae_u8 f32_16[] =
3222     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3223     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3224     static const uae_u8 *const f32_patt[] = {
3225     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3226     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3227     };
3228 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3229 gbeauche 1.8
3230 gbeauche 1.21 #if defined(__x86_64__)
3231     /* The recommended way to pad 64bit code is to use NOPs preceded by
3232     maximally four 0x66 prefixes. Balance the size of nops. */
3233     if (nbytes == 0)
3234     return;
3235    
3236     int i;
3237     int nnops = (nbytes + 3) / 4;
3238     int len = nbytes / nnops;
3239     int remains = nbytes - nnops * len;
3240    
3241     for (i = 0; i < remains; i++) {
3242     emit_block(prefixes, len);
3243     raw_nop();
3244     }
3245     for (; i < nnops; i++) {
3246     emit_block(prefixes, len - 1);
3247     raw_nop();
3248     }
3249     #else
3250 gbeauche 1.8 int nloops = nbytes / 16;
3251     while (nloops-- > 0)
3252     emit_block(f32_16, sizeof(f32_16));
3253    
3254     nbytes %= 16;
3255     if (nbytes)
3256     emit_block(f32_patt[nbytes - 1], nbytes);
3257 gbeauche 1.21 #endif
3258 gbeauche 1.8 }
3259    
3260 gbeauche 1.1
3261     /*************************************************************************
3262     * Flag handling, to and fro UAE flag register *
3263     *************************************************************************/
3264    
3265 gbeauche 1.36 static __inline__ void raw_flags_evicted(int r)
3266 gbeauche 1.1 {
3267     //live.state[FLAGTMP].status=CLEAN;
3268     live.state[FLAGTMP].status=INMEM;
3269     live.state[FLAGTMP].realreg=-1;
3270     /* We just "evicted" FLAGTMP. */
3271     if (live.nat[r].nholds!=1) {
3272     /* Huh? */
3273     abort();
3274     }
3275     live.nat[r].nholds=0;
3276 gbeauche 1.36 }
3277    
3278     #define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */
3279     static __inline__ void raw_flags_to_reg_FLAGREG(int r)
3280     {
3281     raw_lahf(0); /* Most flags in AH */
3282     //raw_setcc(r,0); /* V flag in AL */
3283     raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3284    
3285     #if 1 /* Let's avoid those nasty partial register stalls */
3286     //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3287     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3288     raw_flags_evicted(r);
3289 gbeauche 1.1 #endif
3290     }
3291    
3292 gbeauche 1.36 #define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */
3293     static __inline__ void raw_reg_to_flags_FLAGREG(int r)
3294 gbeauche 1.1 {
3295     raw_cmp_b_ri(r,-127); /* set V */
3296     raw_sahf(0);
3297     }
3298    
3299 gbeauche 1.36 #define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
3300     static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
3301 gbeauche 1.24 {
3302     raw_mov_l_rr(tmp,s);
3303     raw_lahf(s); /* flags into ah */
3304     raw_and_l_ri(s,0xffffbfff);
3305     raw_and_l_ri(tmp,0x00004000);
3306     raw_xor_l_ri(tmp,0x00004000);
3307     raw_or_l(s,tmp);
3308     raw_sahf(s);
3309     }
3310    
/* The LAHF/SETcc variant needs no run-time initialisation. */
static __inline__ void raw_flags_init_FLAGREG(void)
{
}
3312 gbeauche 1.1
3313 gbeauche 1.36 #define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */
3314     static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
3315 gbeauche 1.1 {
3316     raw_pushfl();
3317     raw_pop_l_r(r);
3318 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3319 gbeauche 1.36 raw_flags_evicted(r);
3320 gbeauche 1.1 }
3321    
3322 gbeauche 1.36 #define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */
3323     static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
3324 gbeauche 1.1 {
3325     raw_push_l_r(r);
3326     raw_popfl();
3327     }
3328    
3329 gbeauche 1.36 #define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */
3330     static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
3331 gbeauche 1.24 {
3332     raw_mov_l_rr(tmp,s);
3333     raw_pushfl();
3334     raw_pop_l_r(s);
3335     raw_and_l_ri(s,0xffffffbf);
3336     raw_and_l_ri(tmp,0x00000040);
3337     raw_xor_l_ri(tmp,0x00000040);
3338     raw_or_l(s,tmp);
3339     raw_push_l_r(s);
3340     raw_popfl();
3341     }
3342 gbeauche 1.36
/* The PUSHF/POPF variant needs no run-time initialisation. */
static __inline__ void raw_flags_init_FLAGSTK(void)
{
}
3344    
3345     #if defined(__x86_64__)
3346     /* Try to use the LAHF/SETO method on x86_64 since it is faster.
3347     This can't be the default because some older CPUs don't support
3348     LAHF/SAHF in long mode. */
3349     static int FLAG_NREG1_FLAGGEN = 0;
3350     static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
3351     {
3352     if (have_lahf_lm) {
3353     // NOTE: the interpreter uses the normal EFLAGS layout
3354     // pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
3355     // sahf/lahf CF(8) ZF(14) SF(15) OF( 0)
3356     assert(r == 0);
3357     raw_setcc(r,0); /* V flag in AL */
3358     raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */
3359     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
3360     raw_lahf(0); /* most flags in AH */
3361     raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
3362     raw_flags_evicted(r);
3363     }
3364     else
3365     raw_flags_to_reg_FLAGSTK(r);
3366     }
3367    
3368     static int FLAG_NREG2_FLAGGEN = 0;
3369     static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
3370     {
3371     if (have_lahf_lm) {
3372     raw_xchg_b_rr(0,AH_INDEX);
3373     raw_cmp_b_ri(r,-120); /* set V */
3374     raw_sahf(0);
3375     }
3376     else
3377     raw_reg_to_flags_FLAGSTK(r);
3378     }
3379    
3380     static int FLAG_NREG3_FLAGGEN = 0;
3381     static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
3382     {
3383     if (have_lahf_lm)
3384     raw_flags_set_zero_FLAGREG(s, tmp);
3385     else
3386     raw_flags_set_zero_FLAGSTK(s, tmp);
3387     }
3388    
3389     static __inline__ void raw_flags_init_FLAGGEN(void)
3390     {
3391     if (have_lahf_lm) {
3392     FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
3393     FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
3394     FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGREG;
3395     }
3396     else {
3397     FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
3398     FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
3399     FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGSTK;
3400     }
3401     }
3402 gbeauche 1.1 #endif
3403    
/* Pick the flag handling variant for this build: LAHF/SETcc when the
   build declares it profitable, the run-time selected variant on x86-64,
   and PUSHF/POPF everywhere else. */
#if defined(SAHF_SETO_PROFITABLE)
#define FLAG_SUFFIX FLAGREG
#elif defined(__x86_64__)
#define FLAG_SUFFIX FLAGGEN
#else
#define FLAG_SUFFIX FLAGSTK
#endif

/* Paste FLAG_SUFFIX onto a name; the extra level of indirection makes
   sure FLAG_SUFFIX is expanded before the tokens are joined. */
#define FLAG_GLUE_2(x, y)		x ## _ ## y
#define FLAG_GLUE_1(x, y)		FLAG_GLUE_2(x, y)
#define FLAG_GLUE(x)			FLAG_GLUE_1(x, FLAG_SUFFIX)

/* Generic names, mapped onto the selected variant */
#define raw_flags_init			FLAG_GLUE(raw_flags_init)
#define raw_flags_to_reg		FLAG_GLUE(raw_flags_to_reg)
#define raw_reg_to_flags		FLAG_GLUE(raw_reg_to_flags)
#define raw_flags_set_zero		FLAG_GLUE(raw_flags_set_zero)
#define FLAG_NREG1			FLAG_GLUE(FLAG_NREG1)
#define FLAG_NREG2			FLAG_GLUE(FLAG_NREG2)
#define FLAG_NREG3			FLAG_GLUE(FLAG_NREG3)
3423    
3424 gbeauche 1.1 /* Apparently, there are enough instructions between flag store and
3425     flag reload to avoid the partial memory stall */
3426     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3427     {
3428     #if 1
3429 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3430 gbeauche 1.1 #else
3431 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3432     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3433 gbeauche 1.1 #endif
3434     }
3435    
3436     /* FLAGX is byte sized, and we *do* write it at that size */
3437     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3438     {
3439     if (live.nat[target].canbyte)
3440 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3441 gbeauche 1.1 else if (live.nat[target].canword)
3442 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3443 gbeauche 1.1 else
3444 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3445 gbeauche 1.1 }
3446    
3447 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3448     {
3449     if (off) raw_sub_l_ri(ESP_INDEX,off);
3450     }
3451    
3452 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3453     {
3454 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3455 gbeauche 1.1 }
3456    
3457     /*************************************************************************
3458     * Handling mistaken direct memory access *
3459     *************************************************************************/
3460    
3461     // gb-- I don't need that part for JIT Basilisk II
3462     #if defined(NATMEM_OFFSET) && 0
3463     #include <asm/sigcontext.h>
3464     #include <signal.h>
3465    
3466     #define SIG_READ 1
3467     #define SIG_WRITE 2
3468    
3469     static int in_handler=0;
3470     static uae_u8 veccode[256];
3471    
3472     static void vec(int x, struct sigcontext sc)
3473     {
3474     uae_u8* i=(uae_u8*)sc.eip;
3475     uae_u32 addr=sc.cr2;
3476     int r=-1;
3477     int size=4;
3478     int dir=-1;
3479     int len=0;
3480     int j;
3481    
3482     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3483     if (!canbang)
3484     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3485     if (in_handler)
3486     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3487    
3488     if (canbang && i>=compiled_code && i<=current_compile_p) {
3489     if (*i==0x66) {
3490     i++;
3491     size=2;
3492     len++;
3493     }
3494    
3495     switch(i[0]) {
3496     case 0x8a:
3497     if ((i[1]&0xc0)==0x80) {
3498     r=(i[1]>>3)&7;
3499     dir=SIG_READ;
3500     size=1;
3501     len+=6;
3502     break;
3503     }
3504     break;
3505     case 0x88:
3506     if ((i[1]&0xc0)==0x80) {
3507     r=(i[1]>>3)&7;
3508     dir=SIG_WRITE;
3509     size=1;
3510     len+=6;
3511     break;
3512     }
3513     break;
3514     case 0x8b:
3515     if ((i[1]&0xc0)==0x80) {
3516     r=(i[1]>>3)&7;
3517     dir=SIG_READ;
3518     len+=6;
3519     break;
3520     }
3521     if ((i[1]&0xc0)==0x40) {
3522     r=(i[1]>>3)&7;
3523     dir=SIG_READ;
3524     len+=3;
3525     break;
3526     }
3527     break;
3528     case 0x89:
3529     if ((i[1]&0xc0)==0x80) {
3530     r=(i[1]>>3)&7;
3531     dir=SIG_WRITE;
3532     len+=6;
3533     break;
3534     }
3535     if ((i[1]&0xc0)==0x40) {
3536     r=(i[1]>>3)&7;
3537     dir=SIG_WRITE;
3538     len+=3;
3539     break;
3540     }
3541     break;
3542     }
3543     }
3544    
3545     if (r!=-1) {
3546     void* pr=NULL;
3547     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3548    
3549     switch(r) {
3550     case 0: pr=&(sc.eax); break;
3551     case 1: pr=&(sc.ecx); break;
3552     case 2: pr=&(sc.edx); break;
3553     case 3: pr=&(sc.ebx); break;
3554     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3555     case 5: pr=(size>1)?
3556     (void*)(&(sc.ebp)):
3557     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3558     case 6: pr=(size>1)?
3559     (void*)(&(sc.esi)):
3560     (void*)(((uae_u8*)&(sc.edx))+1); break;
3561     case 7: pr=(size>1)?
3562     (void*)(&(sc.edi)):
3563     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3564     default: abort();
3565     }
3566     if (pr) {
3567     blockinfo* bi;
3568    
3569     if (currprefs.comp_oldsegv) {
3570     addr-=NATMEM_OFFSET;
3571    
3572     if ((addr>=0x10000000 && addr<0x40000000) ||
3573     (addr>=0x50000000)) {
3574     write_log("Suspicious address in %x SEGV handler.\n",addr);
3575     }
3576     if (dir==SIG_READ) {
3577     switch(size) {
3578     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3579     case 2: *((uae_u16*)pr)=get_word(addr); break;
3580     case 4: *((uae_u32*)pr)=get_long(addr); break;
3581     default: abort();
3582     }
3583     }
3584     else { /* write */
3585     switch(size) {
3586     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3587     case 2: put_word(addr,*((uae_u16*)pr)); break;
3588     case 4: put_long(addr,*((uae_u32*)pr)); break;
3589     default: abort();
3590     }
3591     }
3592     write_log("Handled one access!\n");
3593     fflush(stdout);
3594     segvcount++;
3595     sc.eip+=len;
3596     }
3597     else {
3598     void* tmp=target;
3599     int i;
3600     uae_u8 vecbuf[5];
3601    
3602     addr-=NATMEM_OFFSET;
3603    
3604     if ((addr>=0x10000000 && addr<0x40000000) ||
3605     (addr>=0x50000000)) {
3606     write_log("Suspicious address in %x SEGV handler.\n",addr);
3607     }
3608    
3609     target=(uae_u8*)sc.eip;
3610     for (i=0;i<5;i++)
3611     vecbuf[i]=target[i];
3612     emit_byte(0xe9);
3613 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3614 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3615    
3616     write_log("Handled one access!\n");
3617     fflush(stdout);
3618     segvcount++;
3619    
3620     target=veccode;
3621    
3622     if (dir==SIG_READ) {
3623     switch(size) {
3624     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3625     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3626     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3627     default: abort();
3628     }
3629     }
3630     else { /* write */
3631     switch(size) {
3632     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3633     case 2: put_word(addr,*((uae_u16*)pr)); break;
3634     case 4: put_long(addr,*((uae_u32*)pr)); break;
3635     default: abort();
3636     }
3637     }
3638     for (i=0;i<5;i++)
3639     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3640 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3641 gbeauche 1.1 emit_byte(0xe9);
3642 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3643 gbeauche 1.1 in_handler=1;
3644     target=tmp;
3645     }
3646     bi=active;
3647     while (bi) {
3648     if (bi->handler &&
3649     (uae_u8*)bi->direct_handler<=i &&
3650     (uae_u8*)bi->nexthandler>i) {
3651     write_log("deleted trigger (%p<%p<%p) %p\n",
3652     bi->handler,
3653     i,
3654     bi->nexthandler,
3655     bi->pc_p);
3656     invalidate_block(bi);
3657     raise_in_cl_list(bi);
3658     set_special(0);
3659     return;
3660     }
3661     bi=bi->next;
3662     }
3663     /* Not found in the active list. Might be a rom routine that
3664     is in the dormant list */
3665     bi=dormant;
3666     while (bi) {
3667     if (bi->handler &&
3668     (uae_u8*)bi->direct_handler<=i &&
3669     (uae_u8*)bi->nexthandler>i) {
3670     write_log("deleted trigger (%p<%p<%p) %p\n",
3671     bi->handler,
3672     i,
3673     bi->nexthandler,
3674     bi->pc_p);
3675     invalidate_block(bi);
3676     raise_in_cl_list(bi);
3677     set_special(0);
3678     return;
3679     }
3680     bi=bi->next;
3681     }
3682     write_log("Huh? Could not find trigger!\n");
3683     return;
3684     }
3685     }
3686     write_log("Can't handle access!\n");
3687     for (j=0;j<10;j++) {
3688     write_log("instruction byte %2d is %02x\n",j,i[j]);
3689     }
3690     write_log("Please send the above info (starting at \"fault address\") to\n"
3691     "bmeyer@csse.monash.edu.au\n"
3692     "This shouldn't happen ;-)\n");
3693     fflush(stdout);
3694     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3695     }
3696     #endif
3697    
3698    
3699     /*************************************************************************
3700     * Checking for CPU features *
3701     *************************************************************************/
3702    
3703 gbeauche 1.3 struct cpuinfo_x86 {
3704     uae_u8 x86; // CPU family
3705     uae_u8 x86_vendor; // CPU vendor
3706     uae_u8 x86_processor; // CPU canonical processor type
3707     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3708     uae_u32 x86_hwcap;
3709     uae_u8 x86_model;
3710     uae_u8 x86_mask;
3711     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3712     char x86_vendor_id[16];
3713     };
3714     struct cpuinfo_x86 cpuinfo;
3715    
/* CPU vendors, as stored in cpuinfo_x86.x86_vendor */
enum {
	X86_VENDOR_INTEL	= 0x00,
	X86_VENDOR_CYRIX	= 0x01,
	X86_VENDOR_AMD		= 0x02,
	X86_VENDOR_UMC		= 0x03,
	X86_VENDOR_NEXGEN	= 0x04,
	X86_VENDOR_CENTAUR	= 0x05,
	X86_VENDOR_RISE		= 0x06,
	X86_VENDOR_TRANSMETA	= 0x07,
	X86_VENDOR_NSC		= 0x08,
	X86_VENDOR_UNKNOWN	= 0xff	/* vendor string not recognised */
};
3728    
/* Canonical processor types; the values index the per-processor tables
   and X86_PROCESSOR_max is the number of types. */
enum {
	X86_PROCESSOR_I386 = 0,		/* 80386 */
	X86_PROCESSOR_I486,		/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM,
	X86_PROCESSOR_PENTIUMPRO,
	X86_PROCESSOR_K6,
	X86_PROCESSOR_ATHLON,
	X86_PROCESSOR_PENTIUM4,
	X86_PROCESSOR_X86_64,
	X86_PROCESSOR_max		/* count, not a processor */
};
3740    
3741     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3742     "80386",
3743     "80486",
3744     "Pentium",
3745     "PentiumPro",
3746     "K6",
3747     "Athlon",
3748 gbeauche 1.16 "Pentium4",
3749 gbeauche 1.28 "x86-64"
3750 gbeauche 1.3 };
3751    
3752     static struct ptt {
3753     const int align_loop;
3754     const int align_loop_max_skip;
3755     const int align_jump;
3756     const int align_jump_max_skip;
3757     const int align_func;
3758     }
3759     x86_alignments[X86_PROCESSOR_max] = {
3760     { 4, 3, 4, 3, 4 },
3761     { 16, 15, 16, 15, 16 },
3762     { 16, 7, 16, 7, 16 },
3763     { 16, 15, 16, 7, 16 },
3764     { 32, 7, 32, 7, 32 },
3765 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3766 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3767     { 16, 7, 16, 7, 16 }
3768 gbeauche 1.3 };
3769 gbeauche 1.1
3770 gbeauche 1.3 static void
3771     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3772 gbeauche 1.1 {
3773 gbeauche 1.3 char *v = c->x86_vendor_id;
3774    
3775     if (!strcmp(v, "GenuineIntel"))
3776     c->x86_vendor = X86_VENDOR_INTEL;
3777     else if (!strcmp(v, "AuthenticAMD"))
3778     c->x86_vendor = X86_VENDOR_AMD;
3779     else if (!strcmp(v, "CyrixInstead"))
3780     c->x86_vendor = X86_VENDOR_CYRIX;
3781     else if (!strcmp(v, "Geode by NSC"))
3782     c->x86_vendor = X86_VENDOR_NSC;
3783     else if (!strcmp(v, "UMC UMC UMC "))
3784     c->x86_vendor = X86_VENDOR_UMC;
3785     else if (!strcmp(v, "CentaurHauls"))
3786     c->x86_vendor = X86_VENDOR_CENTAUR;
3787     else if (!strcmp(v, "NexGenDriven"))
3788     c->x86_vendor = X86_VENDOR_NEXGEN;
3789     else if (!strcmp(v, "RiseRiseRise"))
3790     c->x86_vendor = X86_VENDOR_RISE;
3791     else if (!strcmp(v, "GenuineTMx86") ||
3792     !strcmp(v, "TransmetaCPU"))
3793     c->x86_vendor = X86_VENDOR_TRANSMETA;
3794     else
3795     c->x86_vendor = X86_VENDOR_UNKNOWN;
3796     }
3797 gbeauche 1.1
3798 gbeauche 1.3 static void
3799     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3800     {
3801 gbeauche 1.27 const int CPUID_SPACE = 4096;
3802     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3803     if (cpuid_space == VM_MAP_FAILED)
3804     abort();
3805     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3806    
3807 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3808 gbeauche 1.3 uae_u8* tmp=get_target();
3809 gbeauche 1.1
3810 gbeauche 1.20 s_op = op;
3811 gbeauche 1.3 set_target(cpuid_space);
3812     raw_push_l_r(0); /* eax */
3813     raw_push_l_r(1); /* ecx */
3814     raw_push_l_r(2); /* edx */
3815     raw_push_l_r(3); /* ebx */
3816 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3817 gbeauche 1.3 raw_cpuid(0);
3818 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3819     raw_mov_l_mr((uintptr)&s_ebx,3);
3820     raw_mov_l_mr((uintptr)&s_ecx,1);
3821     raw_mov_l_mr((uintptr)&s_edx,2);
3822 gbeauche 1.3 raw_pop_l_r(3);
3823     raw_pop_l_r(2);
3824     raw_pop_l_r(1);
3825     raw_pop_l_r(0);
3826     raw_ret();
3827     set_target(tmp);
3828 gbeauche 1.1
3829 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3830 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3831     if (ebx != NULL) *ebx = s_ebx;
3832     if (ecx != NULL) *ecx = s_ecx;
3833     if (edx != NULL) *edx = s_edx;
3834 gbeauche 1.27
3835     vm_release(cpuid_space, CPUID_SPACE);
3836 gbeauche 1.1 }
3837    
3838 gbeauche 1.3 static void
3839     raw_init_cpu(void)
3840 gbeauche 1.1 {
3841 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3842    
3843     /* Defaults */
3844 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3845 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3846     c->cpuid_level = -1; /* CPUID not detected */
3847     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3848     c->x86_vendor_id[0] = '\0'; /* Unset */
3849     c->x86_hwcap = 0;
3850    
3851     /* Get vendor name */
3852     c->x86_vendor_id[12] = '\0';
3853     cpuid(0x00000000,
3854     (uae_u32 *)&c->cpuid_level,
3855     (uae_u32 *)&c->x86_vendor_id[0],
3856     (uae_u32 *)&c->x86_vendor_id[8],
3857     (uae_u32 *)&c->x86_vendor_id[4]);
3858     x86_get_cpu_vendor(c);
3859    
3860     /* Intel-defined flags: level 0x00000001 */
3861     c->x86_brand_id = 0;
3862     if ( c->cpuid_level >= 0x00000001 ) {
3863     uae_u32 tfms, brand_id;
3864     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3865     c->x86 = (tfms >> 8) & 15;
3866 gbeauche 1.29 if (c->x86 == 0xf)
3867     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3868 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3869 gbeauche 1.29 if (c->x86_model == 0xf)
3870     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3871 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3872     c->x86_mask = tfms & 15;
3873     } else {
3874     /* Have CPUID level 0 only - unheard of */
3875     c->x86 = 4;
3876     }
3877    
3878 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3879     uae_u32 xlvl;
3880     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3881     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3882     if ( xlvl >= 0x80000001 ) {
3883 gbeauche 1.28 uae_u32 features, extra_features;
3884     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3885 gbeauche 1.16 if (features & (1 << 29)) {
3886     /* Assume x86-64 if long mode is supported */
3887 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3888 gbeauche 1.16 }
3889 gbeauche 1.28 if (extra_features & (1 << 0))
3890     have_lahf_lm = true;
3891 gbeauche 1.16 }
3892     }
3893    
3894 gbeauche 1.3 /* Canonicalize processor ID */
3895     switch (c->x86) {
3896     case 3:
3897     c->x86_processor = X86_PROCESSOR_I386;
3898     break;
3899     case 4:
3900     c->x86_processor = X86_PROCESSOR_I486;
3901     break;
3902     case 5:
3903     if (c->x86_vendor == X86_VENDOR_AMD)
3904     c->x86_processor = X86_PROCESSOR_K6;
3905     else
3906     c->x86_processor = X86_PROCESSOR_PENTIUM;
3907     break;
3908     case 6:
3909     if (c->x86_vendor == X86_VENDOR_AMD)
3910     c->x86_processor = X86_PROCESSOR_ATHLON;
3911     else
3912     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3913     break;
3914     case 15:
3915 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3916     switch (c->x86_vendor) {
3917     case X86_VENDOR_INTEL:
3918     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3919     break;
3920     case X86_VENDOR_AMD:
3921     /* Assume a 32-bit Athlon processor if not in long mode */
3922     c->x86_processor = X86_PROCESSOR_ATHLON;
3923     break;
3924     }
3925     }
3926     break;
3927 gbeauche 1.3 }
3928     if (c->x86_processor == X86_PROCESSOR_max) {
3929 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3930     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3931 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3932     fprintf(stderr, " Model : %d\n", c->x86_model);
3933     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3934 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3935 gbeauche 1.3 if (c->x86_brand_id)
3936     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3937     }
3938    
3939     /* Have CMOV support? */
3940 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3941 gbeauche 1.3
3942     /* Can the host CPU suffer from partial register stalls? */
3943     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3944     #if 1
3945     /* It appears that partial register writes are a bad idea even on
3946 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3947     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3948 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3949     have_rat_stall = true;
3950 gbeauche 1.1 #endif
3951 gbeauche 1.3
3952     /* Alignments */
3953     if (tune_alignment) {
3954     align_loops = x86_alignments[c->x86_processor].align_loop;
3955     align_jumps = x86_alignments[c->x86_processor].align_jump;
3956     }
3957    
3958     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3959     c->cpuid_level, c->x86_vendor_id,
3960     x86_processor_string_table[c->x86_processor]);
3961 gbeauche 1.36
3962     raw_flags_init();
3963 gbeauche 1.1 }
3964    
3965 gbeauche 1.10 static bool target_check_bsf(void)
3966     {
3967     bool mismatch = false;
3968     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3969     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3970     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3971     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3972     for (int value = -1; value <= 1; value++) {
3973 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3974     unsigned long tmp = value;
3975 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3976 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3977 gbeauche 1.10 int OF = (flags >> 11) & 1;
3978     int SF = (flags >> 7) & 1;
3979     int ZF = (flags >> 6) & 1;
3980     int CF = flags & 1;
3981     tmp = (value == 0);
3982     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3983     mismatch = true;
3984     }
3985     }}}}
3986     if (mismatch)
3987     write_log("Target CPU defines all flags on BSF instruction\n");
3988     return !mismatch;
3989     }
3990    
3991 gbeauche 1.1
3992     /*************************************************************************
3993     * FPU stuff *
3994     *************************************************************************/
3995    
3996    
3997     static __inline__ void raw_fp_init(void)
3998     {
3999     int i;
4000    
4001     for (i=0;i<N_FREGS;i++)
4002     live.spos[i]=-2;
4003     live.tos=-1; /* Stack is empty */
4004     }
4005    
4006     static __inline__ void raw_fp_cleanup_drop(void)
4007     {
4008     #if 0
4009     /* using FINIT instead of popping all the entries.
4010     Seems to have side effects --- there is display corruption in
4011     Quake when this is used */
4012     if (live.tos>1) {
4013     emit_byte(0x9b);
4014     emit_byte(0xdb);
4015     emit_byte(0xe3);
4016     live.tos=-1;
4017     }
4018     #endif
4019     while (live.tos>=1) {
4020     emit_byte(0xde);
4021     emit_byte(0xd9);
4022     live.tos-=2;
4023     }
4024     while (live.tos>=0) {
4025     emit_byte(0xdd);
4026     emit_byte(0xd8);
4027     live.tos--;
4028     }
4029     raw_fp_init();
4030     }
4031    
4032     static __inline__ void make_tos(int r)
4033     {
4034     int p,q;
4035    
4036     if (live.spos[r]<0) { /* Register not yet on stack */
4037     emit_byte(0xd9);
4038     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
4039     live.tos++;
4040     live.spos[r]=live.tos;
4041     live.onstack[live.tos]=r;
4042     return;
4043     }
4044     /* Register is on stack */
4045     if (live.tos==live.spos[r])
4046     return;
4047     p=live.spos[r];
4048     q=live.onstack[live.tos];
4049    
4050     emit_byte(0xd9);
4051     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
4052     live.onstack[live.tos]=r;
4053     live.spos[r]=live.tos;
4054     live.onstack[p]=q;
4055     live.spos[q]=p;
4056     }
4057    
4058     static __inline__ void make_tos2(int r, int r2)
4059     {
4060     int q;
4061    
4062     make_tos(r2); /* Put the reg that's supposed to end up in position2
4063     on top */
4064    
4065     if (live.spos[r]<0) { /* Register not yet on stack */
4066     make_tos(r); /* This will extend the stack */
4067     return;
4068     }
4069     /* Register is on stack */
4070     emit_byte(0xd9);
4071     emit_byte(0xc9); /* Move r2 into position 2 */
4072    
4073     q=live.onstack[live.tos-1];
4074     live.onstack[live.tos]=q;
4075     live.spos[q]=live.tos;
4076     live.onstack[live.tos-1]=r2;
4077     live.spos[r2]=live.tos-1;
4078    
4079     make_tos(r); /* And r into 1 */
4080     }
4081    
4082     static __inline__ int stackpos(int r)
4083     {
4084     if (live.spos[r]<0)
4085     abort();
4086     if (live.tos<live.spos[r]) {
4087     printf("Looking for spos for fnreg %d\n",r);
4088     abort();
4089     }
4090     return live.tos-live.spos[r];
4091     }
4092    
4093     static __inline__ void usereg(int r)
4094     {
4095     if (live.spos[r]<0)
4096     make_tos(r);
4097     }
4098    
4099     /* This is called with one FP value in a reg *above* tos, which it will
4100     pop off the stack if necessary */
4101     static __inline__ void tos_make(int r)
4102     {
4103     if (live.spos[r]<0) {
4104     live.tos++;
4105     live.spos[r]=live.tos;
4106     live.onstack[live.tos]=r;
4107     return;
4108     }
4109     emit_byte(0xdd);
4110     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
4111     and pop it*/
4112     }
4113 gbeauche 1.23
/* FP helper functions: each raw_<op>(m) emits the x87 instruction <op>
   with the 32-bit value m as the address of its memory operand. */
#if USE_NEW_RTASM
/* Runtime-assembler flavour: GEN is the emitter macro for the instruction.
   NOTE(review): the arguments look like (displacement, base, index, scale)
   with no base and no index register -- confirm against the rtasm header. */
#define DEFINE_OP(NAME, GEN)			\
static inline void raw_##NAME(uint32 m)		\
{						\
	GEN(m, X86_NOREG, X86_NOREG, 1);	\
}
DEFINE_OP(fstl,  FSTLm);
DEFINE_OP(fstpl, FSTPLm);
DEFINE_OP(fldl,  FLDLm);
DEFINE_OP(fildl, FILDLm);
DEFINE_OP(fistl, FISTLm);
DEFINE_OP(flds,  FLDSm);
DEFINE_OP(fsts,  FSTSm);
DEFINE_OP(fstpt, FSTPTm);
DEFINE_OP(fldt,  FLDTm);
#else
/* Hand-encoded flavour: OP1 is the opcode byte, OP2 the ModRM byte.
   Every ModRM value below has mod=00 and rm=101 (a bare 32-bit
   displacement); its reg field selects the operation. The displacement
   itself is emitted with emit_long(m). */
#define DEFINE_OP(NAME, OP1, OP2)		\
static inline void raw_##NAME(uint32 m)		\
{						\
	emit_byte(OP1);				\
	emit_byte(OP2);				\
	emit_long(m);				\
}
DEFINE_OP(fstl,  0xdd, 0x15);	/* DD /2: store st(0) as 64-bit real */
DEFINE_OP(fstpl, 0xdd, 0x1d);	/* DD /3: store st(0) as 64-bit real and pop */
DEFINE_OP(fldl,  0xdd, 0x05);	/* DD /0: push 64-bit real */
DEFINE_OP(fildl, 0xdb, 0x05);	/* DB /0: push 32-bit integer */
DEFINE_OP(fistl, 0xdb, 0x15);	/* DB /2: store st(0) as 32-bit integer */
DEFINE_OP(flds,  0xd9, 0x05);	/* D9 /0: push 32-bit real */
DEFINE_OP(fsts,  0xd9, 0x15);	/* D9 /2: store st(0) as 32-bit real */
DEFINE_OP(fstpt, 0xdb, 0x3d);	/* DB /7: store st(0) as 80-bit real and pop */
DEFINE_OP(fldt,  0xdb, 0x2d);	/* DB /5: push 80-bit real */
#endif
#undef DEFINE_OP
4149    
/* Store FP register r to memory at m as a 64-bit real; r stays on the
   x87 stack. */
LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
{
	make_tos(r);	/* the store reads st(0), so r has to be on top */
	raw_fstl(m);
}
LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4156    
4157     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4158     {
4159     make_tos(r);
4160 gbeauche 1.23 raw_fstpl(m);
4161 gbeauche 1.1 live.onstack[live.tos]=-1;
4162     live.tos--;
4163     live.spos[r]=-2;
4164     }
4165     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4166    
/* Load the 64-bit real at m into FP register r. */
LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
{
	raw_fldl(m);	/* pushes the value above the modelled top of stack */
	tos_make(r);	/* bind the pushed value to r */
}
LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4173    
/* Load the 32-bit integer at m, converted to floating point, into FP
   register r. */
LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
{
	raw_fildl(m);	/* pushes the value above the modelled top of stack */
	tos_make(r);	/* bind the pushed value to r */
}
LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4180    
/* Store FP register r to memory at m as a 32-bit integer; r stays on the
   x87 stack. */
LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
{
	make_tos(r);	/* the store reads st(0), so r has to be on top */
	raw_fistl(m);
}
LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4187    
/* Load the 32-bit (single precision) real at m into FP register r. */
LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
{
	raw_flds(m);	/* pushes the value above the modelled top of stack */
	tos_make(r);	/* bind the pushed value to r */
}
LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4194    
/* Store FP register r to memory at m as a 32-bit (single precision) real;
   r stays on the x87 stack. */
LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
{
	make_tos(r);	/* the store reads st(0), so r has to be on top */
	raw_fsts(m);
}
LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4201    
4202     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4203     {
4204     int rs;
4205    
4206     /* Stupid x87 can't write a long double to mem without popping the
4207     stack! */
4208     usereg(r);
4209     rs=stackpos(r);
4210     emit_byte(0xd9); /* Get a copy to the top of stack */
4211     emit_byte(0xc0+rs);
4212    
4213 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4214 gbeauche 1.1 }
4215     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4216    
4217     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4218     {
4219     int rs;
4220    
4221     make_tos(r);
4222 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4223 gbeauche 1.1 live.onstack[live.tos]=-1;
4224     live.tos--;
4225     live.spos[r]=-2;
4226     }
4227     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4228    
/* Load the 80-bit extended real at m into FP register r. */
LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
{
	raw_fldt(m);	/* pushes the value above the modelled top of stack */
	tos_make(r);	/* bind the pushed value to r */
}
LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4235    
/* Load the constant pi into FP register r. */
LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xeb);	/* fldpi: push pi */
	tos_make(r);		/* bind the pushed value to r */
}
LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4243    
/* Load the constant log10(2) into FP register r. */
LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xec);	/* fldlg2: push log10(2) */
	tos_make(r);		/* bind the pushed value to r */
}
LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4251    
/* Load the constant log2(e) into FP register r. */
LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xea);	/* fldl2e: push log2(e) */
	tos_make(r);		/* bind the pushed value to r */
}
LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4259    
/* Load the constant ln(2) into FP register r. */
LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xed);	/* fldln2: push ln(2) */
	tos_make(r);		/* bind the pushed value to r */
}
LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4267    
/* Load the constant 1.0 into FP register r. */
LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xe8);	/* fld1: push 1.0 */
	tos_make(r);		/* bind the pushed value to r */
}
LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4275    
/* Load the constant 0.0 into FP register r. */
LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xee);	/* fldz: push 0.0 */
	tos_make(r);		/* bind the pushed value to r */
}
LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4283    
4284     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4285     {
4286     int ds;
4287    
4288     usereg(s);
4289     ds=stackpos(s);
4290     if (ds==0 && live.spos[d]>=0) {
4291     /* source is on top of stack, and we already have the dest */
4292     int dd=stackpos(d);
4293     emit_byte(0xdd);
4294     emit_byte(0xd0+dd);
4295     }
4296     else {
4297     emit_byte(0xd9);
4298     emit_byte(0xc0+ds); /* duplicate source on tos */
4299     tos_make(d); /* store to destination, pop if necessary */
4300     }
4301     }
4302     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4303    
/* Emit fldcw with the memory operand [index + base]: load the x87 control
   word from the address formed by integer register index plus the 32-bit
   displacement base. */
LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
{
	emit_byte(0xd9);		/* opcode D9 ... */
	/* ... /5 with ModRM mod=10 (disp32 follows), rm=index.
	   NOTE(review): rm=4 would select a SIB byte rather than a register;
	   presumably callers never pass that register -- confirm. */
	emit_byte(0xa8+index);
	emit_long(base);		/* 32-bit displacement */
}
LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4311    
4312    
4313     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4314     {
4315     int ds;
4316    
4317     if (d!=s) {
4318     usereg(s);
4319     ds=stackpos(s);
4320     emit_byte(0xd9);
4321     emit_byte(0xc0+ds); /* duplicate source */
4322     emit_byte(0xd9);
4323     emit_byte(0xfa); /* take square root */
4324     tos_make(d); /* store to destination */
4325     }
4326     else {
4327     make_tos(d);
4328     emit_byte(0xd9);
4329     emit_byte(0xfa); /* take square root */
4330     }
4331     }
4332     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4333    
4334     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4335     {
4336     int ds;
4337    
4338     if (d!=s) {
4339     usereg(s);
4340     ds=stackpos(s);
4341     emit_byte(0xd9);
4342     emit_byte(0xc0+ds); /* duplicate source */
4343     emit_byte(0xd9);
4344     emit_byte(0xe1); /* take fabs */
4345     tos_make(d); /* store to destination */
4346     }
4347     else {
4348     make_tos(d);
4349     emit_byte(0xd9);
4350     emit_byte(0xe1); /* take fabs */
4351     }
4352     }
4353     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4354    
4355     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4356     {
4357     int ds;
4358    
4359     if (d!=s) {
4360     usereg(s);
4361     ds=stackpos(s);
4362     emit_byte(0xd9);
4363     emit_byte(0xc0+ds); /* duplicate source */
4364     emit_byte(0xd9);
4365     emit_byte(0xfc); /* take frndint */
4366     tos_make(d); /* store to destination */
4367     }
4368     else {
4369     make_tos(d);
4370     emit_byte(0xd9);
4371     emit_byte(0xfc); /* take frndint */
4372     }
4373     }
4374     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4375    
4376     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4377     {
4378     int ds;
4379    
4380     if (d!=s) {
4381     usereg(s);
4382     ds=stackpos(s);
4383     emit_byte(0xd9);
4384     emit_byte(0xc0+ds); /* duplicate source */
4385     emit_byte(0xd9);
4386     emit_byte(0xff); /* take cos */
4387     tos_make(d); /* store to destination */
4388     }
4389     else {
4390     make_tos(d);
4391     emit_byte(0xd9);
4392     emit_byte(0xff); /* take cos */
4393     }
4394     }
4395     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4396    
4397     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4398     {
4399     int ds;
4400    
4401     if (d!=s) {
4402     usereg(s);
4403     ds=stackpos(s);
4404     emit_byte(0xd9);
4405     emit_byte(0xc0+ds); /* duplicate source */
4406     emit_byte(0xd9);
4407     emit_byte(0xfe); /* take sin */
4408     tos_make(d); /* store to destination */
4409     }
4410     else {
4411     make_tos(d);
4412     emit_byte(0xd9);
4413     emit_byte(0xfe); /* take sin */
4414     }
4415     }
4416     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4417    
4418 gbeauche 1.34 static const double one=1;
4419 gbeauche 1.1 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4420     {
4421     int ds;
4422    
4423     usereg(s);
4424     ds=stackpos(s);
4425     emit_byte(0xd9);
4426     emit_byte(0xc0+ds); /* duplicate source */
4427    
4428     emit_byte(0xd9);
4429     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4430     emit_byte(0xd9);
4431     emit_byte(0xfc); /* rndint */
4432     emit_byte(0xd9);
4433     emit_byte(0xc9); /* swap top two elements */
4434     emit_byte(0xd8);
4435     emit_byte(0xe1); /* subtract rounded from original */
4436     emit_byte(0xd9);
4437     emit_byte(0xf0); /* f2xm1 */
4438 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4439 gbeauche 1.1 emit_byte(0xd9);
4440     emit_byte(0xfd); /* and scale it */
4441     emit_byte(0xdd);
4442     emit_byte(0xd9); /* take he rounded value off */
4443     tos_make(d); /* store to destination */
4444     }
4445     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4446    
4447     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4448     {
4449     int ds;
4450    
4451     usereg(s);
4452     ds=stackpos(s);
4453     emit_byte(0xd9);
4454     emit_byte(0xc0+ds); /* duplicate source */
4455     emit_byte(0xd9);
4456     emit_byte(0xea); /* fldl2e */
4457     emit_byte(0xde);
4458     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4459    
4460     emit_byte(0xd9);
4461     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4462     emit_byte(0xd9);
4463     emit_byte(0xfc); /* rndint */
4464     emit_byte(0xd9);
4465     emit_byte(0xc9); /* swap top two elements */
4466     emit_byte(0xd8);
4467     emit_byte(0xe1); /* subtract rounded from original */
4468     emit_byte(0xd9);
4469     emit_byte(0xf0); /* f2xm1 */
4470 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4471 gbeauche 1.1 emit_byte(0xd9);
4472     emit_byte(0xfd); /* and scale it */
4473     emit_byte(0xdd);
4474     emit_byte(0xd9); /* take he rounded value off */
4475     tos_make(d); /* store to destination */
4476     }
4477     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4478    
4479     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4480     {
4481     int ds;
4482    
4483     usereg(s);
4484     ds=stackpos(s);
4485     emit_byte(0xd9);
4486     emit_byte(0xc0+ds); /* duplicate source */
4487     emit_byte(0xd9);
4488     emit_byte(0xe8); /* push '1' */
4489     emit_byte(0xd9);
4490     emit_byte(0xc9); /* swap top two */
4491     emit_byte(0xd9);
4492     emit_byte(0xf1); /* take 1*log2(x) */
4493     tos_make(d); /* store to destination */
4494     }
4495     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4496    
4497    
4498     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4499     {
4500     int ds;
4501    
4502     if (d!=s) {
4503     usereg(s);
4504     ds=stackpos(s);
4505     emit_byte(0xd9);
4506     emit_byte(0xc0+ds); /* duplicate source */
4507     emit_byte(0xd9);
4508     emit_byte(0xe0); /* take fchs */
4509     tos_make(d); /* store to destination */
4510     }
4511     else {
4512     make_tos(d);
4513     emit_byte(0xd9);
4514     emit_byte(0xe0); /* take fchs */
4515     }
4516     }
4517     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4518    
4519     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4520     {
4521     int ds;
4522    
4523     usereg(s);
4524     usereg(d);
4525    
4526     if (live.spos[s]==live.tos) {
4527     /* Source is on top of stack */
4528     ds=stackpos(d);
4529     emit_byte(0xdc);
4530     emit_byte(0xc0+ds); /* add source to dest*/
4531     }
4532     else {
4533     make_tos(d);
4534     ds=stackpos(s);
4535    
4536     emit_byte(0xd8);
4537     emit_byte(0xc0+ds); /* add source to dest*/
4538     }
4539     }
4540     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4541    
4542     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4543     {
4544     int ds;
4545    
4546     usereg(s);
4547     usereg(d);
4548    
4549     if (live.spos[s]==live.tos) {
4550     /* Source is on top of stack */
4551     ds=stackpos(d);
4552     emit_byte(0xdc);
4553     emit_byte(0xe8+ds); /* sub source from dest*/
4554     }
4555     else {
4556     make_tos(d);
4557     ds=stackpos(s);
4558    
4559     emit_byte(0xd8);
4560     emit_byte(0xe0+ds); /* sub src from dest */
4561     }
4562     }
4563     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4564    
4565     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4566     {
4567     int ds;
4568    
4569     usereg(s);
4570     usereg(d);
4571    
4572     make_tos(d);
4573     ds=stackpos(s);
4574    
4575     emit_byte(0xdd);
4576     emit_byte(0xe0+ds); /* cmp dest with source*/
4577     }
4578     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4579    
4580     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4581     {
4582     int ds;
4583    
4584     usereg(s);
4585     usereg(d);
4586    
4587     if (live.spos[s]==live.tos) {
4588     /* Source is on top of stack */
4589     ds=stackpos(d);
4590     emit_byte(0xdc);
4591     emit_byte(0xc8+ds); /* mul dest by source*/
4592     }
4593     else {
4594     make_tos(d);
4595     ds=stackpos(s);
4596    
4597     emit_byte(0xd8);
4598     emit_byte(0xc8+ds); /* mul dest by source*/
4599     }
4600     }
4601     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4602    
4603     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4604     {
4605     int ds;
4606    
4607     usereg(s);
4608     usereg(d);
4609    
4610     if (live.spos[s]==live.tos) {
4611     /* Source is on top of stack */
4612     ds=stackpos(d);
4613     emit_byte(0xdc);
4614     emit_byte(0xf8+ds); /* div dest by source */
4615     }
4616     else {
4617     make_tos(d);
4618     ds=stackpos(s);
4619    
4620     emit_byte(0xd8);
4621     emit_byte(0xf0+ds); /* div dest by source*/
4622     }
4623     }
4624     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4625    
4626     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4627     {
4628     int ds;
4629    
4630     usereg(s);
4631     usereg(d);
4632    
4633     make_tos2(d,s);
4634     ds=stackpos(s);
4635    
4636     if (ds!=1) {
4637     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4638     abort();
4639     }
4640     emit_byte(0xd9);
4641     emit_byte(0xf8); /* take rem from dest by source */
4642     }
4643     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4644    
4645     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4646     {
4647     int ds;
4648    
4649     usereg(s);
4650     usereg(d);
4651    
4652     make_tos2(d,s);
4653     ds=stackpos(s);
4654    
4655     if (ds!=1) {
4656     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4657     abort();
4658     }
4659     emit_byte(0xd9);
4660     emit_byte(0xf5); /* take rem1 from dest by source */
4661     }
4662     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4663    
4664    
/* Test FP register r against 0.0 (ftst); the outcome is left in the x87
   status word condition codes. */
LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
{
	make_tos(r);		/* ftst only examines st(0) */
	emit_byte(0xd9);	/* ftst */
	emit_byte(0xe4);
}
LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4672    
/* %eax register is clobbered if target processor doesn't support fucomi:
   raw_fflags_into_flags() then has to go through "fstsw %ax; sahf".
   have_cmov is used as the indicator for fucomi support. */
#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
/* Index of the integer register that gets clobbered in that case. */
#define FFLAG_NREG EAX_INDEX
4676    
4677     static __inline__ void raw_fflags_into_flags(int r)
4678     {
4679     int p;
4680    
4681     usereg(r);
4682     p=stackpos(r);
4683    
4684     emit_byte(0xd9);
4685     emit_byte(0xee); /* Push 0 */
4686     emit_byte(0xd9);
4687     emit_byte(0xc9+p); /* swap top two around */
4688     if (have_cmov) {
4689     // gb-- fucomi is for P6 cores only, not K6-2 then...
4690     emit_byte(0xdb);
4691     emit_byte(0xe9+p); /* fucomi them */
4692     }
4693     else {
4694     emit_byte(0xdd);
4695     emit_byte(0xe1+p); /* fucom them */
4696     emit_byte(0x9b);
4697     emit_byte(0xdf);
4698     emit_byte(0xe0); /* fstsw ax */
4699     raw_sahf(0); /* sahf */
4700     }
4701     emit_byte(0xdd);
4702     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4703     }