ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.37
Committed: 2007-01-14T13:07:22Z (17 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.36: +1 -3 lines
Log Message:
The older code generator is now deprecated on x86-32 too.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.33 /* XXX this has to match X86_Reg8H_Base + 4 */
56     #define AH_INDEX (0x10+4+EAX_INDEX)
57     #define CH_INDEX (0x10+4+ECX_INDEX)
58     #define DH_INDEX (0x10+4+EDX_INDEX)
59     #define BH_INDEX (0x10+4+EBX_INDEX)
60 gbeauche 1.1
61     /* The register in which subroutines return an integer return value */
62 gbeauche 1.20 #define REG_RESULT EAX_INDEX
63 gbeauche 1.1
64     /* The registers subroutines take their first and second argument in */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     /* Handle the _fastcall parameters of ECX and EDX */
67 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
68     #define REG_PAR2 EDX_INDEX
69     #elif defined(__x86_64__)
70     #define REG_PAR1 EDI_INDEX
71     #define REG_PAR2 ESI_INDEX
72 gbeauche 1.1 #else
73 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
74     #define REG_PAR2 EDX_INDEX
75 gbeauche 1.1 #endif
76    
77 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
78 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
80 gbeauche 1.1 #else
81 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82 gbeauche 1.1 #endif
83    
84 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
85 gbeauche 1.1 -1 if any reg will do */
86 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
88 gbeauche 1.1
89 gbeauche 1.31 #define STACK_ALIGN 16
90     #define STACK_OFFSET sizeof(void *)
91    
/* Register index lists, each terminated by -1. The entries are the *_INDEX
   numbers defined above: always_used holds only 4 (ESP_INDEX), and 4 is
   absent from every can_* list below.
   NOTE(review): presumably can_byte / can_word name the registers usable
   for 8-bit / 16-bit operands -- confirm against the register allocator. */
92 gbeauche 1.1 uae_s8 always_used[]={4,-1};
93 gbeauche 1.20 #if defined(__x86_64__)
94     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96     #else
97 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
98     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 gbeauche 1.20 #endif
100 gbeauche 1.1
101 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
102     /* Make sure interpretive core does not use cpuopti */
103     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 gbeauche 1.20 #error FIXME: code not ready
105 gbeauche 1.17 #else
106 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
107     by the caller */
108 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 gbeauche 1.17 #endif
110 gbeauche 1.1
111     /* This *should* be the same as call_saved. But:
112     - We might not really know which registers are saved, and which aren't,
113     so we need to preserve some, but don't want to rely on everyone else
114     also saving those registers
115     - Special registers (such as the stack pointer) should not be "preserved"
116     by pushing, even though they are "saved" across function calls
117     */
118 gbeauche 1.21 #if defined(__x86_64__)
119 gbeauche 1.32 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
121     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 gbeauche 1.21 #else
123 gbeauche 1.32 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 gbeauche 1.21 #endif
126 gbeauche 1.1
127     /* Whether classes of instructions do or don't clobber the native flags */
128     #define CLOBBER_MOV
129     #define CLOBBER_LEA
130     #define CLOBBER_CMOV
131     #define CLOBBER_POP
132     #define CLOBBER_PUSH
133     #define CLOBBER_SUB clobber_flags()
134     #define CLOBBER_SBB clobber_flags()
135     #define CLOBBER_CMP clobber_flags()
136     #define CLOBBER_ADD clobber_flags()
137     #define CLOBBER_ADC clobber_flags()
138     #define CLOBBER_AND clobber_flags()
139     #define CLOBBER_OR clobber_flags()
140     #define CLOBBER_XOR clobber_flags()
141    
142     #define CLOBBER_ROL clobber_flags()
143     #define CLOBBER_ROR clobber_flags()
144     #define CLOBBER_SHLL clobber_flags()
145     #define CLOBBER_SHRL clobber_flags()
146     #define CLOBBER_SHRA clobber_flags()
147     #define CLOBBER_TEST clobber_flags()
148     #define CLOBBER_CL16
149     #define CLOBBER_CL8
150 gbeauche 1.20 #define CLOBBER_SE32
151 gbeauche 1.1 #define CLOBBER_SE16
152     #define CLOBBER_SE8
153 gbeauche 1.20 #define CLOBBER_ZE32
154 gbeauche 1.1 #define CLOBBER_ZE16
155     #define CLOBBER_ZE8
156     #define CLOBBER_SW16 clobber_flags()
157     #define CLOBBER_SW32
158     #define CLOBBER_SETCC
159     #define CLOBBER_MUL clobber_flags()
160     #define CLOBBER_BT clobber_flags()
161     #define CLOBBER_BSF clobber_flags()
162    
163 gbeauche 1.37 /* The older code generator is now deprecated. */
164 gbeauche 1.20 #define USE_NEW_RTASM 1
165    
166     #if USE_NEW_RTASM
167 gbeauche 1.13
168     #if defined(__x86_64__)
169     #define X86_TARGET_64BIT 1
170 gbeauche 1.35 /* The address-size override prefix causes a 5-cycle penalty on Intel Core
171     processors. Another solution would be to decompose the load into an LEA,
172     MOV (to zero-extend), MOV (from memory): is it better? */
173     #define ADDR32 x86_emit_byte(0x67),
174     #else
175     #define ADDR32 /**/
176 gbeauche 1.13 #endif
177     #define X86_FLAT_REGISTERS 0
178 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
179     #define X86_OPTIMIZE_ROTSHI 1
180 gbeauche 1.13 #include "codegen_x86.h"
181    
182     #define x86_emit_byte(B) emit_byte(B)
183     #define x86_emit_word(W) emit_word(W)
184     #define x86_emit_long(L) emit_long(L)
185 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
186 gbeauche 1.13 #define x86_get_target() get_target()
187     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
188    
/* Report a code-generation failure on stderr (function, file, line and
   message) and terminate the process with abort(); never returns.
   Reached through the x86_emit_failure(MSG) macro defined just above,
   which supplies __FILE__, __LINE__ and __FUNCTION__ of the failing site. */
189     static void jit_fail(const char *msg, const char *file, int line, const char *function)
190     {
191     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
192     function, file, line, msg);
193     abort();
194     }
195    
/* Native stack push/pop emitters. Each one uses the Q-suffixed macro when
   __x86_64__ is defined and the L-suffixed macro otherwise. */

/* Push register r. */
196     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
197     {
198 gbeauche 1.20 #if defined(__x86_64__)
199     PUSHQr(r);
200     #else
201 gbeauche 1.13 PUSHLr(r);
202 gbeauche 1.20 #endif
203 gbeauche 1.13 }
204     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
205    
/* Pop into register r. */
206     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
207     {
208 gbeauche 1.20 #if defined(__x86_64__)
209     POPQr(r);
210     #else
211 gbeauche 1.13 POPLr(r);
212 gbeauche 1.20 #endif
213 gbeauche 1.13 }
214     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
215    
/* Pop into the memory operand at displacement d, with no base and no index
   register (both X86_NOREG). */
216 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
217     {
218     #if defined(__x86_64__)
219     POPQm(d, X86_NOREG, X86_NOREG, 1);
220     #else
221     POPLm(d, X86_NOREG, X86_NOREG, 1);
222     #endif
223     }
224     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
225    
/* 32-bit bit-test emitters: BT, BTC, BTR and BTS, each with the bit number
   given either as an immediate (i) or in a register (b). The wrappers take
   (register, bit) and hand the operands to the encoder macro in the
   opposite order (bit, register). CLOBBER_BT above marks this class as
   clobbering the native flags. */
226 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
227     {
228     BTLir(i, r);
229     }
230     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
231    
232     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
233     {
234     BTLrr(b, r);
235     }
236     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
237    
238     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
239     {
240     BTCLir(i, r);
241     }
242     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
243    
244     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
245     {
246     BTCLrr(b, r);
247     }
248     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
249    
250     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
251     {
252     BTRLir(i, r);
253     }
254     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
255    
256     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
257     {
258     BTRLrr(b, r);
259     }
260     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
261    
262     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
263     {
264     BTSLir(i, r);
265     }
266     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
267    
268     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
269     {
270     BTSLrr(b, r);
271     }
272     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
273    
/* 16-bit subtract of immediate i from register d (SUBWir). */
274     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
275     {
276     SUBWir(i, d);
277     }
278     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
279    
/* 32-bit load of register d from the memory operand at displacement s,
   with no base or index register. */
280     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
281     {
282     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
283     }
284     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
285    
/* Store immediate s to the memory operand at displacement d (no base or
   index register), in 32-, 16- and 8-bit widths respectively. */
286     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
287     {
288     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
289     }
290     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
291    
292     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
293     {
294     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
295     }
296     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
297    
298     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
299     {
300     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
301     }
302     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
303    
/* Rotate-left emitters. Name suffixes: _b/_w/_l select the ROLB/ROLW/ROLL
   macro; _mi rotates the memory operand at displacement d by immediate i,
   _ri rotates register r by immediate i, _rr rotates register d by the
   count held in register r. The count is always passed first to the
   encoder macro. CLOBBER_ROL above marks this class as flag-clobbering. */
304     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
305     {
306     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
307     }
308     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
309    
310     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
311     {
312     ROLBir(i, r);
313     }
314     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
315    
316     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
317     {
318     ROLWir(i, r);
319     }
320     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
321    
322     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
323     {
324     ROLLir(i, r);
325     }
326     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
327    
/* NOTE(review): SHIFTCOUNT_NREG above is ECX_INDEX, so the count register
   of the _rr forms is presumably expected to be ECX/CL -- nothing here
   checks it; confirm that callers guarantee it. */
328     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
329     {
330     ROLLrr(r, d);
331     }
332     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
333    
334     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
335     {
336     ROLWrr(r, d);
337     }
338     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
339    
340     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
341     {
342     ROLBrr(r, d);
343     }
344     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
345    
/* Shift-left of register d by the count held in register r, in 32-, 16-
   and 8-bit widths (SHLL/SHLW/SHLB). The count is passed first to the
   encoder macro. CLOBBER_SHLL above marks this class as flag-clobbering. */
346     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
347     {
348     SHLLrr(r, d);
349     }
350     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
351    
352     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
353     {
354     SHLWrr(r, d);
355     }
356     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
357    
358     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
359     {
360     SHLBrr(r, d);
361     }
362     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
363    
/* Rotate-right emitters (RORB/RORW/RORL), by immediate (_ri) or by the
   count held in register r (_rr); the count is passed first to the encoder
   macro. CLOBBER_ROR above marks this class as flag-clobbering.
   raw_or_l_rm sits in the middle of this group: it ORs the 32-bit memory
   operand at displacement s (no base or index register) into register d. */
364     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
365     {
366     RORBir(i, r);
367     }
368     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
369    
370     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371     {
372     RORWir(i, r);
373     }
374     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
375    
376     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
377     {
378     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
379     }
380     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
381    
382     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
383     {
384     RORLir(i, r);
385     }
386     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
387    
388     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
389     {
390     RORLrr(r, d);
391     }
392     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
393    
394     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
395     {
396     RORWrr(r, d);
397     }
398     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
399    
400     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
401     {
402     RORBrr(r, d);
403     }
404     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
405    
/* Right shifts of register d by the count held in register r.
   raw_shrl_* emit the SHR macros and raw_shra_* emit the SAR macros, each
   in 32-, 16- and 8-bit widths. The count is passed first to the encoder
   macro. CLOBBER_SHRL / CLOBBER_SHRA above mark both classes as
   flag-clobbering. */
406     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
407     {
408     SHRLrr(r, d);
409     }
410     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
411    
412     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
413     {
414     SHRWrr(r, d);
415     }
416     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
417    
418     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
419     {
420     SHRBrr(r, d);
421     }
422     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
423    
424     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
425     {
426 gbeauche 1.14 SARLrr(r, d);
427 gbeauche 1.13 }
428     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
429    
430     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
431     {
432 gbeauche 1.14 SARWrr(r, d);
433 gbeauche 1.13 }
434     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
435    
436     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
437     {
438 gbeauche 1.14 SARBrr(r, d);
439 gbeauche 1.13 }
440     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
441    
/* Shifts of register r by immediate count i: raw_shll_* emit SHL,
   raw_shrl_* emit SHR and raw_shra_* emit SAR, each in 32-, 16- and 8-bit
   widths. The count is passed first to the encoder macro. */
442     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
443     {
444     SHLLir(i, r);
445     }
446     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
447    
448     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
449     {
450     SHLWir(i, r);
451     }
452     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
453    
454     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
455     {
456     SHLBir(i, r);
457     }
458     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
459    
460     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
461     {
462     SHRLir(i, r);
463     }
464     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
465    
466     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
467     {
468     SHRWir(i, r);
469     }
470     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
471    
472     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
473     {
474     SHRBir(i, r);
475     }
476     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
477    
478     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
479     {
480 gbeauche 1.14 SARLir(i, r);
481 gbeauche 1.13 }
482     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
483    
484     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
485     {
486 gbeauche 1.14 SARWir(i, r);
487 gbeauche 1.13 }
488     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
489    
490     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
491     {
492 gbeauche 1.14 SARBir(i, r);
493 gbeauche 1.13 }
494     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
495    
/* Operand-less instructions SAHF, CPUID and LAHF. The single parameter of
   each wrapper (dummy_ah / dummy_eax) is never used in the body.
   NOTE(review): the dummy parameter presumably exists so the caller's
   register bookkeeping knows the implicit AH/EAX operand -- confirm
   against the callers. */
496     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
497     {
498     SAHF();
499     }
500     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
501    
502     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
503     {
504     CPUID();
505     }
506     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
507    
508     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
509     {
510     LAHF();
511     }
512     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
513    
/* SETcc emitters for condition code cc: raw_setcc targets byte register d,
   raw_setcc_m targets the memory operand at displacement d (no base or
   index register). */
514     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
515     {
516     SETCCir(cc, d);
517     }
518     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
519    
520     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
521     {
522     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
523     }
524     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
525    
/* Conditional 32-bit register move: d = s when condition cc holds.
   With have_cmov set, a single CMOV is emitted. Otherwise the fallback
   emits a short conditional jump on the inverted condition (cc^1) with a
   hard-coded displacement of 2, followed by a plain MOV; the displacement
   therefore relies on MOVLrr(s, d) encoding to exactly 2 bytes.
   When built for x86-64 the fallback logs a message and abort()s before
   emitting anything. */
526     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
527     {
528 gbeauche 1.15 if (have_cmov)
529     CMOVLrr(cc, s, d);
530     else { /* replacement using branch and mov */
531     #if defined(__x86_64__)
532     write_log("x86-64 implementations are bound to have CMOV!\n");
533     abort();
534     #endif
535     JCCSii(cc^1, 2);
536     MOVLrr(s, d);
537     }
538 gbeauche 1.13 }
539     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
540    
/* Bit-scan-forward of s into d (BSFLrr). CLOBBER_BSF above marks this
   class as flag-clobbering. */
541     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
542     {
543     BSFLrr(s, d);
544     }
545     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
546    
/* Sign and zero extensions of register s into register d. The source is
   passed first to the encoder macro.
   NOTE(review): raw_sign_extend_32_rr uses MOVSLQrr (32 -> 64 bit by its
   name), which presumably only makes sense on x86-64 builds -- confirm it
   is never called on 32-bit targets. */
547 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
548     {
549     MOVSLQrr(s, d);
550     }
551     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
552    
553 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
554     {
555     MOVSWLrr(s, d);
556     }
557     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
558    
559     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
560     {
561     MOVSBLrr(s, d);
562     }
563     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
564    
565     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
566     {
567     MOVZWLrr(s, d);
568     }
569     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
570    
571     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
572     {
573     MOVZBLrr(s, d);
574     }
575     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
576    
/* Two-operand signed multiply of register d by register s (IMULLrr). */
577     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
578     {
579 gbeauche 1.14 IMULLrr(s, d);
580 gbeauche 1.13 }
581     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
582    
/* Widening signed multiply using the one-operand IMUL form. The operands
   are fixed: d must be MUL_NREG1 (EAX_INDEX) and s must be MUL_NREG2
   (EDX_INDEX); per the MUL_NREG comments above, EAX receives the low and
   EDX the high 32 bits. Any other register pair is logged and abort()s. */
583     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
584     {
585 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
586     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
587 gbeauche 1.13 abort();
588 gbeauche 1.14 }
589     IMULLr(s);
590 gbeauche 1.13 }
591     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
592    
/* Unsigned counterpart of raw_imul_64_32 (one-operand MUL), with the same
   fixed-register requirement and the same abort() on violation. */
593     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
594     {
595 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
596     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
597 gbeauche 1.13 abort();
598 gbeauche 1.14 }
599     MULLr(s);
600 gbeauche 1.13 }
601     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
602    
/* Not implemented: the body emits nothing and unconditionally abort()s,
   so this entry point must never be reached. */
603     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
604     {
605 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
606 gbeauche 1.13 }
607     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
608    
/* Register-to-register moves, 8-bit and 16-bit: d = s. The source is
   passed first to the encoder macro. */
609     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
610     {
611     MOVBrr(s, d);
612     }
613     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
614    
615     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
616     {
617     MOVWrr(s, d);
618     }
619     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
620    
/* Indexed loads and stores. The memory operand is passed to the encoder
   macros as (displacement, base register, index register, scale):
     - *_rrm_indexed : load  d from [baser + index*factor]
     - *_mrr_indexed : store s to   [baser + index*factor]
     - *_bmrr_indexed: store s to   [base + baser + index*factor]
     - *_brrm_indexed: load  d from [base + baser + index*factor]
     - raw_mov_l_rm_indexed: load d from [base + index*factor], with no
       base register (X86_NOREG)
   Every access is prefixed with ADDR32, which emits a 0x67 byte when
   __x86_64__ is defined and expands to nothing otherwise. */
621     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
622     {
623 gbeauche 1.35 ADDR32 MOVLmr(0, baser, index, factor, d);
624 gbeauche 1.13 }
625     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
626    
627     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
628     {
629 gbeauche 1.35 ADDR32 MOVWmr(0, baser, index, factor, d);
630 gbeauche 1.13 }
631     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
632    
633     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
634     {
635 gbeauche 1.35 ADDR32 MOVBmr(0, baser, index, factor, d);
636 gbeauche 1.13 }
637     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
638    
639     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
640     {
641 gbeauche 1.35 ADDR32 MOVLrm(s, 0, baser, index, factor);
642 gbeauche 1.13 }
643     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
644    
645     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
646     {
647 gbeauche 1.35 ADDR32 MOVWrm(s, 0, baser, index, factor);
648 gbeauche 1.13 }
649     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
650    
651     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
652     {
653 gbeauche 1.35 ADDR32 MOVBrm(s, 0, baser, index, factor);
654 gbeauche 1.13 }
655     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
656    
657     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
658     {
659 gbeauche 1.35 ADDR32 MOVLrm(s, base, baser, index, factor);
660 gbeauche 1.13 }
661     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
662    
663     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
664     {
665 gbeauche 1.35 ADDR32 MOVWrm(s, base, baser, index, factor);
666 gbeauche 1.13 }
667     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
668    
669     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
670     {
671 gbeauche 1.35 ADDR32 MOVBrm(s, base, baser, index, factor);
672 gbeauche 1.13 }
673     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
674    
675     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
676     {
677 gbeauche 1.35 ADDR32 MOVLmr(base, baser, index, factor, d);
678 gbeauche 1.13 }
679     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
680    
681     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
682     {
683 gbeauche 1.35 ADDR32 MOVWmr(base, baser, index, factor, d);
684 gbeauche 1.13 }
685     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
686    
687     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
688     {
689 gbeauche 1.35 ADDR32 MOVBmr(base, baser, index, factor, d);
690 gbeauche 1.13 }
691     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
692    
693     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
694     {
695 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
696 gbeauche 1.13 }
697     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
698    
/* Conditional 32-bit load: d = [base + index*factor] when cond holds.
   With have_cmov set, a single CMOV from memory is emitted. Otherwise the
   fallback emits a short jump on the inverted condition (cond^1) with a
   hard-coded displacement of 7, followed by a plain load; this relies on
   the load encoding to exactly 7 bytes.
   The fallback is only reachable on non-x86-64 builds (the x86-64 branch
   abort()s first), where ADDR32 expands to nothing, so the 0x67 prefix
   does not add to that byte count. */
699     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
700     {
701 gbeauche 1.15 if (have_cmov)
702 gbeauche 1.35 ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
703 gbeauche 1.15 else { /* replacement using branch and mov */
704     #if defined(__x86_64__)
705     write_log("x86-64 implementations are bound to have CMOV!\n");
706     abort();
707     #endif
708     JCCSii(cond^1, 7);
709 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
710 gbeauche 1.15 }
711 gbeauche 1.13 }
712     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
713    
/* Conditional 32-bit load from the absolute address mem: d = [mem] when
   cond holds. With have_cmov set, a single CMOV is emitted. Otherwise the
   fallback emits a short jump on the inverted condition (cond^1) with a
   hard-coded displacement of 6, followed by a plain load; this relies on
   the load encoding to exactly 6 bytes.
   NOTE(review): if the encoder ever selects a shorter form for some
   destination register (e.g. an accumulator-only encoding), the jump
   would land inside the next instruction -- verify against MOVLmr in
   codegen_x86.h.
   When built for x86-64 the fallback logs a message and abort()s. */
714     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
715     {
716 gbeauche 1.15 if (have_cmov)
717     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
718     else { /* replacement using branch and mov */
719     #if defined(__x86_64__)
720     write_log("x86-64 implementations are bound to have CMOV!\n");
721     abort();
722     #endif
723     JCCSii(cond^1, 6);
724     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
725     }
726 gbeauche 1.13 }
727     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
728    
/* Register-relative moves, all prefixed with ADDR32 (a 0x67 byte when
   __x86_64__ is defined, nothing otherwise):
     - raw_mov_{l,w,b}_rR / _brR: load  d from [s + offset]
     - raw_mov_{l,w,b}_Ri       : store immediate i to [d + offset]
     - raw_mov_{l,w,b}_Rr       : store register s to  [d + offset]
   Each _brR variant emits exactly the same macro call as the _rR variant
   of the same width. */
729     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
730     {
731 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
732 gbeauche 1.13 }
733     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
734    
735     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
736     {
737 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
738 gbeauche 1.13 }
739     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
740    
741     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
742     {
743 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
744 gbeauche 1.13 }
745     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
746    
747     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
748     {
749 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
750 gbeauche 1.13 }
751     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
752    
753     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
754     {
755 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
756 gbeauche 1.13 }
757     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
758    
759     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
760     {
761 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
762 gbeauche 1.13 }
763     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
764    
765     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
766     {
767 gbeauche 1.35 ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
768 gbeauche 1.13 }
769     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
770    
771     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
772     {
773 gbeauche 1.35 ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
774 gbeauche 1.13 }
775     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
776    
777     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
778     {
779 gbeauche 1.35 ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
780 gbeauche 1.13 }
781     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
782    
783     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
784     {
785 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
786 gbeauche 1.13 }
787     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
788    
789     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
790     {
791 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
792 gbeauche 1.13 }
793     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
794    
795     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
796     {
797 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
798 gbeauche 1.13 }
799     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
800    
/* LEA emitters, operands passed to the macro as (displacement, base,
   index, scale, destination):
     - raw_lea_l_brr        : d = s + offset
     - raw_lea_l_brr_indexed: d = s + index*factor + offset
     - raw_lea_l_rr_indexed : d = s + index*factor
   Unlike the memory moves in this file, none of them is prefixed with
   ADDR32. */
801     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
802     {
803     LEALmr(offset, s, X86_NOREG, 1, d);
804     }
805     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
806    
807     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
808     {
809     LEALmr(offset, s, index, factor, d);
810     }
811     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
812    
813     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
814     {
815     LEALmr(0, s, index, factor, d);
816     }
817     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
818    
/* LEA with a scaled index only: d = index*factor, with no base register
   (X86_NOREG) and a zero displacement. The argument count given to
   LOWFUNC/LENDFUNC is 3, one per parameter (d, index, factor), as for
   every other wrapper in this file. */
819 gbeauche 1.36 LOWFUNC(NONE,NONE,3,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
820     {
821     LEALmr(0, X86_NOREG, index, factor, d);
822     }
823     LENDFUNC(NONE,NONE,3,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
824    
/* Store register s to [d + offset], in 32-, 16- and 8-bit widths, each
   prefixed with ADDR32. The emitted macro calls are the same as those of
   raw_mov_{l,w,b}_Rr. */
825 gbeauche 1.13 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
826     {
827 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
828 gbeauche 1.13 }
829     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
830    
831     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
832     {
833 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
834 gbeauche 1.13 }
835     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
836    
837     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
838     {
839 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
840 gbeauche 1.13 }
841     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
842    
/* Byte-swap the 32-bit register r with BSWAP. */
843     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
844     {
845     BSWAPLr(r);
846     }
847     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
848    
/* Byte-swap the 16-bit register r, implemented as a 16-bit rotate-left by
   8. Unlike raw_bswap_32 this goes through ROL, which CLOBBER_ROL above
   marks as flag-clobbering (hence WRITE instead of NONE in the first
   LOWFUNC field). */
849     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
850     {
851     ROLWir(8, r);
852     }
853     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
854    
/* Plain moves. Naming: _rr register to register, _mr register s to the
   memory operand at displacement d, _rm memory operand at displacement s
   to register d, _ri immediate s to register d. The memory forms use no
   base or index register (X86_NOREG) and are not prefixed with ADDR32. */
855     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
856     {
857     MOVLrr(s, d);
858     }
859     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
860    
861     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
862     {
863     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
864     }
865     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
866    
867     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
868     {
869     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
870     }
871     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
872    
873     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
874     {
875     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
876     }
877     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
878    
879     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
880     {
881     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
882     }
883     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
884    
885     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
886     {
887     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
888     }
889     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
890    
891     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
892     {
893     MOVLir(s, d);
894     }
895     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
896    
897     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
898     {
899     MOVWir(s, d);
900     }
901     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
902    
903     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
904     {
905     MOVBir(s, d);
906     }
907     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
908    
909     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
910     {
911     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
912     }
913     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
914    
915     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
916     {
917     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
918     }
919     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
920    
921     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
922     {
923     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
924     }
925     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
926    
927     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
928     {
929     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
930     }
931     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
932    
933     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
934     {
935     TESTLir(i, d);
936     }
937     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
938    
939     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
940     {
941     TESTLrr(s, d);
942     }
943     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
944    
945     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
946     {
947     TESTWrr(s, d);
948     }
949     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
950    
951     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
952     {
953     TESTBrr(s, d);
954     }
955     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
956    
957 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
958     {
959     XORLir(i, d);
960     }
961     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
962    
963 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
964     {
965     ANDLir(i, d);
966     }
967     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
968    
969     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
970     {
971     ANDWir(i, d);
972     }
973     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
974    
975     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
976     {
977     ANDLrr(s, d);
978     }
979     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
980    
981     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
982     {
983     ANDWrr(s, d);
984     }
985     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
986    
987     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
988     {
989     ANDBrr(s, d);
990     }
991     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
992    
993     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
994     {
995     ORLir(i, d);
996     }
997     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
998    
999     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1000     {
1001     ORLrr(s, d);
1002     }
1003     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1004    
1005     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1006     {
1007     ORWrr(s, d);
1008     }
1009     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1010    
1011     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1012     {
1013     ORBrr(s, d);
1014     }
1015     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1016    
1017     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1018     {
1019     ADCLrr(s, d);
1020     }
1021     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1022    
1023     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1024     {
1025     ADCWrr(s, d);
1026     }
1027     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1028    
1029     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1030     {
1031     ADCBrr(s, d);
1032     }
1033     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1034    
1035     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1036     {
1037     ADDLrr(s, d);
1038     }
1039     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1040    
1041     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1042     {
1043     ADDWrr(s, d);
1044     }
1045     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1046    
1047     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1048     {
1049     ADDBrr(s, d);
1050     }
1051     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1052    
1053     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1054     {
1055     SUBLir(i, d);
1056     }
1057     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1058    
1059     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1060     {
1061     SUBBir(i, d);
1062     }
1063     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1064    
1065     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1066     {
1067     ADDLir(i, d);
1068     }
1069     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1070    
1071     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1072     {
1073     ADDWir(i, d);
1074     }
1075     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1076    
1077     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1078     {
1079     ADDBir(i, d);
1080     }
1081     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1082    
1083     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1084     {
1085     SBBLrr(s, d);
1086     }
1087     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1088    
1089     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1090     {
1091     SBBWrr(s, d);
1092     }
1093     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1094    
1095     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1096     {
1097     SBBBrr(s, d);
1098     }
1099     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1100    
1101     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1102     {
1103     SUBLrr(s, d);
1104     }
1105     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1106    
1107     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1108     {
1109     SUBWrr(s, d);
1110     }
1111     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1112    
1113     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1114     {
1115     SUBBrr(s, d);
1116     }
1117     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1118    
1119     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1120     {
1121     CMPLrr(s, d);
1122     }
1123     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1124    
1125     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1126     {
1127     CMPLir(i, r);
1128     }
1129     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1130    
1131     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1132     {
1133     CMPWrr(s, d);
1134     }
1135     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1136    
1137     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1138     {
1139     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1140     }
1141     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1142    
1143     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1144     {
1145     CMPBir(i, d);
1146     }
1147     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1148    
1149     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1150     {
1151     CMPBrr(s, d);
1152     }
1153     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1154    
1155     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1156     {
1157 gbeauche 1.35 ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
1158 gbeauche 1.13 }
1159     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1160    
1161     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1162     {
1163     XORLrr(s, d);
1164     }
1165     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1166    
1167     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1168     {
1169     XORWrr(s, d);
1170     }
1171     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1172    
1173     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1174     {
1175     XORBrr(s, d);
1176     }
1177     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1178    
1179     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1180     {
1181     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1182     }
1183     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1184    
1185     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1186     {
1187     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1188     }
1189     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1190    
1191     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1192     {
1193     XCHGLrr(r2, r1);
1194     }
1195     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1196    
1197 gbeauche 1.36 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1198     {
1199     XCHGBrr(r2, r1);
1200     }
1201     LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1202    
1203 gbeauche 1.13 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1204     {
1205 gbeauche 1.18 PUSHF();
1206 gbeauche 1.13 }
1207     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1208    
1209     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1210     {
1211 gbeauche 1.18 POPF();
1212 gbeauche 1.13 }
1213     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1214    
1215 gbeauche 1.34 /* Generate floating-point instructions */
1216     static inline void x86_fadd_m(MEMR s)
1217     {
1218     FADDLm(s,X86_NOREG,X86_NOREG,1);
1219     }
1220    
1221 gbeauche 1.13 #else
1222    
/* Compile-time switches selecting shorter instruction encodings. */
const bool optimize_accum      = true; /* use the accumulator-only opcode forms */
const bool optimize_imm8       = true;
const bool optimize_shift_once = true; /* use the shift/rotate-by-1 opcodes */
1226    
1227     /*************************************************************************
1228     * Actual encoding of the instructions on the target CPU *
1229     *************************************************************************/
1230    
1231 gbeauche 1.2 static __inline__ int isaccum(int r)
1232     {
1233     return (r == EAX_INDEX);
1234     }
1235    
1236 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1237     {
1238     return (x>=-128 && x<=127);
1239     }
1240    
1241     static __inline__ int isword(uae_s32 x)
1242     {
1243     return (x>=-32768 && x<=32767);
1244     }
1245    
1246     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1247     {
1248     emit_byte(0x50+r);
1249     }
1250     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1251    
1252     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1253     {
1254     emit_byte(0x58+r);
1255     }
1256     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1257    
1258 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1259     {
1260     emit_byte(0x8f);
1261     emit_byte(0x05);
1262     emit_long(d);
1263     }
1264     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1265    
1266 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1267     {
1268     emit_byte(0x0f);
1269     emit_byte(0xba);
1270     emit_byte(0xe0+r);
1271     emit_byte(i);
1272     }
1273     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1274    
1275     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1276     {
1277     emit_byte(0x0f);
1278     emit_byte(0xa3);
1279     emit_byte(0xc0+8*b+r);
1280     }
1281     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1282    
1283     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1284     {
1285     emit_byte(0x0f);
1286     emit_byte(0xba);
1287     emit_byte(0xf8+r);
1288     emit_byte(i);
1289     }
1290     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1291    
1292     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1293     {
1294     emit_byte(0x0f);
1295     emit_byte(0xbb);
1296     emit_byte(0xc0+8*b+r);
1297     }
1298     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1299    
1300    
1301     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1302     {
1303     emit_byte(0x0f);
1304     emit_byte(0xba);
1305     emit_byte(0xf0+r);
1306     emit_byte(i);
1307     }
1308     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1309    
1310     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1311     {
1312     emit_byte(0x0f);
1313     emit_byte(0xb3);
1314     emit_byte(0xc0+8*b+r);
1315     }
1316     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1317    
1318     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1319     {
1320     emit_byte(0x0f);
1321     emit_byte(0xba);
1322     emit_byte(0xe8+r);
1323     emit_byte(i);
1324     }
1325     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1326    
1327     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1328     {
1329     emit_byte(0x0f);
1330     emit_byte(0xab);
1331     emit_byte(0xc0+8*b+r);
1332     }
1333     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1334    
1335     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1336     {
1337     emit_byte(0x66);
1338     if (isbyte(i)) {
1339     emit_byte(0x83);
1340     emit_byte(0xe8+d);
1341     emit_byte(i);
1342     }
1343     else {
1344 gbeauche 1.2 if (optimize_accum && isaccum(d))
1345     emit_byte(0x2d);
1346     else {
1347 gbeauche 1.1 emit_byte(0x81);
1348     emit_byte(0xe8+d);
1349 gbeauche 1.2 }
1350 gbeauche 1.1 emit_word(i);
1351     }
1352     }
1353     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1354    
1355    
1356     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1357     {
1358     emit_byte(0x8b);
1359     emit_byte(0x05+8*d);
1360     emit_long(s);
1361     }
1362     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1363    
1364     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1365     {
1366     emit_byte(0xc7);
1367     emit_byte(0x05);
1368     emit_long(d);
1369     emit_long(s);
1370     }
1371     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1372    
1373     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1374     {
1375     emit_byte(0x66);
1376     emit_byte(0xc7);
1377     emit_byte(0x05);
1378     emit_long(d);
1379     emit_word(s);
1380     }
1381     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1382    
1383     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1384     {
1385     emit_byte(0xc6);
1386     emit_byte(0x05);
1387     emit_long(d);
1388     emit_byte(s);
1389     }
1390     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1391    
1392     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1393     {
1394     if (optimize_shift_once && (i == 1)) {
1395     emit_byte(0xd0);
1396     emit_byte(0x05);
1397     emit_long(d);
1398     }
1399     else {
1400     emit_byte(0xc0);
1401     emit_byte(0x05);
1402     emit_long(d);
1403     emit_byte(i);
1404     }
1405     }
1406     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1407    
1408     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1409     {
1410     if (optimize_shift_once && (i == 1)) {
1411     emit_byte(0xd0);
1412     emit_byte(0xc0+r);
1413     }
1414     else {
1415     emit_byte(0xc0);
1416     emit_byte(0xc0+r);
1417     emit_byte(i);
1418     }
1419     }
1420     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1421    
1422     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1423     {
1424     emit_byte(0x66);
1425     emit_byte(0xc1);
1426     emit_byte(0xc0+r);
1427     emit_byte(i);
1428     }
1429     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1430    
1431     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1432     {
1433     if (optimize_shift_once && (i == 1)) {
1434     emit_byte(0xd1);
1435     emit_byte(0xc0+r);
1436     }
1437     else {
1438     emit_byte(0xc1);
1439     emit_byte(0xc0+r);
1440     emit_byte(i);
1441     }
1442     }
1443     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1444    
1445     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1446     {
1447     emit_byte(0xd3);
1448     emit_byte(0xc0+d);
1449     }
1450     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1451    
1452     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1453     {
1454     emit_byte(0x66);
1455     emit_byte(0xd3);
1456     emit_byte(0xc0+d);
1457     }
1458     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1459    
1460     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1461     {
1462     emit_byte(0xd2);
1463     emit_byte(0xc0+d);
1464     }
1465     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1466    
1467     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1468     {
1469     emit_byte(0xd3);
1470     emit_byte(0xe0+d);
1471     }
1472     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1473    
1474     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1475     {
1476     emit_byte(0x66);
1477     emit_byte(0xd3);
1478     emit_byte(0xe0+d);
1479     }
1480     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1481    
1482     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1483     {
1484     emit_byte(0xd2);
1485     emit_byte(0xe0+d);
1486     }
1487     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1488    
1489     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1490     {
1491     if (optimize_shift_once && (i == 1)) {
1492     emit_byte(0xd0);
1493     emit_byte(0xc8+r);
1494     }
1495     else {
1496     emit_byte(0xc0);
1497     emit_byte(0xc8+r);
1498     emit_byte(i);
1499     }
1500     }
1501     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1502    
1503     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1504     {
1505     emit_byte(0x66);
1506     emit_byte(0xc1);
1507     emit_byte(0xc8+r);
1508     emit_byte(i);
1509     }
1510     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1511    
1512     // gb-- used for making an fpcr value in compemu_fpp.cpp
1513     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1514     {
1515     emit_byte(0x0b);
1516     emit_byte(0x05+8*d);
1517     emit_long(s);
1518     }
1519     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1520    
1521     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1522     {
1523     if (optimize_shift_once && (i == 1)) {
1524     emit_byte(0xd1);
1525     emit_byte(0xc8+r);
1526     }
1527     else {
1528     emit_byte(0xc1);
1529     emit_byte(0xc8+r);
1530     emit_byte(i);
1531     }
1532     }
1533     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1534    
1535     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1536     {
1537     emit_byte(0xd3);
1538     emit_byte(0xc8+d);
1539     }
1540     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1541    
1542     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1543     {
1544     emit_byte(0x66);
1545     emit_byte(0xd3);
1546     emit_byte(0xc8+d);
1547     }
1548     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1549    
1550     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1551     {
1552     emit_byte(0xd2);
1553     emit_byte(0xc8+d);
1554     }
1555     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1556    
1557     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1558     {
1559     emit_byte(0xd3);
1560     emit_byte(0xe8+d);
1561     }
1562     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1563    
1564     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1565     {
1566     emit_byte(0x66);
1567     emit_byte(0xd3);
1568     emit_byte(0xe8+d);
1569     }
1570     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1571    
1572     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1573     {
1574     emit_byte(0xd2);
1575     emit_byte(0xe8+d);
1576     }
1577     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1578    
1579     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1580     {
1581     emit_byte(0xd3);
1582     emit_byte(0xf8+d);
1583     }
1584     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1585    
1586     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1587     {
1588     emit_byte(0x66);
1589     emit_byte(0xd3);
1590     emit_byte(0xf8+d);
1591     }
1592     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1593    
1594     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1595     {
1596     emit_byte(0xd2);
1597     emit_byte(0xf8+d);
1598     }
1599     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1600    
1601     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1602     {
1603     if (optimize_shift_once && (i == 1)) {
1604     emit_byte(0xd1);
1605     emit_byte(0xe0+r);
1606     }
1607     else {
1608     emit_byte(0xc1);
1609     emit_byte(0xe0+r);
1610     emit_byte(i);
1611     }
1612     }
1613     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1614    
1615     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1616     {
1617     emit_byte(0x66);
1618     emit_byte(0xc1);
1619     emit_byte(0xe0+r);
1620     emit_byte(i);
1621     }
1622     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1623    
1624     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1625     {
1626     if (optimize_shift_once && (i == 1)) {
1627     emit_byte(0xd0);
1628     emit_byte(0xe0+r);
1629     }
1630     else {
1631     emit_byte(0xc0);
1632     emit_byte(0xe0+r);
1633     emit_byte(i);
1634     }
1635     }
1636     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1637    
1638     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1639     {
1640     if (optimize_shift_once && (i == 1)) {
1641     emit_byte(0xd1);
1642     emit_byte(0xe8+r);
1643     }
1644     else {
1645     emit_byte(0xc1);
1646     emit_byte(0xe8+r);
1647     emit_byte(i);
1648     }
1649     }
1650     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1651    
1652     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1653     {
1654     emit_byte(0x66);
1655     emit_byte(0xc1);
1656     emit_byte(0xe8+r);
1657     emit_byte(i);
1658     }
1659     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1660    
1661     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1662     {
1663     if (optimize_shift_once && (i == 1)) {
1664     emit_byte(0xd0);
1665     emit_byte(0xe8+r);
1666     }
1667     else {
1668     emit_byte(0xc0);
1669     emit_byte(0xe8+r);
1670     emit_byte(i);
1671     }
1672     }
1673     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1674    
1675     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1676     {
1677     if (optimize_shift_once && (i == 1)) {
1678     emit_byte(0xd1);
1679     emit_byte(0xf8+r);
1680     }
1681     else {
1682     emit_byte(0xc1);
1683     emit_byte(0xf8+r);
1684     emit_byte(i);
1685     }
1686     }
1687     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1688    
1689     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1690     {
1691     emit_byte(0x66);
1692     emit_byte(0xc1);
1693     emit_byte(0xf8+r);
1694     emit_byte(i);
1695     }
1696     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1697    
1698     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1699     {
1700     if (optimize_shift_once && (i == 1)) {
1701     emit_byte(0xd0);
1702     emit_byte(0xf8+r);
1703     }
1704     else {
1705     emit_byte(0xc0);
1706     emit_byte(0xf8+r);
1707     emit_byte(i);
1708     }
1709     }
1710     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1711    
1712     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1713     {
1714     emit_byte(0x9e);
1715     }
1716     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1717    
1718     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1719     {
1720     emit_byte(0x0f);
1721     emit_byte(0xa2);
1722     }
1723     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1724    
1725     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1726     {
1727     emit_byte(0x9f);
1728     }
1729     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1730    
1731     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1732     {
1733     emit_byte(0x0f);
1734     emit_byte(0x90+cc);
1735     emit_byte(0xc0+d);
1736     }
1737     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1738    
1739     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1740     {
1741     emit_byte(0x0f);
1742     emit_byte(0x90+cc);
1743     emit_byte(0x05);
1744     emit_long(d);
1745     }
1746     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1747    
1748     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1749     {
1750     if (have_cmov) {
1751     emit_byte(0x0f);
1752     emit_byte(0x40+cc);
1753     emit_byte(0xc0+8*d+s);
1754     }
1755     else { /* replacement using branch and mov */
1756     int uncc=(cc^1);
1757     emit_byte(0x70+uncc);
1758     emit_byte(2); /* skip next 2 bytes if not cc=true */
1759     emit_byte(0x89);
1760     emit_byte(0xc0+8*s+d);
1761     }
1762     }
1763     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1764    
1765     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1766     {
1767     emit_byte(0x0f);
1768     emit_byte(0xbc);
1769     emit_byte(0xc0+8*d+s);
1770     }
1771     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1772    
1773     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1774     {
1775     emit_byte(0x0f);
1776     emit_byte(0xbf);
1777     emit_byte(0xc0+8*d+s);
1778     }
1779     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1780    
1781     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1782     {
1783     emit_byte(0x0f);
1784     emit_byte(0xbe);
1785     emit_byte(0xc0+8*d+s);
1786     }
1787     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1788    
1789     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1790     {
1791     emit_byte(0x0f);
1792     emit_byte(0xb7);
1793     emit_byte(0xc0+8*d+s);
1794     }
1795     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1796    
1797     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1798     {
1799     emit_byte(0x0f);
1800     emit_byte(0xb6);
1801     emit_byte(0xc0+8*d+s);
1802     }
1803     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1804    
1805     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1806     {
1807     emit_byte(0x0f);
1808     emit_byte(0xaf);
1809     emit_byte(0xc0+8*d+s);
1810     }
1811     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1812    
1813     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1814     {
1815     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1816     abort();
1817     emit_byte(0xf7);
1818     emit_byte(0xea);
1819     }
1820     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1821    
1822     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1823     {
1824     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1825     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1826     abort();
1827     }
1828     emit_byte(0xf7);
1829     emit_byte(0xe2);
1830     }
1831     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1832    
1833     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1834     {
1835     abort(); /* %^$&%^$%#^ x86! */
1836     emit_byte(0x0f);
1837     emit_byte(0xaf);
1838     emit_byte(0xc0+8*d+s);
1839     }
1840     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1841    
1842     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1843     {
1844     emit_byte(0x88);
1845     emit_byte(0xc0+8*s+d);
1846     }
1847     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1848    
1849     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1850     {
1851     emit_byte(0x66);
1852     emit_byte(0x89);
1853     emit_byte(0xc0+8*s+d);
1854     }
1855     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1856    
1857     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1858     {
1859     int isebp=(baser==5)?0x40:0;
1860     int fi;
1861    
1862     switch(factor) {
1863     case 1: fi=0; break;
1864     case 2: fi=1; break;
1865     case 4: fi=2; break;
1866     case 8: fi=3; break;
1867     default: abort();
1868     }
1869    
1870    
1871     emit_byte(0x8b);
1872     emit_byte(0x04+8*d+isebp);
1873     emit_byte(baser+8*index+0x40*fi);
1874     if (isebp)
1875     emit_byte(0x00);
1876     }
1877     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1878    
1879     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1880     {
1881     int fi;
1882     int isebp;
1883    
1884     switch(factor) {
1885     case 1: fi=0; break;
1886     case 2: fi=1; break;
1887     case 4: fi=2; break;
1888     case 8: fi=3; break;
1889     default: abort();
1890     }
1891     isebp=(baser==5)?0x40:0;
1892    
1893     emit_byte(0x66);
1894     emit_byte(0x8b);
1895     emit_byte(0x04+8*d+isebp);
1896     emit_byte(baser+8*index+0x40*fi);
1897     if (isebp)
1898     emit_byte(0x00);
1899     }
1900     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1901    
1902     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1903     {
1904     int fi;
1905     int isebp;
1906    
1907     switch(factor) {
1908     case 1: fi=0; break;
1909     case 2: fi=1; break;
1910     case 4: fi=2; break;
1911     case 8: fi=3; break;
1912     default: abort();
1913     }
1914     isebp=(baser==5)?0x40:0;
1915    
1916     emit_byte(0x8a);
1917     emit_byte(0x04+8*d+isebp);
1918     emit_byte(baser+8*index+0x40*fi);
1919     if (isebp)
1920     emit_byte(0x00);
1921     }
1922     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1923    
1924     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1925     {
1926     int fi;
1927     int isebp;
1928    
1929     switch(factor) {
1930     case 1: fi=0; break;
1931     case 2: fi=1; break;
1932     case 4: fi=2; break;
1933     case 8: fi=3; break;
1934     default: abort();
1935     }
1936    
1937    
1938     isebp=(baser==5)?0x40:0;
1939    
1940     emit_byte(0x89);
1941     emit_byte(0x04+8*s+isebp);
1942     emit_byte(baser+8*index+0x40*fi);
1943     if (isebp)
1944     emit_byte(0x00);
1945     }
1946     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1947    
1948     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1949     {
1950     int fi;
1951     int isebp;
1952    
1953     switch(factor) {
1954     case 1: fi=0; break;
1955     case 2: fi=1; break;
1956     case 4: fi=2; break;
1957     case 8: fi=3; break;
1958     default: abort();
1959     }
1960     isebp=(baser==5)?0x40:0;
1961    
1962     emit_byte(0x66);
1963     emit_byte(0x89);
1964     emit_byte(0x04+8*s+isebp);
1965     emit_byte(baser+8*index+0x40*fi);
1966     if (isebp)
1967     emit_byte(0x00);
1968     }
1969     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1970    
1971     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1972     {
1973     int fi;
1974     int isebp;
1975    
1976     switch(factor) {
1977     case 1: fi=0; break;
1978     case 2: fi=1; break;
1979     case 4: fi=2; break;
1980     case 8: fi=3; break;
1981     default: abort();
1982     }
1983     isebp=(baser==5)?0x40:0;
1984    
1985     emit_byte(0x88);
1986     emit_byte(0x04+8*s+isebp);
1987     emit_byte(baser+8*index+0x40*fi);
1988     if (isebp)
1989     emit_byte(0x00);
1990     }
1991     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1992    
1993     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1994     {
1995     int fi;
1996    
1997     switch(factor) {
1998     case 1: fi=0; break;
1999     case 2: fi=1; break;
2000     case 4: fi=2; break;
2001     case 8: fi=3; break;
2002     default: abort();
2003     }
2004    
2005     emit_byte(0x89);
2006     emit_byte(0x84+8*s);
2007     emit_byte(baser+8*index+0x40*fi);
2008     emit_long(base);
2009     }
2010     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2011    
2012     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2013     {
2014     int fi;
2015    
2016     switch(factor) {
2017     case 1: fi=0; break;
2018     case 2: fi=1; break;
2019     case 4: fi=2; break;
2020     case 8: fi=3; break;
2021     default: abort();
2022     }
2023    
2024     emit_byte(0x66);
2025     emit_byte(0x89);
2026     emit_byte(0x84+8*s);
2027     emit_byte(baser+8*index+0x40*fi);
2028     emit_long(base);
2029     }
2030     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2031    
2032     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2033     {
2034     int fi;
2035    
2036     switch(factor) {
2037     case 1: fi=0; break;
2038     case 2: fi=1; break;
2039     case 4: fi=2; break;
2040     case 8: fi=3; break;
2041     default: abort();
2042     }
2043    
2044     emit_byte(0x88);
2045     emit_byte(0x84+8*s);
2046     emit_byte(baser+8*index+0x40*fi);
2047     emit_long(base);
2048     }
2049     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2050    
2051     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2052     {
2053     int fi;
2054    
2055     switch(factor) {
2056     case 1: fi=0; break;
2057     case 2: fi=1; break;
2058     case 4: fi=2; break;
2059     case 8: fi=3; break;
2060     default: abort();
2061     }
2062    
2063     emit_byte(0x8b);
2064     emit_byte(0x84+8*d);
2065     emit_byte(baser+8*index+0x40*fi);
2066     emit_long(base);
2067     }
2068     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2069    
2070     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2071     {
2072     int fi;
2073    
2074     switch(factor) {
2075     case 1: fi=0; break;
2076     case 2: fi=1; break;
2077     case 4: fi=2; break;
2078     case 8: fi=3; break;
2079     default: abort();
2080     }
2081    
2082     emit_byte(0x66);
2083     emit_byte(0x8b);
2084     emit_byte(0x84+8*d);
2085     emit_byte(baser+8*index+0x40*fi);
2086     emit_long(base);
2087     }
2088     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2089    
2090     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2091     {
2092     int fi;
2093    
2094     switch(factor) {
2095     case 1: fi=0; break;
2096     case 2: fi=1; break;
2097     case 4: fi=2; break;
2098     case 8: fi=3; break;
2099     default: abort();
2100     }
2101    
2102     emit_byte(0x8a);
2103     emit_byte(0x84+8*d);
2104     emit_byte(baser+8*index+0x40*fi);
2105     emit_long(base);
2106     }
2107     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2108    
2109     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2110     {
2111     int fi;
2112     switch(factor) {
2113     case 1: fi=0; break;
2114     case 2: fi=1; break;
2115     case 4: fi=2; break;
2116     case 8: fi=3; break;
2117     default:
2118     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2119     abort();
2120     }
2121     emit_byte(0x8b);
2122     emit_byte(0x04+8*d);
2123     emit_byte(0x05+8*index+64*fi);
2124     emit_long(base);
2125     }
2126     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2127    
2128     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2129     {
2130     int fi;
2131     switch(factor) {
2132     case 1: fi=0; break;
2133     case 2: fi=1; break;
2134     case 4: fi=2; break;
2135     case 8: fi=3; break;
2136     default:
2137     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2138     abort();
2139     }
2140     if (have_cmov) {
2141     emit_byte(0x0f);
2142     emit_byte(0x40+cond);
2143     emit_byte(0x04+8*d);
2144     emit_byte(0x05+8*index+64*fi);
2145     emit_long(base);
2146     }
2147     else { /* replacement using branch and mov */
2148     int uncc=(cond^1);
2149     emit_byte(0x70+uncc);
2150     emit_byte(7); /* skip next 7 bytes if not cc=true */
2151     emit_byte(0x8b);
2152     emit_byte(0x04+8*d);
2153     emit_byte(0x05+8*index+64*fi);
2154     emit_long(base);
2155     }
2156     }
2157     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2158    
2159     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2160     {
2161     if (have_cmov) {
2162     emit_byte(0x0f);
2163     emit_byte(0x40+cond);
2164     emit_byte(0x05+8*d);
2165     emit_long(mem);
2166     }
2167     else { /* replacement using branch and mov */
2168     int uncc=(cond^1);
2169     emit_byte(0x70+uncc);
2170     emit_byte(6); /* skip next 6 bytes if not cc=true */
2171     emit_byte(0x8b);
2172     emit_byte(0x05+8*d);
2173     emit_long(mem);
2174     }
2175     }
2176     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2177    
2178     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2179     {
2180 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2181 gbeauche 1.1 emit_byte(0x8b);
2182     emit_byte(0x40+8*d+s);
2183     emit_byte(offset);
2184     }
2185     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2186    
2187     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2188     {
2189 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2190 gbeauche 1.1 emit_byte(0x66);
2191     emit_byte(0x8b);
2192     emit_byte(0x40+8*d+s);
2193     emit_byte(offset);
2194     }
2195     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2196    
2197     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2198     {
2199 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2200 gbeauche 1.1 emit_byte(0x8a);
2201     emit_byte(0x40+8*d+s);
2202     emit_byte(offset);
2203     }
2204     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2205    
2206     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2207     {
2208     emit_byte(0x8b);
2209     emit_byte(0x80+8*d+s);
2210     emit_long(offset);
2211     }
2212     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2213    
2214     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2215     {
2216     emit_byte(0x66);
2217     emit_byte(0x8b);
2218     emit_byte(0x80+8*d+s);
2219     emit_long(offset);
2220     }
2221     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2222    
2223     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2224     {
2225     emit_byte(0x8a);
2226     emit_byte(0x80+8*d+s);
2227     emit_long(offset);
2228     }
2229     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2230    
2231     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2232     {
2233 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2234 gbeauche 1.1 emit_byte(0xc7);
2235     emit_byte(0x40+d);
2236     emit_byte(offset);
2237     emit_long(i);
2238     }
2239     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2240    
2241     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2242     {
2243 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2244 gbeauche 1.1 emit_byte(0x66);
2245     emit_byte(0xc7);
2246     emit_byte(0x40+d);
2247     emit_byte(offset);
2248     emit_word(i);
2249     }
2250     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2251    
2252     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2253     {
2254 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2255 gbeauche 1.1 emit_byte(0xc6);
2256     emit_byte(0x40+d);
2257     emit_byte(offset);
2258     emit_byte(i);
2259     }
2260     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2261    
2262     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2263     {
2264 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2265 gbeauche 1.1 emit_byte(0x89);
2266     emit_byte(0x40+8*s+d);
2267     emit_byte(offset);
2268     }
2269     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2270    
2271     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2272     {
2273 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2274 gbeauche 1.1 emit_byte(0x66);
2275     emit_byte(0x89);
2276     emit_byte(0x40+8*s+d);
2277     emit_byte(offset);
2278     }
2279     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2280    
2281     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2282     {
2283 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2284 gbeauche 1.1 emit_byte(0x88);
2285     emit_byte(0x40+8*s+d);
2286     emit_byte(offset);
2287     }
2288     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2289    
2290     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2291     {
2292     if (optimize_imm8 && isbyte(offset)) {
2293     emit_byte(0x8d);
2294     emit_byte(0x40+8*d+s);
2295     emit_byte(offset);
2296     }
2297     else {
2298     emit_byte(0x8d);
2299     emit_byte(0x80+8*d+s);
2300     emit_long(offset);
2301     }
2302     }
2303     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2304    
2305     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2306     {
2307     int fi;
2308    
2309     switch(factor) {
2310     case 1: fi=0; break;
2311     case 2: fi=1; break;
2312     case 4: fi=2; break;
2313     case 8: fi=3; break;
2314     default: abort();
2315     }
2316    
2317     if (optimize_imm8 && isbyte(offset)) {
2318     emit_byte(0x8d);
2319     emit_byte(0x44+8*d);
2320     emit_byte(0x40*fi+8*index+s);
2321     emit_byte(offset);
2322     }
2323     else {
2324     emit_byte(0x8d);
2325     emit_byte(0x84+8*d);
2326     emit_byte(0x40*fi+8*index+s);
2327     emit_long(offset);
2328     }
2329     }
2330     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2331    
2332     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2333     {
2334     int isebp=(s==5)?0x40:0;
2335     int fi;
2336    
2337     switch(factor) {
2338     case 1: fi=0; break;
2339     case 2: fi=1; break;
2340     case 4: fi=2; break;
2341     case 8: fi=3; break;
2342     default: abort();
2343     }
2344    
2345     emit_byte(0x8d);
2346     emit_byte(0x04+8*d+isebp);
2347     emit_byte(0x40*fi+8*index+s);
2348     if (isebp)
2349     emit_byte(0);
2350     }
2351     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2352    
2353     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2354     {
2355     if (optimize_imm8 && isbyte(offset)) {
2356     emit_byte(0x89);
2357     emit_byte(0x40+8*s+d);
2358     emit_byte(offset);
2359     }
2360     else {
2361     emit_byte(0x89);
2362     emit_byte(0x80+8*s+d);
2363     emit_long(offset);
2364     }
2365     }
2366     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2367    
2368     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2369     {
2370     emit_byte(0x66);
2371     emit_byte(0x89);
2372     emit_byte(0x80+8*s+d);
2373     emit_long(offset);
2374     }
2375     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2376    
2377     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2378     {
2379     if (optimize_imm8 && isbyte(offset)) {
2380     emit_byte(0x88);
2381     emit_byte(0x40+8*s+d);
2382     emit_byte(offset);
2383     }
2384     else {
2385     emit_byte(0x88);
2386     emit_byte(0x80+8*s+d);
2387     emit_long(offset);
2388     }
2389     }
2390     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2391    
2392     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2393     {
2394     emit_byte(0x0f);
2395     emit_byte(0xc8+r);
2396     }
2397     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2398    
2399     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2400     {
2401     emit_byte(0x66);
2402     emit_byte(0xc1);
2403     emit_byte(0xc0+r);
2404     emit_byte(0x08);
2405     }
2406     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2407    
2408     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2409     {
2410     emit_byte(0x89);
2411     emit_byte(0xc0+8*s+d);
2412     }
2413     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2414    
2415     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2416     {
2417     emit_byte(0x89);
2418     emit_byte(0x05+8*s);
2419     emit_long(d);
2420     }
2421     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2422    
2423     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2424     {
2425     emit_byte(0x66);
2426     emit_byte(0x89);
2427     emit_byte(0x05+8*s);
2428     emit_long(d);
2429     }
2430     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2431    
2432     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2433     {
2434     emit_byte(0x66);
2435     emit_byte(0x8b);
2436     emit_byte(0x05+8*d);
2437     emit_long(s);
2438     }
2439     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2440    
2441     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2442     {
2443     emit_byte(0x88);
2444 gbeauche 1.33 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2445 gbeauche 1.1 emit_long(d);
2446     }
2447     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2448    
2449     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2450     {
2451     emit_byte(0x8a);
2452     emit_byte(0x05+8*d);
2453     emit_long(s);
2454     }
2455     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2456    
2457     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2458     {
2459     emit_byte(0xb8+d);
2460     emit_long(s);
2461     }
2462     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2463    
2464     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2465     {
2466     emit_byte(0x66);
2467     emit_byte(0xb8+d);
2468     emit_word(s);
2469     }
2470     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2471    
2472     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2473     {
2474     emit_byte(0xb0+d);
2475     emit_byte(s);
2476     }
2477     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2478    
2479     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2480     {
2481     emit_byte(0x81);
2482     emit_byte(0x15);
2483     emit_long(d);
2484     emit_long(s);
2485     }
2486     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2487    
2488     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2489     {
2490     if (optimize_imm8 && isbyte(s)) {
2491     emit_byte(0x83);
2492     emit_byte(0x05);
2493     emit_long(d);
2494     emit_byte(s);
2495     }
2496     else {
2497     emit_byte(0x81);
2498     emit_byte(0x05);
2499     emit_long(d);
2500     emit_long(s);
2501     }
2502     }
2503     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2504    
2505     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2506     {
2507     emit_byte(0x66);
2508     emit_byte(0x81);
2509     emit_byte(0x05);
2510     emit_long(d);
2511     emit_word(s);
2512     }
2513     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2514    
2515     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2516     {
2517     emit_byte(0x80);
2518     emit_byte(0x05);
2519     emit_long(d);
2520     emit_byte(s);
2521     }
2522     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2523    
2524     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2525     {
2526 gbeauche 1.2 if (optimize_accum && isaccum(d))
2527     emit_byte(0xa9);
2528     else {
2529 gbeauche 1.1 emit_byte(0xf7);
2530     emit_byte(0xc0+d);
2531 gbeauche 1.2 }
2532 gbeauche 1.1 emit_long(i);
2533     }
2534     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2535    
2536     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2537     {
2538     emit_byte(0x85);
2539     emit_byte(0xc0+8*s+d);
2540     }
2541     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2542    
2543     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2544     {
2545     emit_byte(0x66);
2546     emit_byte(0x85);
2547     emit_byte(0xc0+8*s+d);
2548     }
2549     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2550    
2551     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2552     {
2553     emit_byte(0x84);
2554     emit_byte(0xc0+8*s+d);
2555     }
2556     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2557    
2558 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2559     {
2560     emit_byte(0x81);
2561     emit_byte(0xf0+d);
2562     emit_long(i);
2563     }
2564     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2565    
2566 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2567     {
2568     if (optimize_imm8 && isbyte(i)) {
2569 gbeauche 1.2 emit_byte(0x83);
2570     emit_byte(0xe0+d);
2571     emit_byte(i);
2572 gbeauche 1.1 }
2573     else {
2574 gbeauche 1.2 if (optimize_accum && isaccum(d))
2575     emit_byte(0x25);
2576     else {
2577     emit_byte(0x81);
2578     emit_byte(0xe0+d);
2579     }
2580     emit_long(i);
2581 gbeauche 1.1 }
2582     }
2583     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2584    
2585     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2586     {
2587 gbeauche 1.2 emit_byte(0x66);
2588     if (optimize_imm8 && isbyte(i)) {
2589     emit_byte(0x83);
2590     emit_byte(0xe0+d);
2591     emit_byte(i);
2592     }
2593     else {
2594     if (optimize_accum && isaccum(d))
2595     emit_byte(0x25);
2596     else {
2597     emit_byte(0x81);
2598     emit_byte(0xe0+d);
2599     }
2600     emit_word(i);
2601     }
2602 gbeauche 1.1 }
2603     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2604    
2605     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2606     {
2607     emit_byte(0x21);
2608     emit_byte(0xc0+8*s+d);
2609     }
2610     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2611    
2612     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2613     {
2614     emit_byte(0x66);
2615     emit_byte(0x21);
2616     emit_byte(0xc0+8*s+d);
2617     }
2618     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2619    
2620     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2621     {
2622     emit_byte(0x20);
2623     emit_byte(0xc0+8*s+d);
2624     }
2625     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2626    
2627     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2628     {
2629     if (optimize_imm8 && isbyte(i)) {
2630     emit_byte(0x83);
2631     emit_byte(0xc8+d);
2632     emit_byte(i);
2633     }
2634     else {
2635 gbeauche 1.2 if (optimize_accum && isaccum(d))
2636     emit_byte(0x0d);
2637     else {
2638 gbeauche 1.1 emit_byte(0x81);
2639     emit_byte(0xc8+d);
2640 gbeauche 1.2 }
2641 gbeauche 1.1 emit_long(i);
2642     }
2643     }
2644     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2645    
2646     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2647     {
2648     emit_byte(0x09);
2649     emit_byte(0xc0+8*s+d);
2650     }
2651     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2652    
2653     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2654     {
2655     emit_byte(0x66);
2656     emit_byte(0x09);
2657     emit_byte(0xc0+8*s+d);
2658     }
2659     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2660    
2661     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2662     {
2663     emit_byte(0x08);
2664     emit_byte(0xc0+8*s+d);
2665     }
2666     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2667    
2668     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2669     {
2670     emit_byte(0x11);
2671     emit_byte(0xc0+8*s+d);
2672     }
2673     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2674    
2675     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2676     {
2677     emit_byte(0x66);
2678     emit_byte(0x11);
2679     emit_byte(0xc0+8*s+d);
2680     }
2681     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2682    
2683     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2684     {
2685     emit_byte(0x10);
2686     emit_byte(0xc0+8*s+d);
2687     }
2688     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2689    
2690     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2691     {
2692     emit_byte(0x01);
2693     emit_byte(0xc0+8*s+d);
2694     }
2695     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2696    
2697     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2698     {
2699     emit_byte(0x66);
2700     emit_byte(0x01);
2701     emit_byte(0xc0+8*s+d);
2702     }
2703     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2704    
2705     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2706     {
2707     emit_byte(0x00);
2708     emit_byte(0xc0+8*s+d);
2709     }
2710     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2711    
2712     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2713     {
2714     if (isbyte(i)) {
2715     emit_byte(0x83);
2716     emit_byte(0xe8+d);
2717     emit_byte(i);
2718     }
2719     else {
2720 gbeauche 1.2 if (optimize_accum && isaccum(d))
2721     emit_byte(0x2d);
2722     else {
2723 gbeauche 1.1 emit_byte(0x81);
2724     emit_byte(0xe8+d);
2725 gbeauche 1.2 }
2726 gbeauche 1.1 emit_long(i);
2727     }
2728     }
2729     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2730    
2731     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2732     {
2733 gbeauche 1.2 if (optimize_accum && isaccum(d))
2734     emit_byte(0x2c);
2735     else {
2736 gbeauche 1.1 emit_byte(0x80);
2737     emit_byte(0xe8+d);
2738 gbeauche 1.2 }
2739 gbeauche 1.1 emit_byte(i);
2740     }
2741     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2742    
2743     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2744     {
2745     if (isbyte(i)) {
2746     emit_byte(0x83);
2747     emit_byte(0xc0+d);
2748     emit_byte(i);
2749     }
2750     else {
2751 gbeauche 1.2 if (optimize_accum && isaccum(d))
2752     emit_byte(0x05);
2753     else {
2754 gbeauche 1.1 emit_byte(0x81);
2755     emit_byte(0xc0+d);
2756 gbeauche 1.2 }
2757 gbeauche 1.1 emit_long(i);
2758     }
2759     }
2760     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2761    
2762     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2763     {
2764 gbeauche 1.2 emit_byte(0x66);
2765 gbeauche 1.1 if (isbyte(i)) {
2766     emit_byte(0x83);
2767     emit_byte(0xc0+d);
2768     emit_byte(i);
2769     }
2770     else {
2771 gbeauche 1.2 if (optimize_accum && isaccum(d))
2772     emit_byte(0x05);
2773     else {
2774 gbeauche 1.1 emit_byte(0x81);
2775     emit_byte(0xc0+d);
2776 gbeauche 1.2 }
2777 gbeauche 1.1 emit_word(i);
2778     }
2779     }
2780     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2781    
2782     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2783     {
2784 gbeauche 1.2 if (optimize_accum && isaccum(d))
2785     emit_byte(0x04);
2786     else {
2787     emit_byte(0x80);
2788     emit_byte(0xc0+d);
2789     }
2790 gbeauche 1.1 emit_byte(i);
2791     }
2792     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2793    
2794     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2795     {
2796     emit_byte(0x19);
2797     emit_byte(0xc0+8*s+d);
2798     }
2799     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2800    
2801     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2802     {
2803     emit_byte(0x66);
2804     emit_byte(0x19);
2805     emit_byte(0xc0+8*s+d);
2806     }
2807     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2808    
2809     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2810     {
2811     emit_byte(0x18);
2812     emit_byte(0xc0+8*s+d);
2813     }
2814     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2815    
2816     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2817     {
2818     emit_byte(0x29);
2819     emit_byte(0xc0+8*s+d);
2820     }
2821     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2822    
2823     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2824     {
2825     emit_byte(0x66);
2826     emit_byte(0x29);
2827     emit_byte(0xc0+8*s+d);
2828     }
2829     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2830    
2831     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2832     {
2833     emit_byte(0x28);
2834     emit_byte(0xc0+8*s+d);
2835     }
2836     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2837    
2838     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2839     {
2840     emit_byte(0x39);
2841     emit_byte(0xc0+8*s+d);
2842     }
2843     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2844    
2845     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2846     {
2847     if (optimize_imm8 && isbyte(i)) {
2848     emit_byte(0x83);
2849     emit_byte(0xf8+r);
2850     emit_byte(i);
2851     }
2852     else {
2853 gbeauche 1.2 if (optimize_accum && isaccum(r))
2854     emit_byte(0x3d);
2855     else {
2856 gbeauche 1.1 emit_byte(0x81);
2857     emit_byte(0xf8+r);
2858 gbeauche 1.2 }
2859 gbeauche 1.1 emit_long(i);
2860     }
2861     }
2862     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2863    
2864     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2865     {
2866     emit_byte(0x66);
2867     emit_byte(0x39);
2868     emit_byte(0xc0+8*s+d);
2869     }
2870     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2871    
2872 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2873     {
2874     emit_byte(0x80);
2875     emit_byte(0x3d);
2876     emit_long(d);
2877     emit_byte(s);
2878     }
2879     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2880    
2881 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2882     {
2883 gbeauche 1.2 if (optimize_accum && isaccum(d))
2884     emit_byte(0x3c);
2885     else {
2886 gbeauche 1.1 emit_byte(0x80);
2887     emit_byte(0xf8+d);
2888 gbeauche 1.2 }
2889 gbeauche 1.1 emit_byte(i);
2890     }
2891     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2892    
2893     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2894     {
2895     emit_byte(0x38);
2896     emit_byte(0xc0+8*s+d);
2897     }
2898     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2899    
2900     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2901     {
2902     int fi;
2903    
2904     switch(factor) {
2905     case 1: fi=0; break;
2906     case 2: fi=1; break;
2907     case 4: fi=2; break;
2908     case 8: fi=3; break;
2909     default: abort();
2910     }
2911     emit_byte(0x39);
2912     emit_byte(0x04+8*d);
2913     emit_byte(5+8*index+0x40*fi);
2914     emit_long(offset);
2915     }
2916     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2917    
2918     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2919     {
2920     emit_byte(0x31);
2921     emit_byte(0xc0+8*s+d);
2922     }
2923     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2924    
2925     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2926     {
2927     emit_byte(0x66);
2928     emit_byte(0x31);
2929     emit_byte(0xc0+8*s+d);
2930     }
2931     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2932    
2933     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2934     {
2935     emit_byte(0x30);
2936     emit_byte(0xc0+8*s+d);
2937     }
2938     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2939    
2940     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2941     {
2942     if (optimize_imm8 && isbyte(s)) {
2943     emit_byte(0x83);
2944     emit_byte(0x2d);
2945     emit_long(d);
2946     emit_byte(s);
2947     }
2948     else {
2949     emit_byte(0x81);
2950     emit_byte(0x2d);
2951     emit_long(d);
2952     emit_long(s);
2953     }
2954     }
2955     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2956    
2957     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2958     {
2959     if (optimize_imm8 && isbyte(s)) {
2960     emit_byte(0x83);
2961     emit_byte(0x3d);
2962     emit_long(d);
2963     emit_byte(s);
2964     }
2965     else {
2966     emit_byte(0x81);
2967     emit_byte(0x3d);
2968     emit_long(d);
2969     emit_long(s);
2970     }
2971     }
2972     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2973    
2974     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2975     {
2976     emit_byte(0x87);
2977     emit_byte(0xc0+8*r1+r2);
2978     }
2979     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2980    
2981 gbeauche 1.36 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
2982     {
2983     emit_byte(0x86);
2984     emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
2985     }
2986     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2987    
2988 gbeauche 1.1 /*************************************************************************
2989     * FIXME: mem access modes probably wrong *
2990     *************************************************************************/
2991    
2992     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2993     {
2994     emit_byte(0x9c);
2995     }
2996     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2997    
2998     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2999     {
3000     emit_byte(0x9d);
3001     }
3002     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
3003 gbeauche 1.13
3004 gbeauche 1.34 /* Generate floating-point instructions */
3005     static inline void x86_fadd_m(MEMR s)
3006     {
3007     emit_byte(0xdc);
3008     emit_byte(0x05);
3009     emit_long(s);
3010     }
3011    
3012 gbeauche 1.13 #endif
3013 gbeauche 1.1
3014     /*************************************************************************
3015     * Unoptimizable stuff --- jump *
3016     *************************************************************************/
3017    
3018     static __inline__ void raw_call_r(R4 r)
3019     {
3020 gbeauche 1.20 #if USE_NEW_RTASM
3021     CALLsr(r);
3022     #else
3023 gbeauche 1.1 emit_byte(0xff);
3024     emit_byte(0xd0+r);
3025 gbeauche 1.20 #endif
3026 gbeauche 1.5 }
3027    
3028     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3029     {
3030 gbeauche 1.20 #if USE_NEW_RTASM
3031     CALLsm(base, X86_NOREG, r, m);
3032     #else
3033 gbeauche 1.5 int mu;
3034     switch(m) {
3035     case 1: mu=0; break;
3036     case 2: mu=1; break;
3037     case 4: mu=2; break;
3038     case 8: mu=3; break;
3039     default: abort();
3040     }
3041     emit_byte(0xff);
3042     emit_byte(0x14);
3043     emit_byte(0x05+8*r+0x40*mu);
3044     emit_long(base);
3045 gbeauche 1.20 #endif
3046 gbeauche 1.1 }
3047    
3048     static __inline__ void raw_jmp_r(R4 r)
3049     {
3050 gbeauche 1.20 #if USE_NEW_RTASM
3051     JMPsr(r);
3052     #else
3053 gbeauche 1.1 emit_byte(0xff);
3054     emit_byte(0xe0+r);
3055 gbeauche 1.20 #endif
3056 gbeauche 1.1 }
3057    
3058     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3059     {
3060 gbeauche 1.20 #if USE_NEW_RTASM
3061     JMPsm(base, X86_NOREG, r, m);
3062     #else
3063 gbeauche 1.1 int mu;
3064     switch(m) {
3065     case 1: mu=0; break;
3066     case 2: mu=1; break;
3067     case 4: mu=2; break;
3068     case 8: mu=3; break;
3069     default: abort();
3070     }
3071     emit_byte(0xff);
3072     emit_byte(0x24);
3073     emit_byte(0x05+8*r+0x40*mu);
3074     emit_long(base);
3075 gbeauche 1.20 #endif
3076 gbeauche 1.1 }
3077    
3078     static __inline__ void raw_jmp_m(uae_u32 base)
3079     {
3080     emit_byte(0xff);
3081     emit_byte(0x25);
3082     emit_long(base);
3083     }
3084    
3085    
3086     static __inline__ void raw_call(uae_u32 t)
3087     {
3088 gbeauche 1.20 #if USE_NEW_RTASM
3089     CALLm(t);
3090     #else
3091 gbeauche 1.1 emit_byte(0xe8);
3092     emit_long(t-(uae_u32)target-4);
3093 gbeauche 1.20 #endif
3094 gbeauche 1.1 }
3095    
3096     static __inline__ void raw_jmp(uae_u32 t)
3097     {
3098 gbeauche 1.20 #if USE_NEW_RTASM
3099     JMPm(t);
3100     #else
3101 gbeauche 1.1 emit_byte(0xe9);
3102     emit_long(t-(uae_u32)target-4);
3103 gbeauche 1.20 #endif
3104 gbeauche 1.1 }
3105    
3106     static __inline__ void raw_jl(uae_u32 t)
3107     {
3108     emit_byte(0x0f);
3109     emit_byte(0x8c);
3110 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3111 gbeauche 1.1 }
3112    
3113     static __inline__ void raw_jz(uae_u32 t)
3114     {
3115     emit_byte(0x0f);
3116     emit_byte(0x84);
3117 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3118 gbeauche 1.1 }
3119    
3120     static __inline__ void raw_jnz(uae_u32 t)
3121     {
3122     emit_byte(0x0f);
3123     emit_byte(0x85);
3124 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3125 gbeauche 1.1 }
3126    
3127     static __inline__ void raw_jnz_l_oponly(void)
3128     {
3129     emit_byte(0x0f);
3130     emit_byte(0x85);
3131     }
3132    
3133     static __inline__ void raw_jcc_l_oponly(int cc)
3134     {
3135     emit_byte(0x0f);
3136     emit_byte(0x80+cc);
3137     }
3138    
3139     static __inline__ void raw_jnz_b_oponly(void)
3140     {
3141     emit_byte(0x75);
3142     }
3143    
3144     static __inline__ void raw_jz_b_oponly(void)
3145     {
3146     emit_byte(0x74);
3147     }
3148    
3149     static __inline__ void raw_jcc_b_oponly(int cc)
3150     {
3151     emit_byte(0x70+cc);
3152     }
3153    
3154     static __inline__ void raw_jmp_l_oponly(void)
3155     {
3156     emit_byte(0xe9);
3157     }
3158    
3159     static __inline__ void raw_jmp_b_oponly(void)
3160     {
3161     emit_byte(0xeb);
3162     }
3163    
3164     static __inline__ void raw_ret(void)
3165     {
3166     emit_byte(0xc3);
3167     }
3168    
3169     static __inline__ void raw_nop(void)
3170     {
3171     emit_byte(0x90);
3172     }
3173    
3174 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3175     {
3176     /* Source: GNU Binutils 2.12.90.0.15 */
3177     /* Various efficient no-op patterns for aligning code labels.
3178     Note: Don't try to assemble the instructions in the comments.
3179     0L and 0w are not legal. */
3180     static const uae_u8 f32_1[] =
3181     {0x90}; /* nop */
3182     static const uae_u8 f32_2[] =
3183     {0x89,0xf6}; /* movl %esi,%esi */
3184     static const uae_u8 f32_3[] =
3185     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3186     static const uae_u8 f32_4[] =
3187     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3188     static const uae_u8 f32_5[] =
3189     {0x90, /* nop */
3190     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3191     static const uae_u8 f32_6[] =
3192     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3193     static const uae_u8 f32_7[] =
3194     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3195     static const uae_u8 f32_8[] =
3196     {0x90, /* nop */
3197     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3198     static const uae_u8 f32_9[] =
3199     {0x89,0xf6, /* movl %esi,%esi */
3200     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3201     static const uae_u8 f32_10[] =
3202     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3203     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3204     static const uae_u8 f32_11[] =
3205     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3206     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3207     static const uae_u8 f32_12[] =
3208     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3209     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3210     static const uae_u8 f32_13[] =
3211     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3212     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3213     static const uae_u8 f32_14[] =
3214     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3215     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3216     static const uae_u8 f32_15[] =
3217     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3218     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3219     static const uae_u8 f32_16[] =
3220     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3221     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3222     static const uae_u8 *const f32_patt[] = {
3223     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3224     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3225     };
3226 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3227 gbeauche 1.8
3228 gbeauche 1.21 #if defined(__x86_64__)
3229     /* The recommended way to pad 64bit code is to use NOPs preceded by
3230     maximally four 0x66 prefixes. Balance the size of nops. */
3231     if (nbytes == 0)
3232     return;
3233    
3234     int i;
3235     int nnops = (nbytes + 3) / 4;
3236     int len = nbytes / nnops;
3237     int remains = nbytes - nnops * len;
3238    
3239     for (i = 0; i < remains; i++) {
3240     emit_block(prefixes, len);
3241     raw_nop();
3242     }
3243     for (; i < nnops; i++) {
3244     emit_block(prefixes, len - 1);
3245     raw_nop();
3246     }
3247     #else
3248 gbeauche 1.8 int nloops = nbytes / 16;
3249     while (nloops-- > 0)
3250     emit_block(f32_16, sizeof(f32_16));
3251    
3252     nbytes %= 16;
3253     if (nbytes)
3254     emit_block(f32_patt[nbytes - 1], nbytes);
3255 gbeauche 1.21 #endif
3256 gbeauche 1.8 }
3257    
3258 gbeauche 1.1
3259     /*************************************************************************
3260     * Flag handling, to and fro UAE flag register *
3261     *************************************************************************/
3262    
3263 gbeauche 1.36 static __inline__ void raw_flags_evicted(int r)
3264 gbeauche 1.1 {
3265     //live.state[FLAGTMP].status=CLEAN;
3266     live.state[FLAGTMP].status=INMEM;
3267     live.state[FLAGTMP].realreg=-1;
3268     /* We just "evicted" FLAGTMP. */
3269     if (live.nat[r].nholds!=1) {
3270     /* Huh? */
3271     abort();
3272     }
3273     live.nat[r].nholds=0;
3274 gbeauche 1.36 }
3275    
3276     #define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */
3277     static __inline__ void raw_flags_to_reg_FLAGREG(int r)
3278     {
3279     raw_lahf(0); /* Most flags in AH */
3280     //raw_setcc(r,0); /* V flag in AL */
3281     raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3282    
3283     #if 1 /* Let's avoid those nasty partial register stalls */
3284     //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3285     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3286     raw_flags_evicted(r);
3287 gbeauche 1.1 #endif
3288     }
3289    
3290 gbeauche 1.36 #define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */
3291     static __inline__ void raw_reg_to_flags_FLAGREG(int r)
3292 gbeauche 1.1 {
3293     raw_cmp_b_ri(r,-127); /* set V */
3294     raw_sahf(0);
3295     }
3296    
3297 gbeauche 1.36 #define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
3298     static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
3299 gbeauche 1.24 {
3300     raw_mov_l_rr(tmp,s);
3301     raw_lahf(s); /* flags into ah */
3302     raw_and_l_ri(s,0xffffbfff);
3303     raw_and_l_ri(tmp,0x00004000);
3304     raw_xor_l_ri(tmp,0x00004000);
3305     raw_or_l(s,tmp);
3306     raw_sahf(s);
3307     }
3308    
/* The LAHF/SETO method needs no run-time initialisation. */
static __inline__ void raw_flags_init_FLAGREG(void)
{
}
3310 gbeauche 1.1
3311 gbeauche 1.36 #define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */
3312     static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
3313 gbeauche 1.1 {
3314     raw_pushfl();
3315     raw_pop_l_r(r);
3316 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3317 gbeauche 1.36 raw_flags_evicted(r);
3318 gbeauche 1.1 }
3319    
3320 gbeauche 1.36 #define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */
3321     static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
3322 gbeauche 1.1 {
3323     raw_push_l_r(r);
3324     raw_popfl();
3325     }
3326    
3327 gbeauche 1.36 #define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */
3328     static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
3329 gbeauche 1.24 {
3330     raw_mov_l_rr(tmp,s);
3331     raw_pushfl();
3332     raw_pop_l_r(s);
3333     raw_and_l_ri(s,0xffffffbf);
3334     raw_and_l_ri(tmp,0x00000040);
3335     raw_xor_l_ri(tmp,0x00000040);
3336     raw_or_l(s,tmp);
3337     raw_push_l_r(s);
3338     raw_popfl();
3339     }
3340 gbeauche 1.36
/* The PUSHF/POPF method needs no run-time initialisation. */
static __inline__ void raw_flags_init_FLAGSTK(void)
{
}
3342    
3343     #if defined(__x86_64__)
3344     /* Try to use the LAHF/SETO method on x86_64 since it is faster.
3345     This can't be the default because some older CPUs don't support
3346     LAHF/SAHF in long mode. */
3347     static int FLAG_NREG1_FLAGGEN = 0;
3348     static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
3349     {
3350     if (have_lahf_lm) {
3351     // NOTE: the interpreter uses the normal EFLAGS layout
3352     // pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
3353     // sahf/lahf CF(8) ZF(14) SF(15) OF( 0)
3354     assert(r == 0);
3355     raw_setcc(r,0); /* V flag in AL */
3356     raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */
3357     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
3358     raw_lahf(0); /* most flags in AH */
3359     raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
3360     raw_flags_evicted(r);
3361     }
3362     else
3363     raw_flags_to_reg_FLAGSTK(r);
3364     }
3365    
3366     static int FLAG_NREG2_FLAGGEN = 0;
3367     static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
3368     {
3369     if (have_lahf_lm) {
3370     raw_xchg_b_rr(0,AH_INDEX);
3371     raw_cmp_b_ri(r,-120); /* set V */
3372     raw_sahf(0);
3373     }
3374     else
3375     raw_reg_to_flags_FLAGSTK(r);
3376     }
3377    
3378     static int FLAG_NREG3_FLAGGEN = 0;
3379     static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
3380     {
3381     if (have_lahf_lm)
3382     raw_flags_set_zero_FLAGREG(s, tmp);
3383     else
3384     raw_flags_set_zero_FLAGSTK(s, tmp);
3385     }
3386    
3387     static __inline__ void raw_flags_init_FLAGGEN(void)
3388     {
3389     if (have_lahf_lm) {
3390     FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
3391     FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
3392     FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGREG;
3393     }
3394     else {
3395     FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
3396     FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
3397     FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGSTK;
3398     }
3399     }
3400 gbeauche 1.1 #endif
3401    
/* Choose which family of flag helpers the generic names map to:
   FLAGREG when SAHF_SETO_PROFITABLE is defined, otherwise FLAGGEN on
   x86-64 and FLAGSTK everywhere else. */
#if defined(SAHF_SETO_PROFITABLE)
#define FLAG_SUFFIX FLAGREG
#elif defined(__x86_64__)
#define FLAG_SUFFIX FLAGGEN
#else
#define FLAG_SUFFIX FLAGSTK
#endif

/* Two-level paste so that FLAG_SUFFIX is expanded before ## is applied */
#define FLAG_GLUE_2(stem, suffix)   stem ## _ ## suffix
#define FLAG_GLUE_1(stem, suffix)   FLAG_GLUE_2(stem, suffix)
#define FLAG_GLUE(stem)             FLAG_GLUE_1(stem, FLAG_SUFFIX)

/* Generic names used by the rest of the compiler */
#define raw_flags_init      FLAG_GLUE(raw_flags_init)
#define raw_flags_to_reg    FLAG_GLUE(raw_flags_to_reg)
#define raw_reg_to_flags    FLAG_GLUE(raw_reg_to_flags)
#define raw_flags_set_zero  FLAG_GLUE(raw_flags_set_zero)
#define FLAG_NREG1          FLAG_GLUE(FLAG_NREG1)
#define FLAG_NREG2          FLAG_GLUE(FLAG_NREG2)
#define FLAG_NREG3          FLAG_GLUE(FLAG_NREG3)
3421    
3422 gbeauche 1.1 /* Apparently, there are enough instructions between flag store and
3423     flag reload to avoid the partial memory stall */
3424     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3425     {
3426     #if 1
3427 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3428 gbeauche 1.1 #else
3429 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3430     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3431 gbeauche 1.1 #endif
3432     }
3433    
3434     /* FLAGX is byte sized, and we *do* write it at that size */
3435     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3436     {
3437     if (live.nat[target].canbyte)
3438 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3439 gbeauche 1.1 else if (live.nat[target].canword)
3440 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3441 gbeauche 1.1 else
3442 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3443 gbeauche 1.1 }
3444    
3445 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3446     {
3447     if (off) raw_sub_l_ri(ESP_INDEX,off);
3448     }
3449    
3450 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3451     {
3452 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3453 gbeauche 1.1 }
3454    
3455     /*************************************************************************
3456     * Handling mistaken direct memory access *
3457     *************************************************************************/
3458    
3459     // gb-- I don't need that part for JIT Basilisk II
3460     #if defined(NATMEM_OFFSET) && 0
3461     #include <asm/sigcontext.h>
3462     #include <signal.h>
3463    
3464     #define SIG_READ 1
3465     #define SIG_WRITE 2
3466    
3467     static int in_handler=0;
3468     static uae_u8 veccode[256];
3469    
3470     static void vec(int x, struct sigcontext sc)
3471     {
3472     uae_u8* i=(uae_u8*)sc.eip;
3473     uae_u32 addr=sc.cr2;
3474     int r=-1;
3475     int size=4;
3476     int dir=-1;
3477     int len=0;
3478     int j;
3479    
3480     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3481     if (!canbang)
3482     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3483     if (in_handler)
3484     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3485    
3486     if (canbang && i>=compiled_code && i<=current_compile_p) {
3487     if (*i==0x66) {
3488     i++;
3489     size=2;
3490     len++;
3491     }
3492    
3493     switch(i[0]) {
3494     case 0x8a:
3495     if ((i[1]&0xc0)==0x80) {
3496     r=(i[1]>>3)&7;
3497     dir=SIG_READ;
3498     size=1;
3499     len+=6;
3500     break;
3501     }
3502     break;
3503     case 0x88:
3504     if ((i[1]&0xc0)==0x80) {
3505     r=(i[1]>>3)&7;
3506     dir=SIG_WRITE;
3507     size=1;
3508     len+=6;
3509     break;
3510     }
3511     break;
3512     case 0x8b:
3513     if ((i[1]&0xc0)==0x80) {
3514     r=(i[1]>>3)&7;
3515     dir=SIG_READ;
3516     len+=6;
3517     break;
3518     }
3519     if ((i[1]&0xc0)==0x40) {
3520     r=(i[1]>>3)&7;
3521     dir=SIG_READ;
3522     len+=3;
3523     break;
3524     }
3525     break;
3526     case 0x89:
3527     if ((i[1]&0xc0)==0x80) {
3528     r=(i[1]>>3)&7;
3529     dir=SIG_WRITE;
3530     len+=6;
3531     break;
3532     }
3533     if ((i[1]&0xc0)==0x40) {
3534     r=(i[1]>>3)&7;
3535     dir=SIG_WRITE;
3536     len+=3;
3537     break;
3538     }
3539     break;
3540     }
3541     }
3542    
3543     if (r!=-1) {
3544     void* pr=NULL;
3545     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3546    
3547     switch(r) {
3548     case 0: pr=&(sc.eax); break;
3549     case 1: pr=&(sc.ecx); break;
3550     case 2: pr=&(sc.edx); break;
3551     case 3: pr=&(sc.ebx); break;
3552     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3553     case 5: pr=(size>1)?
3554     (void*)(&(sc.ebp)):
3555     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3556     case 6: pr=(size>1)?
3557     (void*)(&(sc.esi)):
3558     (void*)(((uae_u8*)&(sc.edx))+1); break;
3559     case 7: pr=(size>1)?
3560     (void*)(&(sc.edi)):
3561     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3562     default: abort();
3563     }
3564     if (pr) {
3565     blockinfo* bi;
3566    
3567     if (currprefs.comp_oldsegv) {
3568     addr-=NATMEM_OFFSET;
3569    
3570     if ((addr>=0x10000000 && addr<0x40000000) ||
3571     (addr>=0x50000000)) {
3572     write_log("Suspicious address in %x SEGV handler.\n",addr);
3573     }
3574     if (dir==SIG_READ) {
3575     switch(size) {
3576     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3577     case 2: *((uae_u16*)pr)=get_word(addr); break;
3578     case 4: *((uae_u32*)pr)=get_long(addr); break;
3579     default: abort();
3580     }
3581     }
3582     else { /* write */
3583     switch(size) {
3584     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3585     case 2: put_word(addr,*((uae_u16*)pr)); break;
3586     case 4: put_long(addr,*((uae_u32*)pr)); break;
3587     default: abort();
3588     }
3589     }
3590     write_log("Handled one access!\n");
3591     fflush(stdout);
3592     segvcount++;
3593     sc.eip+=len;
3594     }
3595     else {
3596     void* tmp=target;
3597     int i;
3598     uae_u8 vecbuf[5];
3599    
3600     addr-=NATMEM_OFFSET;
3601    
3602     if ((addr>=0x10000000 && addr<0x40000000) ||
3603     (addr>=0x50000000)) {
3604     write_log("Suspicious address in %x SEGV handler.\n",addr);
3605     }
3606    
3607     target=(uae_u8*)sc.eip;
3608     for (i=0;i<5;i++)
3609     vecbuf[i]=target[i];
3610     emit_byte(0xe9);
3611 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3612 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3613    
3614     write_log("Handled one access!\n");
3615     fflush(stdout);
3616     segvcount++;
3617    
3618     target=veccode;
3619    
3620     if (dir==SIG_READ) {
3621     switch(size) {
3622     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3623     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3624     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3625     default: abort();
3626     }
3627     }
3628     else { /* write */
3629     switch(size) {
3630     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3631     case 2: put_word(addr,*((uae_u16*)pr)); break;
3632     case 4: put_long(addr,*((uae_u32*)pr)); break;
3633     default: abort();
3634     }
3635     }
3636     for (i=0;i<5;i++)
3637     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3638 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3639 gbeauche 1.1 emit_byte(0xe9);
3640 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3641 gbeauche 1.1 in_handler=1;
3642     target=tmp;
3643     }
3644     bi=active;
3645     while (bi) {
3646     if (bi->handler &&
3647     (uae_u8*)bi->direct_handler<=i &&
3648     (uae_u8*)bi->nexthandler>i) {
3649     write_log("deleted trigger (%p<%p<%p) %p\n",
3650     bi->handler,
3651     i,
3652     bi->nexthandler,
3653     bi->pc_p);
3654     invalidate_block(bi);
3655     raise_in_cl_list(bi);
3656     set_special(0);
3657     return;
3658     }
3659     bi=bi->next;
3660     }
3661     /* Not found in the active list. Might be a rom routine that
3662     is in the dormant list */
3663     bi=dormant;
3664     while (bi) {
3665     if (bi->handler &&
3666     (uae_u8*)bi->direct_handler<=i &&
3667     (uae_u8*)bi->nexthandler>i) {
3668     write_log("deleted trigger (%p<%p<%p) %p\n",
3669     bi->handler,
3670     i,
3671     bi->nexthandler,
3672     bi->pc_p);
3673     invalidate_block(bi);
3674     raise_in_cl_list(bi);
3675     set_special(0);
3676     return;
3677     }
3678     bi=bi->next;
3679     }
3680     write_log("Huh? Could not find trigger!\n");
3681     return;
3682     }
3683     }
3684     write_log("Can't handle access!\n");
3685     for (j=0;j<10;j++) {
3686     write_log("instruction byte %2d is %02x\n",j,i[j]);
3687     }
3688     write_log("Please send the above info (starting at \"fault address\") to\n"
3689     "bmeyer@csse.monash.edu.au\n"
3690     "This shouldn't happen ;-)\n");
3691     fflush(stdout);
3692     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3693     }
3694     #endif
3695    
3696    
3697     /*************************************************************************
3698     * Checking for CPU features *
3699     *************************************************************************/
3700    
3701 gbeauche 1.3 struct cpuinfo_x86 {
3702     uae_u8 x86; // CPU family
3703     uae_u8 x86_vendor; // CPU vendor
3704     uae_u8 x86_processor; // CPU canonical processor type
3705     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3706     uae_u32 x86_hwcap;
3707     uae_u8 x86_model;
3708     uae_u8 x86_mask;
3709     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3710     char x86_vendor_id[16];
3711     };
3712     struct cpuinfo_x86 cpuinfo;
3713    
/* Values stored in cpuinfo_x86::x86_vendor */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
3726    
/* Values stored in cpuinfo_x86::x86_processor.  They index both
   x86_processor_string_table and x86_alignments, so the three must be
   kept in the same order. */
enum {
    X86_PROCESSOR_I386,         /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_X86_64,
    X86_PROCESSOR_max           /* number of entries; also "not yet known" */
};
3738    
3739     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3740     "80386",
3741     "80486",
3742     "Pentium",
3743     "PentiumPro",
3744     "K6",
3745     "Athlon",
3746 gbeauche 1.16 "Pentium4",
3747 gbeauche 1.28 "x86-64"
3748 gbeauche 1.3 };
3749    
3750     static struct ptt {
3751     const int align_loop;
3752     const int align_loop_max_skip;
3753     const int align_jump;
3754     const int align_jump_max_skip;
3755     const int align_func;
3756     }
3757     x86_alignments[X86_PROCESSOR_max] = {
3758     { 4, 3, 4, 3, 4 },
3759     { 16, 15, 16, 15, 16 },
3760     { 16, 7, 16, 7, 16 },
3761     { 16, 15, 16, 7, 16 },
3762     { 32, 7, 32, 7, 32 },
3763 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3764 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3765     { 16, 7, 16, 7, 16 }
3766 gbeauche 1.3 };
3767 gbeauche 1.1
3768 gbeauche 1.3 static void
3769     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3770 gbeauche 1.1 {
3771 gbeauche 1.3 char *v = c->x86_vendor_id;
3772    
3773     if (!strcmp(v, "GenuineIntel"))
3774     c->x86_vendor = X86_VENDOR_INTEL;
3775     else if (!strcmp(v, "AuthenticAMD"))
3776     c->x86_vendor = X86_VENDOR_AMD;
3777     else if (!strcmp(v, "CyrixInstead"))
3778     c->x86_vendor = X86_VENDOR_CYRIX;
3779     else if (!strcmp(v, "Geode by NSC"))
3780     c->x86_vendor = X86_VENDOR_NSC;
3781     else if (!strcmp(v, "UMC UMC UMC "))
3782     c->x86_vendor = X86_VENDOR_UMC;
3783     else if (!strcmp(v, "CentaurHauls"))
3784     c->x86_vendor = X86_VENDOR_CENTAUR;
3785     else if (!strcmp(v, "NexGenDriven"))
3786     c->x86_vendor = X86_VENDOR_NEXGEN;
3787     else if (!strcmp(v, "RiseRiseRise"))
3788     c->x86_vendor = X86_VENDOR_RISE;
3789     else if (!strcmp(v, "GenuineTMx86") ||
3790     !strcmp(v, "TransmetaCPU"))
3791     c->x86_vendor = X86_VENDOR_TRANSMETA;
3792     else
3793     c->x86_vendor = X86_VENDOR_UNKNOWN;
3794     }
3795 gbeauche 1.1
3796 gbeauche 1.3 static void
3797     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3798     {
3799 gbeauche 1.27 const int CPUID_SPACE = 4096;
3800     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3801     if (cpuid_space == VM_MAP_FAILED)
3802     abort();
3803     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3804    
3805 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3806 gbeauche 1.3 uae_u8* tmp=get_target();
3807 gbeauche 1.1
3808 gbeauche 1.20 s_op = op;
3809 gbeauche 1.3 set_target(cpuid_space);
3810     raw_push_l_r(0); /* eax */
3811     raw_push_l_r(1); /* ecx */
3812     raw_push_l_r(2); /* edx */
3813     raw_push_l_r(3); /* ebx */
3814 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3815 gbeauche 1.3 raw_cpuid(0);
3816 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3817     raw_mov_l_mr((uintptr)&s_ebx,3);
3818     raw_mov_l_mr((uintptr)&s_ecx,1);
3819     raw_mov_l_mr((uintptr)&s_edx,2);
3820 gbeauche 1.3 raw_pop_l_r(3);
3821     raw_pop_l_r(2);
3822     raw_pop_l_r(1);
3823     raw_pop_l_r(0);
3824     raw_ret();
3825     set_target(tmp);
3826 gbeauche 1.1
3827 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3828 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3829     if (ebx != NULL) *ebx = s_ebx;
3830     if (ecx != NULL) *ecx = s_ecx;
3831     if (edx != NULL) *edx = s_edx;
3832 gbeauche 1.27
3833     vm_release(cpuid_space, CPUID_SPACE);
3834 gbeauche 1.1 }
3835    
3836 gbeauche 1.3 static void
3837     raw_init_cpu(void)
3838 gbeauche 1.1 {
3839 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3840    
3841     /* Defaults */
3842 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3843 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3844     c->cpuid_level = -1; /* CPUID not detected */
3845     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3846     c->x86_vendor_id[0] = '\0'; /* Unset */
3847     c->x86_hwcap = 0;
3848    
3849     /* Get vendor name */
3850     c->x86_vendor_id[12] = '\0';
3851     cpuid(0x00000000,
3852     (uae_u32 *)&c->cpuid_level,
3853     (uae_u32 *)&c->x86_vendor_id[0],
3854     (uae_u32 *)&c->x86_vendor_id[8],
3855     (uae_u32 *)&c->x86_vendor_id[4]);
3856     x86_get_cpu_vendor(c);
3857    
3858     /* Intel-defined flags: level 0x00000001 */
3859     c->x86_brand_id = 0;
3860     if ( c->cpuid_level >= 0x00000001 ) {
3861     uae_u32 tfms, brand_id;
3862     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3863     c->x86 = (tfms >> 8) & 15;
3864 gbeauche 1.29 if (c->x86 == 0xf)
3865     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3866 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3867 gbeauche 1.29 if (c->x86_model == 0xf)
3868     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3869 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3870     c->x86_mask = tfms & 15;
3871     } else {
3872     /* Have CPUID level 0 only - unheard of */
3873     c->x86 = 4;
3874     }
3875    
3876 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3877     uae_u32 xlvl;
3878     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3879     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3880     if ( xlvl >= 0x80000001 ) {
3881 gbeauche 1.28 uae_u32 features, extra_features;
3882     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3883 gbeauche 1.16 if (features & (1 << 29)) {
3884     /* Assume x86-64 if long mode is supported */
3885 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3886 gbeauche 1.16 }
3887 gbeauche 1.28 if (extra_features & (1 << 0))
3888     have_lahf_lm = true;
3889 gbeauche 1.16 }
3890     }
3891    
3892 gbeauche 1.3 /* Canonicalize processor ID */
3893     switch (c->x86) {
3894     case 3:
3895     c->x86_processor = X86_PROCESSOR_I386;
3896     break;
3897     case 4:
3898     c->x86_processor = X86_PROCESSOR_I486;
3899     break;
3900     case 5:
3901     if (c->x86_vendor == X86_VENDOR_AMD)
3902     c->x86_processor = X86_PROCESSOR_K6;
3903     else
3904     c->x86_processor = X86_PROCESSOR_PENTIUM;
3905     break;
3906     case 6:
3907     if (c->x86_vendor == X86_VENDOR_AMD)
3908     c->x86_processor = X86_PROCESSOR_ATHLON;
3909     else
3910     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3911     break;
3912     case 15:
3913 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3914     switch (c->x86_vendor) {
3915     case X86_VENDOR_INTEL:
3916     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3917     break;
3918     case X86_VENDOR_AMD:
3919     /* Assume a 32-bit Athlon processor if not in long mode */
3920     c->x86_processor = X86_PROCESSOR_ATHLON;
3921     break;
3922     }
3923     }
3924     break;
3925 gbeauche 1.3 }
3926     if (c->x86_processor == X86_PROCESSOR_max) {
3927 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3928     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3929 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3930     fprintf(stderr, " Model : %d\n", c->x86_model);
3931     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3932 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3933 gbeauche 1.3 if (c->x86_brand_id)
3934     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3935     }
3936    
3937     /* Have CMOV support? */
3938 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3939 gbeauche 1.3
3940     /* Can the host CPU suffer from partial register stalls? */
3941     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3942     #if 1
3943     /* It appears that partial register writes are a bad idea even on
3944 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3945     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3946 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3947     have_rat_stall = true;
3948 gbeauche 1.1 #endif
3949 gbeauche 1.3
3950     /* Alignments */
3951     if (tune_alignment) {
3952     align_loops = x86_alignments[c->x86_processor].align_loop;
3953     align_jumps = x86_alignments[c->x86_processor].align_jump;
3954     }
3955    
3956     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3957     c->cpuid_level, c->x86_vendor_id,
3958     x86_processor_string_table[c->x86_processor]);
3959 gbeauche 1.36
3960     raw_flags_init();
3961 gbeauche 1.1 }
3962    
3963 gbeauche 1.10 static bool target_check_bsf(void)
3964     {
3965     bool mismatch = false;
3966     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3967     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3968     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3969     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3970     for (int value = -1; value <= 1; value++) {
3971 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3972     unsigned long tmp = value;
3973 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3974 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3975 gbeauche 1.10 int OF = (flags >> 11) & 1;
3976     int SF = (flags >> 7) & 1;
3977     int ZF = (flags >> 6) & 1;
3978     int CF = flags & 1;
3979     tmp = (value == 0);
3980     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3981     mismatch = true;
3982     }
3983     }}}}
3984     if (mismatch)
3985     write_log("Target CPU defines all flags on BSF instruction\n");
3986     return !mismatch;
3987     }
3988    
3989 gbeauche 1.1
3990     /*************************************************************************
3991     * FPU stuff *
3992     *************************************************************************/
3993    
3994    
3995     static __inline__ void raw_fp_init(void)
3996     {
3997     int i;
3998    
3999     for (i=0;i<N_FREGS;i++)
4000     live.spos[i]=-2;
4001     live.tos=-1; /* Stack is empty */
4002     }
4003    
4004     static __inline__ void raw_fp_cleanup_drop(void)
4005     {
4006     #if 0
4007     /* using FINIT instead of popping all the entries.
4008     Seems to have side effects --- there is display corruption in
4009     Quake when this is used */
4010     if (live.tos>1) {
4011     emit_byte(0x9b);
4012     emit_byte(0xdb);
4013     emit_byte(0xe3);
4014     live.tos=-1;
4015     }
4016     #endif
4017     while (live.tos>=1) {
4018     emit_byte(0xde);
4019     emit_byte(0xd9);
4020     live.tos-=2;
4021     }
4022     while (live.tos>=0) {
4023     emit_byte(0xdd);
4024     emit_byte(0xd8);
4025     live.tos--;
4026     }
4027     raw_fp_init();
4028     }
4029    
4030     static __inline__ void make_tos(int r)
4031     {
4032     int p,q;
4033    
4034     if (live.spos[r]<0) { /* Register not yet on stack */
4035     emit_byte(0xd9);
4036     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
4037     live.tos++;
4038     live.spos[r]=live.tos;
4039     live.onstack[live.tos]=r;
4040     return;
4041     }
4042     /* Register is on stack */
4043     if (live.tos==live.spos[r])
4044     return;
4045     p=live.spos[r];
4046     q=live.onstack[live.tos];
4047    
4048     emit_byte(0xd9);
4049     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
4050     live.onstack[live.tos]=r;
4051     live.spos[r]=live.tos;
4052     live.onstack[p]=q;
4053     live.spos[q]=p;
4054     }
4055    
4056     static __inline__ void make_tos2(int r, int r2)
4057     {
4058     int q;
4059    
4060     make_tos(r2); /* Put the reg that's supposed to end up in position2
4061     on top */
4062    
4063     if (live.spos[r]<0) { /* Register not yet on stack */
4064     make_tos(r); /* This will extend the stack */
4065     return;
4066     }
4067     /* Register is on stack */
4068     emit_byte(0xd9);
4069     emit_byte(0xc9); /* Move r2 into position 2 */
4070    
4071     q=live.onstack[live.tos-1];
4072     live.onstack[live.tos]=q;
4073     live.spos[q]=live.tos;
4074     live.onstack[live.tos-1]=r2;
4075     live.spos[r2]=live.tos-1;
4076    
4077     make_tos(r); /* And r into 1 */
4078     }
4079    
4080     static __inline__ int stackpos(int r)
4081     {
4082     if (live.spos[r]<0)
4083     abort();
4084     if (live.tos<live.spos[r]) {
4085     printf("Looking for spos for fnreg %d\n",r);
4086     abort();
4087     }
4088     return live.tos-live.spos[r];
4089     }
4090    
4091     static __inline__ void usereg(int r)
4092     {
4093     if (live.spos[r]<0)
4094     make_tos(r);
4095     }
4096    
4097     /* This is called with one FP value in a reg *above* tos, which it will
4098     pop off the stack if necessary */
4099     static __inline__ void tos_make(int r)
4100     {
4101     if (live.spos[r]<0) {
4102     live.tos++;
4103     live.spos[r]=live.tos;
4104     live.onstack[live.tos]=r;
4105     return;
4106     }
4107     emit_byte(0xdd);
4108     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
4109     and pop it*/
4110     }
4111 gbeauche 1.23
4112     /* FP helper functions */
4113     #if USE_NEW_RTASM
4114     #define DEFINE_OP(NAME, GEN) \
4115     static inline void raw_##NAME(uint32 m) \
4116     { \
4117     GEN(m, X86_NOREG, X86_NOREG, 1); \
4118     }
4119     DEFINE_OP(fstl, FSTLm);
4120     DEFINE_OP(fstpl, FSTPLm);
4121     DEFINE_OP(fldl, FLDLm);
4122     DEFINE_OP(fildl, FILDLm);
4123     DEFINE_OP(fistl, FISTLm);
4124     DEFINE_OP(flds, FLDSm);
4125     DEFINE_OP(fsts, FSTSm);
4126     DEFINE_OP(fstpt, FSTPTm);
4127     DEFINE_OP(fldt, FLDTm);
4128     #else
4129     #define DEFINE_OP(NAME, OP1, OP2) \
4130     static inline void raw_##NAME(uint32 m) \
4131     { \
4132     emit_byte(OP1); \
4133     emit_byte(OP2); \
4134     emit_long(m); \
4135     }
4136     DEFINE_OP(fstl, 0xdd, 0x15);
4137     DEFINE_OP(fstpl, 0xdd, 0x1d);
4138     DEFINE_OP(fldl, 0xdd, 0x05);
4139     DEFINE_OP(fildl, 0xdb, 0x05);
4140     DEFINE_OP(fistl, 0xdb, 0x15);
4141     DEFINE_OP(flds, 0xd9, 0x05);
4142     DEFINE_OP(fsts, 0xd9, 0x15);
4143     DEFINE_OP(fstpt, 0xdb, 0x3d);
4144     DEFINE_OP(fldt, 0xdb, 0x2d);
4145     #endif
4146     #undef DEFINE_OP
4147    
4148 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4149     {
4150     make_tos(r);
4151 gbeauche 1.23 raw_fstl(m);
4152 gbeauche 1.1 }
4153     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4154    
4155     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4156     {
4157     make_tos(r);
4158 gbeauche 1.23 raw_fstpl(m);
4159 gbeauche 1.1 live.onstack[live.tos]=-1;
4160     live.tos--;
4161     live.spos[r]=-2;
4162     }
4163     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4164    
4165     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4166     {
4167 gbeauche 1.23 raw_fldl(m);
4168 gbeauche 1.1 tos_make(r);
4169     }
4170     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4171    
4172     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4173     {
4174 gbeauche 1.23 raw_fildl(m);
4175 gbeauche 1.1 tos_make(r);
4176     }
4177     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4178    
4179     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4180     {
4181     make_tos(r);
4182 gbeauche 1.23 raw_fistl(m);
4183 gbeauche 1.1 }
4184     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4185    
4186     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4187     {
4188 gbeauche 1.23 raw_flds(m);
4189 gbeauche 1.1 tos_make(r);
4190     }
4191     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4192    
4193     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4194     {
4195     make_tos(r);
4196 gbeauche 1.23 raw_fsts(m);
4197 gbeauche 1.1 }
4198     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4199    
4200     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4201     {
4202     int rs;
4203    
4204     /* Stupid x87 can't write a long double to mem without popping the
4205     stack! */
4206     usereg(r);
4207     rs=stackpos(r);
4208     emit_byte(0xd9); /* Get a copy to the top of stack */
4209     emit_byte(0xc0+rs);
4210    
4211 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4212 gbeauche 1.1 }
4213     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4214    
4215     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4216     {
4217     int rs;
4218    
4219     make_tos(r);
4220 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4221 gbeauche 1.1 live.onstack[live.tos]=-1;
4222     live.tos--;
4223     live.spos[r]=-2;
4224     }
4225     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4226    
4227     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4228     {
4229 gbeauche 1.23 raw_fldt(m);
4230 gbeauche 1.1 tos_make(r);
4231     }
4232     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4233    
4234     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4235     {
4236     emit_byte(0xd9);
4237     emit_byte(0xeb);
4238     tos_make(r);
4239     }
4240     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4241    
4242     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4243     {
4244     emit_byte(0xd9);
4245     emit_byte(0xec);
4246     tos_make(r);
4247     }
4248     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4249    
4250     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4251     {
4252     emit_byte(0xd9);
4253     emit_byte(0xea);
4254     tos_make(r);
4255     }
4256     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4257    
4258     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4259     {
4260     emit_byte(0xd9);
4261     emit_byte(0xed);
4262     tos_make(r);
4263     }
4264     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4265    
4266     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4267     {
4268     emit_byte(0xd9);
4269     emit_byte(0xe8);
4270     tos_make(r);
4271     }
4272     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4273    
4274     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4275     {
4276     emit_byte(0xd9);
4277     emit_byte(0xee);
4278     tos_make(r);
4279     }
4280     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4281    
4282     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4283     {
4284     int ds;
4285    
4286     usereg(s);
4287     ds=stackpos(s);
4288     if (ds==0 && live.spos[d]>=0) {
4289     /* source is on top of stack, and we already have the dest */
4290     int dd=stackpos(d);
4291     emit_byte(0xdd);
4292     emit_byte(0xd0+dd);
4293     }
4294     else {
4295     emit_byte(0xd9);
4296     emit_byte(0xc0+ds); /* duplicate source on tos */
4297     tos_make(d); /* store to destination, pop if necessary */
4298     }
4299     }
4300     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4301    
4302     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4303     {
4304     emit_byte(0xd9);
4305     emit_byte(0xa8+index);
4306     emit_long(base);
4307     }
4308     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4309    
4310    
4311     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4312     {
4313     int ds;
4314    
4315     if (d!=s) {
4316     usereg(s);
4317     ds=stackpos(s);
4318     emit_byte(0xd9);
4319     emit_byte(0xc0+ds); /* duplicate source */
4320     emit_byte(0xd9);
4321     emit_byte(0xfa); /* take square root */
4322     tos_make(d); /* store to destination */
4323     }
4324     else {
4325     make_tos(d);
4326     emit_byte(0xd9);
4327     emit_byte(0xfa); /* take square root */
4328     }
4329     }
4330     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4331    
4332     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4333     {
4334     int ds;
4335    
4336     if (d!=s) {
4337     usereg(s);
4338     ds=stackpos(s);
4339     emit_byte(0xd9);
4340     emit_byte(0xc0+ds); /* duplicate source */
4341     emit_byte(0xd9);
4342     emit_byte(0xe1); /* take fabs */
4343     tos_make(d); /* store to destination */
4344     }
4345     else {
4346     make_tos(d);
4347     emit_byte(0xd9);
4348     emit_byte(0xe1); /* take fabs */
4349     }
4350     }
4351     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4352    
4353     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4354     {
4355     int ds;
4356    
4357     if (d!=s) {
4358     usereg(s);
4359     ds=stackpos(s);
4360     emit_byte(0xd9);
4361     emit_byte(0xc0+ds); /* duplicate source */
4362     emit_byte(0xd9);
4363     emit_byte(0xfc); /* take frndint */
4364     tos_make(d); /* store to destination */
4365     }
4366     else {
4367     make_tos(d);
4368     emit_byte(0xd9);
4369     emit_byte(0xfc); /* take frndint */
4370     }
4371     }
4372     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4373    
4374     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4375     {
4376     int ds;
4377    
4378     if (d!=s) {
4379     usereg(s);
4380     ds=stackpos(s);
4381     emit_byte(0xd9);
4382     emit_byte(0xc0+ds); /* duplicate source */
4383     emit_byte(0xd9);
4384     emit_byte(0xff); /* take cos */
4385     tos_make(d); /* store to destination */
4386     }
4387     else {
4388     make_tos(d);
4389     emit_byte(0xd9);
4390     emit_byte(0xff); /* take cos */
4391     }
4392     }
4393     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4394    
4395     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4396     {
4397     int ds;
4398    
4399     if (d!=s) {
4400     usereg(s);
4401     ds=stackpos(s);
4402     emit_byte(0xd9);
4403     emit_byte(0xc0+ds); /* duplicate source */
4404     emit_byte(0xd9);
4405     emit_byte(0xfe); /* take sin */
4406     tos_make(d); /* store to destination */
4407     }
4408     else {
4409     make_tos(d);
4410     emit_byte(0xd9);
4411     emit_byte(0xfe); /* take sin */
4412     }
4413     }
4414     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4415    
4416 gbeauche 1.34 static const double one=1;
4417 gbeauche 1.1 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4418     {
4419     int ds;
4420    
4421     usereg(s);
4422     ds=stackpos(s);
4423     emit_byte(0xd9);
4424     emit_byte(0xc0+ds); /* duplicate source */
4425    
4426     emit_byte(0xd9);
4427     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4428     emit_byte(0xd9);
4429     emit_byte(0xfc); /* rndint */
4430     emit_byte(0xd9);
4431     emit_byte(0xc9); /* swap top two elements */
4432     emit_byte(0xd8);
4433     emit_byte(0xe1); /* subtract rounded from original */
4434     emit_byte(0xd9);
4435     emit_byte(0xf0); /* f2xm1 */
4436 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4437 gbeauche 1.1 emit_byte(0xd9);
4438     emit_byte(0xfd); /* and scale it */
4439     emit_byte(0xdd);
4440     emit_byte(0xd9); /* take he rounded value off */
4441     tos_make(d); /* store to destination */
4442     }
4443     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4444    
4445     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4446     {
4447     int ds;
4448    
4449     usereg(s);
4450     ds=stackpos(s);
4451     emit_byte(0xd9);
4452     emit_byte(0xc0+ds); /* duplicate source */
4453     emit_byte(0xd9);
4454     emit_byte(0xea); /* fldl2e */
4455     emit_byte(0xde);
4456     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4457    
4458     emit_byte(0xd9);
4459     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4460     emit_byte(0xd9);
4461     emit_byte(0xfc); /* rndint */
4462     emit_byte(0xd9);
4463     emit_byte(0xc9); /* swap top two elements */
4464     emit_byte(0xd8);
4465     emit_byte(0xe1); /* subtract rounded from original */
4466     emit_byte(0xd9);
4467     emit_byte(0xf0); /* f2xm1 */
4468 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4469 gbeauche 1.1 emit_byte(0xd9);
4470     emit_byte(0xfd); /* and scale it */
4471     emit_byte(0xdd);
4472     emit_byte(0xd9); /* take he rounded value off */
4473     tos_make(d); /* store to destination */
4474     }
4475     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4476    
4477     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4478     {
4479     int ds;
4480    
4481     usereg(s);
4482     ds=stackpos(s);
4483     emit_byte(0xd9);
4484     emit_byte(0xc0+ds); /* duplicate source */
4485     emit_byte(0xd9);
4486     emit_byte(0xe8); /* push '1' */
4487     emit_byte(0xd9);
4488     emit_byte(0xc9); /* swap top two */
4489     emit_byte(0xd9);
4490     emit_byte(0xf1); /* take 1*log2(x) */
4491     tos_make(d); /* store to destination */
4492     }
4493     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4494    
4495    
4496     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4497     {
4498     int ds;
4499    
4500     if (d!=s) {
4501     usereg(s);
4502     ds=stackpos(s);
4503     emit_byte(0xd9);
4504     emit_byte(0xc0+ds); /* duplicate source */
4505     emit_byte(0xd9);
4506     emit_byte(0xe0); /* take fchs */
4507     tos_make(d); /* store to destination */
4508     }
4509     else {
4510     make_tos(d);
4511     emit_byte(0xd9);
4512     emit_byte(0xe0); /* take fchs */
4513     }
4514     }
4515     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4516    
4517     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4518     {
4519     int ds;
4520    
4521     usereg(s);
4522     usereg(d);
4523    
4524     if (live.spos[s]==live.tos) {
4525     /* Source is on top of stack */
4526     ds=stackpos(d);
4527     emit_byte(0xdc);
4528     emit_byte(0xc0+ds); /* add source to dest*/
4529     }
4530     else {
4531     make_tos(d);
4532     ds=stackpos(s);
4533    
4534     emit_byte(0xd8);
4535     emit_byte(0xc0+ds); /* add source to dest*/
4536     }
4537     }
4538     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4539    
4540     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4541     {
4542     int ds;
4543    
4544     usereg(s);
4545     usereg(d);
4546    
4547     if (live.spos[s]==live.tos) {
4548     /* Source is on top of stack */
4549     ds=stackpos(d);
4550     emit_byte(0xdc);
4551     emit_byte(0xe8+ds); /* sub source from dest*/
4552     }
4553     else {
4554     make_tos(d);
4555     ds=stackpos(s);
4556    
4557     emit_byte(0xd8);
4558     emit_byte(0xe0+ds); /* sub src from dest */
4559     }
4560     }
4561     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4562    
4563     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4564     {
4565     int ds;
4566    
4567     usereg(s);
4568     usereg(d);
4569    
4570     make_tos(d);
4571     ds=stackpos(s);
4572    
4573     emit_byte(0xdd);
4574     emit_byte(0xe0+ds); /* cmp dest with source*/
4575     }
4576     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4577    
4578     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4579     {
4580     int ds;
4581    
4582     usereg(s);
4583     usereg(d);
4584    
4585     if (live.spos[s]==live.tos) {
4586     /* Source is on top of stack */
4587     ds=stackpos(d);
4588     emit_byte(0xdc);
4589     emit_byte(0xc8+ds); /* mul dest by source*/
4590     }
4591     else {
4592     make_tos(d);
4593     ds=stackpos(s);
4594    
4595     emit_byte(0xd8);
4596     emit_byte(0xc8+ds); /* mul dest by source*/
4597     }
4598     }
4599     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4600    
4601     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4602     {
4603     int ds;
4604    
4605     usereg(s);
4606     usereg(d);
4607    
4608     if (live.spos[s]==live.tos) {
4609     /* Source is on top of stack */
4610     ds=stackpos(d);
4611     emit_byte(0xdc);
4612     emit_byte(0xf8+ds); /* div dest by source */
4613     }
4614     else {
4615     make_tos(d);
4616     ds=stackpos(s);
4617    
4618     emit_byte(0xd8);
4619     emit_byte(0xf0+ds); /* div dest by source*/
4620     }
4621     }
4622     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4623    
4624     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4625     {
4626     int ds;
4627    
4628     usereg(s);
4629     usereg(d);
4630    
4631     make_tos2(d,s);
4632     ds=stackpos(s);
4633    
4634     if (ds!=1) {
4635     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4636     abort();
4637     }
4638     emit_byte(0xd9);
4639     emit_byte(0xf8); /* take rem from dest by source */
4640     }
4641     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4642    
4643     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4644     {
4645     int ds;
4646    
4647     usereg(s);
4648     usereg(d);
4649    
4650     make_tos2(d,s);
4651     ds=stackpos(s);
4652    
4653     if (ds!=1) {
4654     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4655     abort();
4656     }
4657     emit_byte(0xd9);
4658     emit_byte(0xf5); /* take rem1 from dest by source */
4659     }
4660     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4661    
4662    
4663     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4664     {
4665     make_tos(r);
4666     emit_byte(0xd9); /* ftst */
4667     emit_byte(0xe4);
4668     }
4669     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4670    
4671     /* %eax register is clobbered if target processor doesn't support fucomi */
4672     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4673     #define FFLAG_NREG EAX_INDEX
4674    
4675     static __inline__ void raw_fflags_into_flags(int r)
4676     {
4677     int p;
4678    
4679     usereg(r);
4680     p=stackpos(r);
4681    
4682     emit_byte(0xd9);
4683     emit_byte(0xee); /* Push 0 */
4684     emit_byte(0xd9);
4685     emit_byte(0xc9+p); /* swap top two around */
4686     if (have_cmov) {
4687     // gb-- fucomi is for P6 cores only, not K6-2 then...
4688     emit_byte(0xdb);
4689     emit_byte(0xe9+p); /* fucomi them */
4690     }
4691     else {
4692     emit_byte(0xdd);
4693     emit_byte(0xe1+p); /* fucom them */
4694     emit_byte(0x9b);
4695     emit_byte(0xdf);
4696     emit_byte(0xe0); /* fstsw ax */
4697     raw_sahf(0); /* sahf */
4698     }
4699     emit_byte(0xdd);
4700     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4701     }