ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.39
Committed: 2007-06-29T16:36:03Z (17 years, 3 months ago) by gbeauche
Branch: MAIN
Changes since 1.38: +53 -0 lines
Log Message:
Implement CMOV.B and CMOV.W translations. Only the latter has a native
x86 equivalent however.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numeric indices of the general-purpose registers: EAX=0 through EDI=7. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
/* R8..R15 (indices 8-15) are only defined when building for x86-64. */
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
/* High-byte registers: each index is 0x10+4 plus the index of the
   corresponding 32-bit register (EAX, ECX, EDX, EBX). */
55 gbeauche 1.33 /* XXX this has to match X86_Reg8H_Base + 4 */
56     #define AH_INDEX (0x10+4+EAX_INDEX)
57     #define CH_INDEX (0x10+4+ECX_INDEX)
58     #define DH_INDEX (0x10+4+EDX_INDEX)
59     #define BH_INDEX (0x10+4+EBX_INDEX)
60 gbeauche 1.1
61     /* The register in which subroutines return an integer return value */
62 gbeauche 1.20 #define REG_RESULT EAX_INDEX
63 gbeauche 1.1
64     /* The registers subroutines take their first and second argument in */
/* Three cases: MSVC without USE_NORMAL_CALLING_CONVENTION (ECX/EDX),
   x86-64 (EDI/ESI indices), and everything else (EAX/EDX). */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     /* Handle the _fastcall parameters of ECX and EDX */
67 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
68     #define REG_PAR2 EDX_INDEX
69     #elif defined(__x86_64__)
70     #define REG_PAR1 EDI_INDEX
71     #define REG_PAR2 ESI_INDEX
72 gbeauche 1.1 #else
73 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
74     #define REG_PAR2 EDX_INDEX
75 gbeauche 1.1 #endif
76    
77 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
/* NOTE(review): in the MSVC fastcall branch REG_PC_TMP is EAX_INDEX, the
   same value as REG_PC_PRE, whereas the other branch documents it as a
   register "that is not the above" -- confirm this is intended. */
78 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
80 gbeauche 1.1 #else
81 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82 gbeauche 1.1 #endif
83    
84 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
85 gbeauche 1.1 -1 if any reg will do */
86 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
88 gbeauche 1.1
/* STACK_ALIGN is a fixed 16; STACK_OFFSET is the size of one pointer.
   How they are combined is decided by code outside this view. */
89 gbeauche 1.31 #define STACK_ALIGN 16
90     #define STACK_OFFSET sizeof(void *)
91    
/* Register lists terminated by -1. always_used holds only index 4
   (ESP_INDEX). can_byte / can_word list register indices per build target;
   presumably those usable for byte / word operands -- confirm with the
   register allocator. */
92 gbeauche 1.1 uae_s8 always_used[]={4,-1};
93 gbeauche 1.20 #if defined(__x86_64__)
94     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96     #else
97 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
98     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 gbeauche 1.20 #endif
100 gbeauche 1.1
/* call_saved is indexed by register number. The USE_OPTIMIZED_CALLS
   variant is guarded by an #error and cannot currently be built. */
101 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
102     /* Make sure interpretive core does not use cpuopti */
103     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 gbeauche 1.20 #error FIXME: code not ready
105 gbeauche 1.17 #else
106 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
107     by the caller */
108 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 gbeauche 1.17 #endif
110 gbeauche 1.1
111     /* This *should* be the same as call_saved. But:
112     - We might not really know which registers are saved, and which aren't,
113     so we need to preserve some, but don't want to rely on everyone else
114     also saving those registers
115     - Special registers (such as the stack pointer) should not be "preserved"
116     by pushing, even though they are "saved" across function calls
117     */
/* need_to_preserve is indexed by register number; a 1 marks a register to
   be preserved. Index 4 (the stack pointer) is 0 in both variants. */
118 gbeauche 1.21 #if defined(__x86_64__)
119 gbeauche 1.32 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
121     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 gbeauche 1.21 #else
123 gbeauche 1.32 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 gbeauche 1.21 #endif
126 gbeauche 1.1
127     /* Whether classes of instructions do or don't clobber the native flags */
/* A class defined empty expands to nothing; a class defined as
   clobber_flags() calls that function (declared outside this view)
   wherever the macro is used. */
128     #define CLOBBER_MOV
129     #define CLOBBER_LEA
130     #define CLOBBER_CMOV
131     #define CLOBBER_POP
132     #define CLOBBER_PUSH
133     #define CLOBBER_SUB clobber_flags()
134     #define CLOBBER_SBB clobber_flags()
135     #define CLOBBER_CMP clobber_flags()
136     #define CLOBBER_ADD clobber_flags()
137     #define CLOBBER_ADC clobber_flags()
138     #define CLOBBER_AND clobber_flags()
139     #define CLOBBER_OR clobber_flags()
140     #define CLOBBER_XOR clobber_flags()
141    
142     #define CLOBBER_ROL clobber_flags()
143     #define CLOBBER_ROR clobber_flags()
144     #define CLOBBER_SHLL clobber_flags()
145     #define CLOBBER_SHRL clobber_flags()
146     #define CLOBBER_SHRA clobber_flags()
147     #define CLOBBER_TEST clobber_flags()
148     #define CLOBBER_CL16
149     #define CLOBBER_CL8
150 gbeauche 1.20 #define CLOBBER_SE32
151 gbeauche 1.1 #define CLOBBER_SE16
152     #define CLOBBER_SE8
153 gbeauche 1.20 #define CLOBBER_ZE32
154 gbeauche 1.1 #define CLOBBER_ZE16
155     #define CLOBBER_ZE8
156     #define CLOBBER_SW16 clobber_flags()
157     #define CLOBBER_SW32
158     #define CLOBBER_SETCC
159     #define CLOBBER_MUL clobber_flags()
160     #define CLOBBER_BT clobber_flags()
161     #define CLOBBER_BSF clobber_flags()
162    
163 gbeauche 1.37 /* The older code generator is now deprecated. */
164 gbeauche 1.20 #define USE_NEW_RTASM 1
165    
166     #if USE_NEW_RTASM
167 gbeauche 1.13
/* Configuration macros defined before including codegen_x86.h. */
168     #if defined(__x86_64__)
169     #define X86_TARGET_64BIT 1
170 gbeauche 1.35 /* The address override prefix causes a 5 cycles penalty on Intel Core
171     processors. Another solution would be to decompose the load in an LEA,
172     MOV (to zero-extend), MOV (from memory): is it better? */
/* ADDR32 is written directly in front of an emitter call: on x86-64 it
   emits the byte 0x67 and chains to the call with the comma operator;
   on other targets it expands to nothing. */
173     #define ADDR32 x86_emit_byte(0x67),
174     #else
175     #define ADDR32 /**/
176 gbeauche 1.13 #endif
177     #define X86_FLAT_REGISTERS 0
178 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
179     #define X86_OPTIMIZE_ROTSHI 1
180 gbeauche 1.13 #include "codegen_x86.h"
181    
/* Output hooks: map the x86_emit_* / x86_get_target names onto this
   compiler's emit_* / get_target functions, and route failures to
   jit_fail() with the current file, line and function. */
182     #define x86_emit_byte(B) emit_byte(B)
183     #define x86_emit_word(W) emit_word(W)
184     #define x86_emit_long(L) emit_long(L)
185 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
186 gbeauche 1.13 #define x86_get_target() get_target()
187     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
188    
/* Report an unrecoverable code-generation error and terminate.
   Prints the function, file, line and message to stderr, then calls
   abort(); it never returns. Invoked through x86_emit_failure(). */
189     static void jit_fail(const char *msg, const char *file, int line, const char *function)
190     {
191     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
192     function, file, line, msg);
193     abort();
194     }
195    
/* Emit a push of register r: PUSHQr when built for x86-64, PUSHLr otherwise. */
196     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
197     {
198 gbeauche 1.20 #if defined(__x86_64__)
199     PUSHQr(r);
200     #else
201 gbeauche 1.13 PUSHLr(r);
202 gbeauche 1.20 #endif
203 gbeauche 1.13 }
204     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
205    
/* Emit a pop into register r: POPQr when built for x86-64, POPLr otherwise. */
206     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
207     {
208 gbeauche 1.20 #if defined(__x86_64__)
209     POPQr(r);
210     #else
211 gbeauche 1.13 POPLr(r);
212 gbeauche 1.20 #endif
213 gbeauche 1.13 }
214     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
215    
/* Emit a pop into memory at displacement d with no base or index register:
   POPQm when built for x86-64, POPLm otherwise. */
216 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
217     {
218     #if defined(__x86_64__)
219     POPQm(d, X86_NOREG, X86_NOREG, 1);
220     #else
221     POPLm(d, X86_NOREG, X86_NOREG, 1);
222     #endif
223     }
224     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
225    
/* Emit BT (bit test) on long register r with immediate bit number i (BTLir). */
226 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
227     {
228     BTLir(i, r);
229     }
230     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
231    
/* Emit BT (bit test) on long register r with the bit number in register b (BTLrr). */
232     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
233     {
234     BTLrr(b, r);
235     }
236     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
237    
/* Emit BTC (bit test and complement) on long register r, immediate bit number i (BTCLir). */
238     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
239     {
240     BTCLir(i, r);
241     }
242     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
243    
/* Emit BTC (bit test and complement) on long register r, bit number in register b (BTCLrr). */
244     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
245     {
246     BTCLrr(b, r);
247     }
248     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
249    
/* Emit BTR (bit test and reset) on long register r, immediate bit number i (BTRLir). */
250     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
251     {
252     BTRLir(i, r);
253     }
254     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
255    
/* Emit BTR (bit test and reset) on long register r, bit number in register b (BTRLrr). */
256     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
257     {
258     BTRLrr(b, r);
259     }
260     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
261    
/* Emit BTS (bit test and set) on long register r, immediate bit number i (BTSLir). */
262     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
263     {
264     BTSLir(i, r);
265     }
266     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
267    
/* Emit BTS (bit test and set) on long register r, bit number in register b (BTSLrr). */
268     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
269     {
270     BTSLrr(b, r);
271     }
272     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
273    
/* Emit a word-sized subtract of immediate i from register d (SUBWir). */
274     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
275     {
276     SUBWir(i, d);
277     }
278     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
279    
/* Emit a long load into register d from memory at displacement s,
   with no base or index register (MOVLmr). */
280     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
281     {
282     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
283     }
284     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
285    
/* Emit a long store of immediate s to memory at displacement d,
   with no base or index register (MOVLim). */
286     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
287     {
288     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
289     }
290     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
291    
/* Emit a word store of immediate s to memory at displacement d,
   with no base or index register (MOVWim). */
292     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
293     {
294     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
295     }
296     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
297    
/* Emit a byte store of immediate s to memory at displacement d,
   with no base or index register (MOVBim). */
298     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
299     {
300     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
301     }
302     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
303    
/* Emit a byte rotate-left by immediate i on memory at displacement d,
   with no base or index register (ROLBim). */
304     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
305     {
306     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
307     }
308     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
309    
/* Emit a byte rotate-left of register r by immediate i (ROLBir). */
310     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
311     {
312     ROLBir(i, r);
313     }
314     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
315    
/* Emit a word rotate-left of register r by immediate i (ROLWir). */
316     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
317     {
318     ROLWir(i, r);
319     }
320     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
321    
/* Emit a long rotate-left of register r by immediate i (ROLLir). */
322     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
323     {
324     ROLLir(i, r);
325     }
326     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
327    
/* Emit a long rotate-left of register d by the count in byte register r (ROLLrr). */
328     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
329     {
330     ROLLrr(r, d);
331     }
332     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
333    
/* Emit a word rotate-left of register d by the count in byte register r (ROLWrr). */
334     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
335     {
336     ROLWrr(r, d);
337     }
338     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
339    
/* Emit a byte rotate-left of register d by the count in byte register r (ROLBrr). */
340     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
341     {
342     ROLBrr(r, d);
343     }
344     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
345    
/* Emit a long shift-left of register d by the count in byte register r (SHLLrr). */
346     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
347     {
348     SHLLrr(r, d);
349     }
350     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
351    
/* Emit a word shift-left of register d by the count in byte register r (SHLWrr). */
352     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
353     {
354     SHLWrr(r, d);
355     }
356     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
357    
/* Emit a byte shift-left of register d by the count in byte register r (SHLBrr). */
358     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
359     {
360     SHLBrr(r, d);
361     }
362     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
363    
/* Emit a byte rotate-right of register r by immediate i (RORBir). */
364     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
365     {
366     RORBir(i, r);
367     }
368     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
369    
/* Emit a word rotate-right of register r by immediate i (RORWir). */
370     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371     {
372     RORWir(i, r);
373     }
374     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
375    
/* Emit a long OR of memory at displacement s (no base or index register)
   into register d (ORLmr). */
376     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
377     {
378     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
379     }
380     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
381    
/* Emit a long rotate-right of register r by immediate i (RORLir). */
382     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
383     {
384     RORLir(i, r);
385     }
386     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
387    
/* Emit a long rotate-right of register d by the count in byte register r (RORLrr). */
388     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
389     {
390     RORLrr(r, d);
391     }
392     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
393    
/* Emit a word rotate-right of register d by the count in byte register r (RORWrr). */
394     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
395     {
396     RORWrr(r, d);
397     }
398     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
399    
/* Emit a byte rotate-right of register d by the count in byte register r (RORBrr). */
400     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
401     {
402     RORBrr(r, d);
403     }
404     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
405    
/* Emit a long logical shift-right of register d by the count in byte register r (SHRLrr). */
406     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
407     {
408     SHRLrr(r, d);
409     }
410     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
411    
/* Emit a word logical shift-right of register d by the count in byte register r (SHRWrr). */
412     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
413     {
414     SHRWrr(r, d);
415     }
416     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
417    
/* Emit a byte logical shift-right of register d by the count in byte register r (SHRBrr). */
418     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
419     {
420     SHRBrr(r, d);
421     }
422     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
423    
/* Emit a long arithmetic shift-right of register d by the count in byte register r (SARLrr). */
424     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
425     {
426 gbeauche 1.14 SARLrr(r, d);
427 gbeauche 1.13 }
428     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
429    
/* Emit a word arithmetic shift-right of register d by the count in byte register r (SARWrr). */
430     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
431     {
432 gbeauche 1.14 SARWrr(r, d);
433 gbeauche 1.13 }
434     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
435    
/* Emit a byte arithmetic shift-right of register d by the count in byte register r (SARBrr). */
436     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
437     {
438 gbeauche 1.14 SARBrr(r, d);
439 gbeauche 1.13 }
440     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
441    
/* Emit a long shift-left of register r by immediate i (SHLLir). */
442     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
443     {
444     SHLLir(i, r);
445     }
446     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
447    
/* Emit a word shift-left of register r by immediate i (SHLWir). */
448     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
449     {
450     SHLWir(i, r);
451     }
452     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
453    
/* Emit a byte shift-left of register r by immediate i (SHLBir). */
454     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
455     {
456     SHLBir(i, r);
457     }
458     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
459    
/* Emit a long logical shift-right of register r by immediate i (SHRLir). */
460     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
461     {
462     SHRLir(i, r);
463     }
464     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
465    
/* Emit a word logical shift-right of register r by immediate i (SHRWir). */
466     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
467     {
468     SHRWir(i, r);
469     }
470     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
471    
/* Emit a byte logical shift-right of register r by immediate i (SHRBir). */
472     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
473     {
474     SHRBir(i, r);
475     }
476     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
477    
/* Emit a long arithmetic shift-right of register r by immediate i (SARLir). */
478     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
479     {
480 gbeauche 1.14 SARLir(i, r);
481 gbeauche 1.13 }
482     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
483    
/* Emit a word arithmetic shift-right of register r by immediate i (SARWir). */
484     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
485     {
486 gbeauche 1.14 SARWir(i, r);
487 gbeauche 1.13 }
488     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
489    
/* Emit a byte arithmetic shift-right of register r by immediate i (SARBir). */
490     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
491     {
492 gbeauche 1.14 SARBir(i, r);
493 gbeauche 1.13 }
494     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
495    
/* Emit SAHF. The dummy_ah parameter is not referenced by the body;
   presumably it only tells the caller's register tracking that AH is
   read -- confirm against the callers. */
496     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
497     {
498     SAHF();
499     }
500     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
501    
/* Emit CPUID. The dummy_eax parameter is not referenced by the body. */
502     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
503     {
504     CPUID();
505     }
506     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
507    
/* Emit LAHF. The dummy_ah parameter is not referenced by the body;
   presumably it only tells the caller's register tracking that AH is
   written -- confirm against the callers. */
508     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
509     {
510     LAHF();
511     }
512     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
513    
/* Emit SETcc for condition code cc into byte register d (SETCCir). */
514     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
515     {
516     SETCCir(cc, d);
517     }
518     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
519    
/* Emit SETcc for condition code cc into memory at displacement d,
   with no base or index register (SETCCim). */
520     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
521     {
522     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
523     }
524     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
525    
/* Conditional byte move of register s into d under condition cc.
   Always emitted as a short conditional jump on the inverted condition
   (cc^1) over a MOVBrr. The jump is emitted with displacement 0 and the
   displacement byte is patched once the end of the MOV is known. */
526 gbeauche 1.39 LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
527     {
528     /* replacement using branch and mov */
/* target_p addresses the second byte of the jump about to be emitted,
   i.e. its 8-bit displacement. */
529     int8 *target_p = (int8 *)x86_get_target() + 1;
530     JCCSii(cc^1, 0);
531     MOVBrr(s, d);
/* displacement = current emit position - address just after the displacement byte */
532     *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
533     }
534     LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
535    
/* Conditional word move of register s into d under condition cc.
   Emits CMOVWrr when have_cmov is set. Otherwise emits a short jump on
   the inverted condition (cc^1) over a MOVWrr and patches the jump's
   8-bit displacement afterwards. */
536     LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
537     {
538     if (have_cmov)
539     CMOVWrr(cc, s, d);
540     else { /* replacement using branch and mov */
/* target_p addresses the displacement byte of the jump about to be emitted */
541     int8 *target_p = (int8 *)x86_get_target() + 1;
542     JCCSii(cc^1, 0);
543     MOVWrr(s, d);
/* displacement = current emit position - address just after the displacement byte */
544     *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
545     }
546     }
547     LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
548    
/* Conditional long move of register s into d under condition cc.
   Emits CMOVLrr when have_cmov is set. Otherwise emits a short jump on
   the inverted condition (cc^1) over a MOVLrr and patches the jump's
   8-bit displacement afterwards. */
549 gbeauche 1.13 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
550     {
551 gbeauche 1.15 if (have_cmov)
552     CMOVLrr(cc, s, d);
553     else { /* replacement using branch and mov */
/* target_p addresses the displacement byte of the jump about to be emitted */
554 gbeauche 1.38 int8 *target_p = (int8 *)x86_get_target() + 1;
555     JCCSii(cc^1, 0);
556 gbeauche 1.15 MOVLrr(s, d);
/* displacement = current emit position - address just after the displacement byte */
557 gbeauche 1.38 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
558 gbeauche 1.15 }
559 gbeauche 1.13 }
560     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
561    
/* Emit BSF (bit scan forward) of long register s into register d (BSFLrr). */
562     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
563     {
564     BSFLrr(s, d);
565     }
566     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
567    
/* Emit a sign extension of the 32-bit value in s into d (MOVSLQrr).
   NOTE(review): a long-to-quad move only exists on x86-64, yet this
   function is not guarded by __x86_64__ in the visible code -- confirm. */
568 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
569     {
570     MOVSLQrr(s, d);
571     }
572     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
573    
/* Emit a sign extension of the word in s into long register d (MOVSWLrr). */
574 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
575     {
576     MOVSWLrr(s, d);
577     }
578     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
579    
/* Emit a sign extension of the byte in s into long register d (MOVSBLrr). */
580     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
581     {
582     MOVSBLrr(s, d);
583     }
584     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
585    
/* Emit a zero extension of the word in s into long register d (MOVZWLrr). */
586     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
587     {
588     MOVZWLrr(s, d);
589     }
590     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
591    
/* Emit a zero extension of the byte in s into long register d (MOVZBLrr). */
592     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
593     {
594     MOVZBLrr(s, d);
595     }
596     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
597    
/* Emit a two-operand signed long multiply of register d by register s (IMULLrr). */
598     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
599     {
600 gbeauche 1.14 IMULLrr(s, d);
601 gbeauche 1.13 }
602     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
603    
/* Emit a one-operand signed multiply (IMULLr) with operand s.
   d must be MUL_NREG1 and s must be MUL_NREG2; any other pair is logged
   with write_log() and the process is aborted at code-generation time. */
604     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
605     {
606 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
607     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
608 gbeauche 1.13 abort();
609 gbeauche 1.14 }
610     IMULLr(s);
611 gbeauche 1.13 }
612     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
613    
/* Emit a one-operand unsigned multiply (MULLr) with operand s.
   d must be MUL_NREG1 and s must be MUL_NREG2; any other pair is logged
   with write_log() and the process is aborted at code-generation time. */
614     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
615     {
616 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
617     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
618 gbeauche 1.13 abort();
619 gbeauche 1.14 }
620     MULLr(s);
621 gbeauche 1.13 }
622     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
623    
/* Not implemented: calling this aborts the process unconditionally,
   without emitting any code or printing a message. */
624     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
625     {
626 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
627 gbeauche 1.13 }
628     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
629    
/* Emit a byte register-to-register move from s to d (MOVBrr). */
630     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
631     {
632     MOVBrr(s, d);
633     }
634     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
635    
/* Emit a word register-to-register move from s to d (MOVWrr). */
636     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
637     {
638     MOVWrr(s, d);
639     }
640     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
641    
/* Emit a long load into d from [baser + index*factor], displacement 0.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
642     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
643     {
644 gbeauche 1.35 ADDR32 MOVLmr(0, baser, index, factor, d);
645 gbeauche 1.13 }
646     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
647    
/* Emit a word load into d from [baser + index*factor], displacement 0.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
648     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
649     {
650 gbeauche 1.35 ADDR32 MOVWmr(0, baser, index, factor, d);
651 gbeauche 1.13 }
652     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
653    
/* Emit a byte load into d from [baser + index*factor], displacement 0.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
654     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
655     {
656 gbeauche 1.35 ADDR32 MOVBmr(0, baser, index, factor, d);
657 gbeauche 1.13 }
658     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
659    
/* Emit a long store of register s to [baser + index*factor], displacement 0.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
660     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
661     {
662 gbeauche 1.35 ADDR32 MOVLrm(s, 0, baser, index, factor);
663 gbeauche 1.13 }
664     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
665    
/* Emit a word store of register s to [baser + index*factor], displacement 0.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
666     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
667     {
668 gbeauche 1.35 ADDR32 MOVWrm(s, 0, baser, index, factor);
669 gbeauche 1.13 }
670     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
671    
/* Emit a byte store of register s to [baser + index*factor], displacement 0.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
672     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
673     {
674 gbeauche 1.35 ADDR32 MOVBrm(s, 0, baser, index, factor);
675 gbeauche 1.13 }
676     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
677    
/* Emit a long store of register s to [base + baser + index*factor],
   where base is an immediate displacement.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
678     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
679     {
680 gbeauche 1.35 ADDR32 MOVLrm(s, base, baser, index, factor);
681 gbeauche 1.13 }
682     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
683    
/* Emit a word store of register s to [base + baser + index*factor],
   where base is an immediate displacement.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
684     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
685     {
686 gbeauche 1.35 ADDR32 MOVWrm(s, base, baser, index, factor);
687 gbeauche 1.13 }
688     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
689    
/* Emit a byte store of register s to [base + baser + index*factor],
   where base is an immediate displacement.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
690     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
691     {
692 gbeauche 1.35 ADDR32 MOVBrm(s, base, baser, index, factor);
693 gbeauche 1.13 }
694     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
695    
/* Emit a long load into d from [base + baser + index*factor],
   where base is an immediate displacement.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
696     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
697     {
698 gbeauche 1.35 ADDR32 MOVLmr(base, baser, index, factor, d);
699 gbeauche 1.13 }
700     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
701    
/* Emit a word load into d from [base + baser + index*factor],
   where base is an immediate displacement.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
702     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
703     {
704 gbeauche 1.35 ADDR32 MOVWmr(base, baser, index, factor, d);
705 gbeauche 1.13 }
706     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
707    
/* Emit a byte load into d from [base + baser + index*factor],
   where base is an immediate displacement.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
708     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
709     {
710 gbeauche 1.35 ADDR32 MOVBmr(base, baser, index, factor, d);
711 gbeauche 1.13 }
712     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
713    
/* Emit a long load into d from [base + index*factor], with an immediate
   displacement and no base register.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
714     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
715     {
716 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
717 gbeauche 1.13 }
718     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
719    
/* Conditional long load into d from [base + index*factor] (no base
   register) under condition cond.
   Emits CMOVLmr when have_cmov is set. Otherwise emits a short jump on
   the inverted condition (cond^1) over a MOVLmr and patches the jump's
   8-bit displacement afterwards. Both forms are prefixed with ADDR32
   (0x67 byte on x86-64 builds). */
720     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
721     {
722 gbeauche 1.15 if (have_cmov)
723 gbeauche 1.35 ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
724 gbeauche 1.15 else { /* replacement using branch and mov */
/* target_p addresses the displacement byte of the jump about to be emitted */
725 gbeauche 1.38 int8 *target_p = (int8 *)x86_get_target() + 1;
726     JCCSii(cond^1, 0);
727 gbeauche 1.35 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
/* displacement = current emit position - address just after the displacement byte */
728 gbeauche 1.38 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
729 gbeauche 1.15 }
730 gbeauche 1.13 }
731     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
732    
/* Conditional long load into d from memory at displacement mem (no base
   or index register) under condition cond.
   Emits CMOVLmr when have_cmov is set. Otherwise emits a short jump on
   the inverted condition (cond^1) over a MOVLmr and patches the jump's
   8-bit displacement afterwards. */
733     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
734     {
735 gbeauche 1.15 if (have_cmov)
736     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
737     else { /* replacement using branch and mov */
/* target_p addresses the displacement byte of the jump about to be emitted */
738 gbeauche 1.38 int8 *target_p = (int8 *)x86_get_target() + 1;
739     JCCSii(cond^1, 0);
740 gbeauche 1.15 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
/* displacement = current emit position - address just after the displacement byte */
741 gbeauche 1.38 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
742 gbeauche 1.15 }
743 gbeauche 1.13 }
744     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
745    
/* Emit a long load into d from [s + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
746     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
747     {
748 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
749 gbeauche 1.13 }
750     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
751    
/* Emit a word load into d from [s + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
752     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
753     {
754 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
755 gbeauche 1.13 }
756     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
757    
/* Emit a byte load into d from [s + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
758     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
759     {
760 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
761 gbeauche 1.13 }
762     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
763    
/* Emit a long load into d from [s + offset]. The emitted sequence is
   identical to raw_mov_l_rR in this backend.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
764     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
765     {
766 gbeauche 1.35 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
767 gbeauche 1.13 }
768     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
769    
/* Emit a word load into d from [s + offset]. The emitted sequence is
   identical to raw_mov_w_rR in this backend.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
770     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
771     {
772 gbeauche 1.35 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
773 gbeauche 1.13 }
774     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
775    
/* Emit a byte load into d from [s + offset]. The emitted sequence is
   identical to raw_mov_b_rR in this backend.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
776     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
777     {
778 gbeauche 1.35 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
779 gbeauche 1.13 }
780     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
781    
/* Emit a long store of immediate i to [d + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
782     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
783     {
784 gbeauche 1.35 ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
785 gbeauche 1.13 }
786     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
787    
/* Emit a word store of immediate i to [d + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
788     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
789     {
790 gbeauche 1.35 ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
791 gbeauche 1.13 }
792     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
793    
/* Emit a byte store of immediate i to [d + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
794     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
795     {
796 gbeauche 1.35 ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
797 gbeauche 1.13 }
798     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
799    
/* Emit a long store of register s to [d + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
800     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
801     {
802 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
803 gbeauche 1.13 }
804     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
805    
/* Emit a word store of register s to [d + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
806     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
807     {
808 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
809 gbeauche 1.13 }
810     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
811    
/* Emit a byte store of register s to [d + offset].
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
812     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
813     {
814 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
815 gbeauche 1.13 }
816     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
817    
/* Emit LEA computing d = s + offset (LEALmr, no index register).
   No ADDR32 prefix is emitted here. */
818     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
819     {
820     LEALmr(offset, s, X86_NOREG, 1, d);
821     }
822     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
823    
/* Emit LEA computing d = s + index*factor + offset (LEALmr). */
824     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
825     {
826     LEALmr(offset, s, index, factor, d);
827     }
828     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
829    
/* Emit LEA computing d = s + index*factor, displacement 0 (LEALmr). */
830     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
831     {
832     LEALmr(0, s, index, factor, d);
833     }
834     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
835    
/* Emit LEA computing d = index*factor, with displacement 0 and no base
   register (LEALmr). The argument count given to LOWFUNC/LENDFUNC is 3,
   matching the three parameters (d, index, factor), as every other
   emitter in this file does. */
836 gbeauche 1.36 LOWFUNC(NONE,NONE,3,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
837     {
838     LEALmr(0, X86_NOREG, index, factor, d);
839     }
840     LENDFUNC(NONE,NONE,3,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
841    
/* Emit a long store of register s to [d + offset]. The emitted sequence
   is identical to raw_mov_l_Rr in this backend.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
842 gbeauche 1.13 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
843     {
844 gbeauche 1.35 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
845 gbeauche 1.13 }
846     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
847    
/* Emit a word store of register s to [d + offset]. The emitted sequence
   is identical to raw_mov_w_Rr in this backend.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
848     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
849     {
850 gbeauche 1.35 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
851 gbeauche 1.13 }
852     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
853    
/* Emit a byte store of register s to [d + offset]. The emitted sequence
   is identical to raw_mov_b_Rr in this backend.
   ADDR32 adds the 0x67 prefix byte on x86-64 builds. */
854     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
855     {
856 gbeauche 1.35 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
857 gbeauche 1.13 }
858     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
859    
/* Emit a byte swap of long register r (BSWAPLr). */
860     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
861     {
862     BSWAPLr(r);
863     }
864     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
865    
/* Swap the two bytes of word register r by emitting a word rotate-left
   by 8 (ROLWir). */
866     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
867     {
868     ROLWir(8, r);
869     }
870     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
871    
/* Emit a long register-to-register move from s to d (MOVLrr). */
872     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
873     {
874     MOVLrr(s, d);
875     }
876     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
877    
878     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
879     {
880     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
881     }
882     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
883    
884     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
885     {
886     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
887     }
888     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
889    
890     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
891     {
892     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
893     }
894     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
895    
896     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
897     {
898     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
899     }
900     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
901    
902     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
903     {
904     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
905     }
906     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
907    
908     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
909     {
910     MOVLir(s, d);
911     }
912     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
913    
914     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
915     {
916     MOVWir(s, d);
917     }
918     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
919    
920     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
921     {
922     MOVBir(s, d);
923     }
924     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
925    
926     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
927     {
928     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
929     }
930     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
931    
932     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
933     {
934     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
935     }
936     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
937    
938     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
939     {
940     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
941     }
942     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
943    
944     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
945     {
946     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
947     }
948     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
949    
950     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
951     {
952     TESTLir(i, d);
953     }
954     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
955    
956     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
957     {
958     TESTLrr(s, d);
959     }
960     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
961    
962     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
963     {
964     TESTWrr(s, d);
965     }
966     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
967    
968     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
969     {
970     TESTBrr(s, d);
971     }
972     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
973    
974 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
975     {
976     XORLir(i, d);
977     }
978     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
979    
980 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
981     {
982     ANDLir(i, d);
983     }
984     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
985    
986     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
987     {
988     ANDWir(i, d);
989     }
990     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
991    
992     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
993     {
994     ANDLrr(s, d);
995     }
996     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
997    
998     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
999     {
1000     ANDWrr(s, d);
1001     }
1002     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1003    
1004     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1005     {
1006     ANDBrr(s, d);
1007     }
1008     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1009    
1010     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1011     {
1012     ORLir(i, d);
1013     }
1014     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1015    
1016     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1017     {
1018     ORLrr(s, d);
1019     }
1020     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1021    
1022     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1023     {
1024     ORWrr(s, d);
1025     }
1026     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1027    
1028     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1029     {
1030     ORBrr(s, d);
1031     }
1032     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1033    
1034     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1035     {
1036     ADCLrr(s, d);
1037     }
1038     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1039    
1040     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1041     {
1042     ADCWrr(s, d);
1043     }
1044     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1045    
1046     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1047     {
1048     ADCBrr(s, d);
1049     }
1050     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1051    
1052     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1053     {
1054     ADDLrr(s, d);
1055     }
1056     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1057    
1058     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1059     {
1060     ADDWrr(s, d);
1061     }
1062     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1063    
1064     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1065     {
1066     ADDBrr(s, d);
1067     }
1068     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1069    
1070     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1071     {
1072     SUBLir(i, d);
1073     }
1074     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1075    
1076     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1077     {
1078     SUBBir(i, d);
1079     }
1080     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1081    
1082     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1083     {
1084     ADDLir(i, d);
1085     }
1086     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1087    
1088     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1089     {
1090     ADDWir(i, d);
1091     }
1092     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1093    
1094     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1095     {
1096     ADDBir(i, d);
1097     }
1098     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1099    
1100     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1101     {
1102     SBBLrr(s, d);
1103     }
1104     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1105    
1106     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1107     {
1108     SBBWrr(s, d);
1109     }
1110     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1111    
1112     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1113     {
1114     SBBBrr(s, d);
1115     }
1116     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1117    
1118     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1119     {
1120     SUBLrr(s, d);
1121     }
1122     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1123    
1124     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1125     {
1126     SUBWrr(s, d);
1127     }
1128     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1129    
1130     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1131     {
1132     SUBBrr(s, d);
1133     }
1134     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1135    
1136     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1137     {
1138     CMPLrr(s, d);
1139     }
1140     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1141    
1142     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1143     {
1144     CMPLir(i, r);
1145     }
1146     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1147    
1148     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1149     {
1150     CMPWrr(s, d);
1151     }
1152     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1153    
1154     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1155     {
1156     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1157     }
1158     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1159    
1160     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1161     {
1162     CMPBir(i, d);
1163     }
1164     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1165    
1166     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1167     {
1168     CMPBrr(s, d);
1169     }
1170     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1171    
1172     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1173     {
1174 gbeauche 1.35 ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
1175 gbeauche 1.13 }
1176     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1177    
1178     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1179     {
1180     XORLrr(s, d);
1181     }
1182     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1183    
1184     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1185     {
1186     XORWrr(s, d);
1187     }
1188     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1189    
1190     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1191     {
1192     XORBrr(s, d);
1193     }
1194     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1195    
1196     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1197     {
1198     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1199     }
1200     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1201    
1202     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1203     {
1204     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1205     }
1206     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1207    
1208     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1209     {
1210     XCHGLrr(r2, r1);
1211     }
1212     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1213    
1214 gbeauche 1.36 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1215     {
1216     XCHGBrr(r2, r1);
1217     }
1218     LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1219    
1220 gbeauche 1.13 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1221     {
1222 gbeauche 1.18 PUSHF();
1223 gbeauche 1.13 }
1224     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1225    
1226     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1227     {
1228 gbeauche 1.18 POPF();
1229 gbeauche 1.13 }
1230     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1231    
1232 gbeauche 1.34 /* Generate floating-point instructions */
1233     static inline void x86_fadd_m(MEMR s)
1234     {
1235     FADDLm(s,X86_NOREG,X86_NOREG,1);
1236     }
1237    
1238 gbeauche 1.13 #else
1239    
/* Encoding-size switches consulted by the hand-written emitters below. */
const bool optimize_accum = true;       /* allow the short accumulator (EAX) opcode forms */
const bool optimize_imm8 = true;
const bool optimize_shift_once = true;  /* allow the shift/rotate-by-1 opcode forms */
1243    
1244     /*************************************************************************
1245     * Actual encoding of the instructions on the target CPU *
1246     *************************************************************************/
1247    
1248 gbeauche 1.2 static __inline__ int isaccum(int r)
1249     {
1250     return (r == EAX_INDEX);
1251     }
1252    
1253 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1254     {
1255     return (x>=-128 && x<=127);
1256     }
1257    
1258     static __inline__ int isword(uae_s32 x)
1259     {
1260     return (x>=-32768 && x<=32767);
1261     }
1262    
1263     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1264     {
1265     emit_byte(0x50+r);
1266     }
1267     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1268    
1269     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1270     {
1271     emit_byte(0x58+r);
1272     }
1273     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1274    
1275 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1276     {
1277     emit_byte(0x8f);
1278     emit_byte(0x05);
1279     emit_long(d);
1280     }
1281     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1282    
1283 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1284     {
1285     emit_byte(0x0f);
1286     emit_byte(0xba);
1287     emit_byte(0xe0+r);
1288     emit_byte(i);
1289     }
1290     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1291    
1292     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1293     {
1294     emit_byte(0x0f);
1295     emit_byte(0xa3);
1296     emit_byte(0xc0+8*b+r);
1297     }
1298     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1299    
1300     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1301     {
1302     emit_byte(0x0f);
1303     emit_byte(0xba);
1304     emit_byte(0xf8+r);
1305     emit_byte(i);
1306     }
1307     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1308    
1309     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1310     {
1311     emit_byte(0x0f);
1312     emit_byte(0xbb);
1313     emit_byte(0xc0+8*b+r);
1314     }
1315     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1316    
1317    
1318     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1319     {
1320     emit_byte(0x0f);
1321     emit_byte(0xba);
1322     emit_byte(0xf0+r);
1323     emit_byte(i);
1324     }
1325     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1326    
1327     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1328     {
1329     emit_byte(0x0f);
1330     emit_byte(0xb3);
1331     emit_byte(0xc0+8*b+r);
1332     }
1333     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1334    
1335     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1336     {
1337     emit_byte(0x0f);
1338     emit_byte(0xba);
1339     emit_byte(0xe8+r);
1340     emit_byte(i);
1341     }
1342     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1343    
1344     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1345     {
1346     emit_byte(0x0f);
1347     emit_byte(0xab);
1348     emit_byte(0xc0+8*b+r);
1349     }
1350     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1351    
1352     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1353     {
1354     emit_byte(0x66);
1355     if (isbyte(i)) {
1356     emit_byte(0x83);
1357     emit_byte(0xe8+d);
1358     emit_byte(i);
1359     }
1360     else {
1361 gbeauche 1.2 if (optimize_accum && isaccum(d))
1362     emit_byte(0x2d);
1363     else {
1364 gbeauche 1.1 emit_byte(0x81);
1365     emit_byte(0xe8+d);
1366 gbeauche 1.2 }
1367 gbeauche 1.1 emit_word(i);
1368     }
1369     }
1370     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1371    
1372    
1373     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1374     {
1375     emit_byte(0x8b);
1376     emit_byte(0x05+8*d);
1377     emit_long(s);
1378     }
1379     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1380    
1381     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1382     {
1383     emit_byte(0xc7);
1384     emit_byte(0x05);
1385     emit_long(d);
1386     emit_long(s);
1387     }
1388     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1389    
1390     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1391     {
1392     emit_byte(0x66);
1393     emit_byte(0xc7);
1394     emit_byte(0x05);
1395     emit_long(d);
1396     emit_word(s);
1397     }
1398     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1399    
1400     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1401     {
1402     emit_byte(0xc6);
1403     emit_byte(0x05);
1404     emit_long(d);
1405     emit_byte(s);
1406     }
1407     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1408    
1409     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1410     {
1411     if (optimize_shift_once && (i == 1)) {
1412     emit_byte(0xd0);
1413     emit_byte(0x05);
1414     emit_long(d);
1415     }
1416     else {
1417     emit_byte(0xc0);
1418     emit_byte(0x05);
1419     emit_long(d);
1420     emit_byte(i);
1421     }
1422     }
1423     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1424    
1425     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1426     {
1427     if (optimize_shift_once && (i == 1)) {
1428     emit_byte(0xd0);
1429     emit_byte(0xc0+r);
1430     }
1431     else {
1432     emit_byte(0xc0);
1433     emit_byte(0xc0+r);
1434     emit_byte(i);
1435     }
1436     }
1437     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1438    
1439     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1440     {
1441     emit_byte(0x66);
1442     emit_byte(0xc1);
1443     emit_byte(0xc0+r);
1444     emit_byte(i);
1445     }
1446     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1447    
1448     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1449     {
1450     if (optimize_shift_once && (i == 1)) {
1451     emit_byte(0xd1);
1452     emit_byte(0xc0+r);
1453     }
1454     else {
1455     emit_byte(0xc1);
1456     emit_byte(0xc0+r);
1457     emit_byte(i);
1458     }
1459     }
1460     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1461    
1462     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1463     {
1464     emit_byte(0xd3);
1465     emit_byte(0xc0+d);
1466     }
1467     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1468    
1469     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1470     {
1471     emit_byte(0x66);
1472     emit_byte(0xd3);
1473     emit_byte(0xc0+d);
1474     }
1475     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1476    
1477     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1478     {
1479     emit_byte(0xd2);
1480     emit_byte(0xc0+d);
1481     }
1482     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1483    
1484     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1485     {
1486     emit_byte(0xd3);
1487     emit_byte(0xe0+d);
1488     }
1489     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1490    
1491     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1492     {
1493     emit_byte(0x66);
1494     emit_byte(0xd3);
1495     emit_byte(0xe0+d);
1496     }
1497     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1498    
1499     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1500     {
1501     emit_byte(0xd2);
1502     emit_byte(0xe0+d);
1503     }
1504     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1505    
1506     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1507     {
1508     if (optimize_shift_once && (i == 1)) {
1509     emit_byte(0xd0);
1510     emit_byte(0xc8+r);
1511     }
1512     else {
1513     emit_byte(0xc0);
1514     emit_byte(0xc8+r);
1515     emit_byte(i);
1516     }
1517     }
1518     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1519    
1520     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1521     {
1522     emit_byte(0x66);
1523     emit_byte(0xc1);
1524     emit_byte(0xc8+r);
1525     emit_byte(i);
1526     }
1527     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1528    
1529     // gb-- used for making an fpcr value in compemu_fpp.cpp
1530     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1531     {
1532     emit_byte(0x0b);
1533     emit_byte(0x05+8*d);
1534     emit_long(s);
1535     }
1536     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1537    
1538     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1539     {
1540     if (optimize_shift_once && (i == 1)) {
1541     emit_byte(0xd1);
1542     emit_byte(0xc8+r);
1543     }
1544     else {
1545     emit_byte(0xc1);
1546     emit_byte(0xc8+r);
1547     emit_byte(i);
1548     }
1549     }
1550     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1551    
1552     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1553     {
1554     emit_byte(0xd3);
1555     emit_byte(0xc8+d);
1556     }
1557     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1558    
1559     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1560     {
1561     emit_byte(0x66);
1562     emit_byte(0xd3);
1563     emit_byte(0xc8+d);
1564     }
1565     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1566    
1567     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1568     {
1569     emit_byte(0xd2);
1570     emit_byte(0xc8+d);
1571     }
1572     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1573    
1574     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1575     {
1576     emit_byte(0xd3);
1577     emit_byte(0xe8+d);
1578     }
1579     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1580    
1581     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1582     {
1583     emit_byte(0x66);
1584     emit_byte(0xd3);
1585     emit_byte(0xe8+d);
1586     }
1587     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1588    
1589     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1590     {
1591     emit_byte(0xd2);
1592     emit_byte(0xe8+d);
1593     }
1594     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1595    
1596     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1597     {
1598     emit_byte(0xd3);
1599     emit_byte(0xf8+d);
1600     }
1601     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1602    
1603     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1604     {
1605     emit_byte(0x66);
1606     emit_byte(0xd3);
1607     emit_byte(0xf8+d);
1608     }
1609     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1610    
1611     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1612     {
1613     emit_byte(0xd2);
1614     emit_byte(0xf8+d);
1615     }
1616     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1617    
1618     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1619     {
1620     if (optimize_shift_once && (i == 1)) {
1621     emit_byte(0xd1);
1622     emit_byte(0xe0+r);
1623     }
1624     else {
1625     emit_byte(0xc1);
1626     emit_byte(0xe0+r);
1627     emit_byte(i);
1628     }
1629     }
1630     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1631    
1632     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1633     {
1634     emit_byte(0x66);
1635     emit_byte(0xc1);
1636     emit_byte(0xe0+r);
1637     emit_byte(i);
1638     }
1639     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1640    
1641     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1642     {
1643     if (optimize_shift_once && (i == 1)) {
1644     emit_byte(0xd0);
1645     emit_byte(0xe0+r);
1646     }
1647     else {
1648     emit_byte(0xc0);
1649     emit_byte(0xe0+r);
1650     emit_byte(i);
1651     }
1652     }
1653     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1654    
1655     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1656     {
1657     if (optimize_shift_once && (i == 1)) {
1658     emit_byte(0xd1);
1659     emit_byte(0xe8+r);
1660     }
1661     else {
1662     emit_byte(0xc1);
1663     emit_byte(0xe8+r);
1664     emit_byte(i);
1665     }
1666     }
1667     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1668    
1669     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1670     {
1671     emit_byte(0x66);
1672     emit_byte(0xc1);
1673     emit_byte(0xe8+r);
1674     emit_byte(i);
1675     }
1676     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1677    
1678     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1679     {
1680     if (optimize_shift_once && (i == 1)) {
1681     emit_byte(0xd0);
1682     emit_byte(0xe8+r);
1683     }
1684     else {
1685     emit_byte(0xc0);
1686     emit_byte(0xe8+r);
1687     emit_byte(i);
1688     }
1689     }
1690     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1691    
1692     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1693     {
1694     if (optimize_shift_once && (i == 1)) {
1695     emit_byte(0xd1);
1696     emit_byte(0xf8+r);
1697     }
1698     else {
1699     emit_byte(0xc1);
1700     emit_byte(0xf8+r);
1701     emit_byte(i);
1702     }
1703     }
1704     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1705    
1706     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1707     {
1708     emit_byte(0x66);
1709     emit_byte(0xc1);
1710     emit_byte(0xf8+r);
1711     emit_byte(i);
1712     }
1713     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1714    
1715     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1716     {
1717     if (optimize_shift_once && (i == 1)) {
1718     emit_byte(0xd0);
1719     emit_byte(0xf8+r);
1720     }
1721     else {
1722     emit_byte(0xc0);
1723     emit_byte(0xf8+r);
1724     emit_byte(i);
1725     }
1726     }
1727     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1728    
1729     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1730     {
1731     emit_byte(0x9e);
1732     }
1733     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1734    
1735     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1736     {
1737     emit_byte(0x0f);
1738     emit_byte(0xa2);
1739     }
1740     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1741    
1742     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1743     {
1744     emit_byte(0x9f);
1745     }
1746     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1747    
1748     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1749     {
1750     emit_byte(0x0f);
1751     emit_byte(0x90+cc);
1752     emit_byte(0xc0+d);
1753     }
1754     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1755    
1756     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1757     {
1758     emit_byte(0x0f);
1759     emit_byte(0x90+cc);
1760     emit_byte(0x05);
1761     emit_long(d);
1762     }
1763     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1764    
1765 gbeauche 1.39 LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
1766     {
1767     /* replacement using branch and mov */
1768     int uncc=(cc^1);
1769     emit_byte(0x70+uncc);
1770     emit_byte(3); /* skip next 2 bytes if not cc=true */
1771     emit_byte(0x88);
1772     emit_byte(0xc0+8*s+d);
1773     }
1774     LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
1775    
1776     LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
1777     {
1778     if (have_cmov) {
1779     emit_byte(0x66);
1780     emit_byte(0x0f);
1781     emit_byte(0x40+cc);
1782     emit_byte(0xc0+8*d+s);
1783     }
1784     else { /* replacement using branch and mov */
1785     int uncc=(cc^1);
1786     emit_byte(0x70+uncc);
1787     emit_byte(3); /* skip next 3 bytes if not cc=true */
1788     emit_byte(0x66);
1789     emit_byte(0x89);
1790     emit_byte(0xc0+8*s+d);
1791     }
1792     }
1793     LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
1794    
1795 gbeauche 1.1 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1796     {
1797     if (have_cmov) {
1798     emit_byte(0x0f);
1799     emit_byte(0x40+cc);
1800     emit_byte(0xc0+8*d+s);
1801     }
1802     else { /* replacement using branch and mov */
1803     int uncc=(cc^1);
1804     emit_byte(0x70+uncc);
1805     emit_byte(2); /* skip next 2 bytes if not cc=true */
1806     emit_byte(0x89);
1807     emit_byte(0xc0+8*s+d);
1808     }
1809     }
1810     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1811    
1812     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1813     {
1814     emit_byte(0x0f);
1815     emit_byte(0xbc);
1816     emit_byte(0xc0+8*d+s);
1817     }
1818     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1819    
1820     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1821     {
1822     emit_byte(0x0f);
1823     emit_byte(0xbf);
1824     emit_byte(0xc0+8*d+s);
1825     }
1826     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1827    
1828     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1829     {
1830     emit_byte(0x0f);
1831     emit_byte(0xbe);
1832     emit_byte(0xc0+8*d+s);
1833     }
1834     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1835    
1836     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1837     {
1838     emit_byte(0x0f);
1839     emit_byte(0xb7);
1840     emit_byte(0xc0+8*d+s);
1841     }
1842     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1843    
1844     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1845     {
1846     emit_byte(0x0f);
1847     emit_byte(0xb6);
1848     emit_byte(0xc0+8*d+s);
1849     }
1850     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1851    
1852     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1853     {
1854     emit_byte(0x0f);
1855     emit_byte(0xaf);
1856     emit_byte(0xc0+8*d+s);
1857     }
1858     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1859    
1860     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1861     {
1862     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1863     abort();
1864     emit_byte(0xf7);
1865     emit_byte(0xea);
1866     }
1867     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1868    
1869     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1870     {
1871     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1872     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1873     abort();
1874     }
1875     emit_byte(0xf7);
1876     emit_byte(0xe2);
1877     }
1878     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1879    
1880     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1881     {
1882     abort(); /* %^$&%^$%#^ x86! */
1883     emit_byte(0x0f);
1884     emit_byte(0xaf);
1885     emit_byte(0xc0+8*d+s);
1886     }
1887     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1888    
1889     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1890     {
1891     emit_byte(0x88);
1892     emit_byte(0xc0+8*s+d);
1893     }
1894     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1895    
1896     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1897     {
1898     emit_byte(0x66);
1899     emit_byte(0x89);
1900     emit_byte(0xc0+8*s+d);
1901     }
1902     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1903    
1904     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1905     {
1906     int isebp=(baser==5)?0x40:0;
1907     int fi;
1908    
1909     switch(factor) {
1910     case 1: fi=0; break;
1911     case 2: fi=1; break;
1912     case 4: fi=2; break;
1913     case 8: fi=3; break;
1914     default: abort();
1915     }
1916    
1917    
1918     emit_byte(0x8b);
1919     emit_byte(0x04+8*d+isebp);
1920     emit_byte(baser+8*index+0x40*fi);
1921     if (isebp)
1922     emit_byte(0x00);
1923     }
1924     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1925    
1926     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1927     {
1928     int fi;
1929     int isebp;
1930    
1931     switch(factor) {
1932     case 1: fi=0; break;
1933     case 2: fi=1; break;
1934     case 4: fi=2; break;
1935     case 8: fi=3; break;
1936     default: abort();
1937     }
1938     isebp=(baser==5)?0x40:0;
1939    
1940     emit_byte(0x66);
1941     emit_byte(0x8b);
1942     emit_byte(0x04+8*d+isebp);
1943     emit_byte(baser+8*index+0x40*fi);
1944     if (isebp)
1945     emit_byte(0x00);
1946     }
1947     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1948    
1949     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1950     {
1951     int fi;
1952     int isebp;
1953    
1954     switch(factor) {
1955     case 1: fi=0; break;
1956     case 2: fi=1; break;
1957     case 4: fi=2; break;
1958     case 8: fi=3; break;
1959     default: abort();
1960     }
1961     isebp=(baser==5)?0x40:0;
1962    
1963     emit_byte(0x8a);
1964     emit_byte(0x04+8*d+isebp);
1965     emit_byte(baser+8*index+0x40*fi);
1966     if (isebp)
1967     emit_byte(0x00);
1968     }
1969     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1970    
1971     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1972     {
1973     int fi;
1974     int isebp;
1975    
1976     switch(factor) {
1977     case 1: fi=0; break;
1978     case 2: fi=1; break;
1979     case 4: fi=2; break;
1980     case 8: fi=3; break;
1981     default: abort();
1982     }
1983    
1984    
1985     isebp=(baser==5)?0x40:0;
1986    
1987     emit_byte(0x89);
1988     emit_byte(0x04+8*s+isebp);
1989     emit_byte(baser+8*index+0x40*fi);
1990     if (isebp)
1991     emit_byte(0x00);
1992     }
1993     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1994    
1995     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1996     {
1997     int fi;
1998     int isebp;
1999    
2000     switch(factor) {
2001     case 1: fi=0; break;
2002     case 2: fi=1; break;
2003     case 4: fi=2; break;
2004     case 8: fi=3; break;
2005     default: abort();
2006     }
2007     isebp=(baser==5)?0x40:0;
2008    
2009     emit_byte(0x66);
2010     emit_byte(0x89);
2011     emit_byte(0x04+8*s+isebp);
2012     emit_byte(baser+8*index+0x40*fi);
2013     if (isebp)
2014     emit_byte(0x00);
2015     }
2016     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
2017    
2018     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2019     {
2020     int fi;
2021     int isebp;
2022    
2023     switch(factor) {
2024     case 1: fi=0; break;
2025     case 2: fi=1; break;
2026     case 4: fi=2; break;
2027     case 8: fi=3; break;
2028     default: abort();
2029     }
2030     isebp=(baser==5)?0x40:0;
2031    
2032     emit_byte(0x88);
2033     emit_byte(0x04+8*s+isebp);
2034     emit_byte(baser+8*index+0x40*fi);
2035     if (isebp)
2036     emit_byte(0x00);
2037     }
2038     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2039    
2040     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2041     {
2042     int fi;
2043    
2044     switch(factor) {
2045     case 1: fi=0; break;
2046     case 2: fi=1; break;
2047     case 4: fi=2; break;
2048     case 8: fi=3; break;
2049     default: abort();
2050     }
2051    
2052     emit_byte(0x89);
2053     emit_byte(0x84+8*s);
2054     emit_byte(baser+8*index+0x40*fi);
2055     emit_long(base);
2056     }
2057     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2058    
2059     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2060     {
2061     int fi;
2062    
2063     switch(factor) {
2064     case 1: fi=0; break;
2065     case 2: fi=1; break;
2066     case 4: fi=2; break;
2067     case 8: fi=3; break;
2068     default: abort();
2069     }
2070    
2071     emit_byte(0x66);
2072     emit_byte(0x89);
2073     emit_byte(0x84+8*s);
2074     emit_byte(baser+8*index+0x40*fi);
2075     emit_long(base);
2076     }
2077     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2078    
2079     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2080     {
2081     int fi;
2082    
2083     switch(factor) {
2084     case 1: fi=0; break;
2085     case 2: fi=1; break;
2086     case 4: fi=2; break;
2087     case 8: fi=3; break;
2088     default: abort();
2089     }
2090    
2091     emit_byte(0x88);
2092     emit_byte(0x84+8*s);
2093     emit_byte(baser+8*index+0x40*fi);
2094     emit_long(base);
2095     }
2096     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2097    
2098     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2099     {
2100     int fi;
2101    
2102     switch(factor) {
2103     case 1: fi=0; break;
2104     case 2: fi=1; break;
2105     case 4: fi=2; break;
2106     case 8: fi=3; break;
2107     default: abort();
2108     }
2109    
2110     emit_byte(0x8b);
2111     emit_byte(0x84+8*d);
2112     emit_byte(baser+8*index+0x40*fi);
2113     emit_long(base);
2114     }
2115     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2116    
2117     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2118     {
2119     int fi;
2120    
2121     switch(factor) {
2122     case 1: fi=0; break;
2123     case 2: fi=1; break;
2124     case 4: fi=2; break;
2125     case 8: fi=3; break;
2126     default: abort();
2127     }
2128    
2129     emit_byte(0x66);
2130     emit_byte(0x8b);
2131     emit_byte(0x84+8*d);
2132     emit_byte(baser+8*index+0x40*fi);
2133     emit_long(base);
2134     }
2135     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2136    
2137     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2138     {
2139     int fi;
2140    
2141     switch(factor) {
2142     case 1: fi=0; break;
2143     case 2: fi=1; break;
2144     case 4: fi=2; break;
2145     case 8: fi=3; break;
2146     default: abort();
2147     }
2148    
2149     emit_byte(0x8a);
2150     emit_byte(0x84+8*d);
2151     emit_byte(baser+8*index+0x40*fi);
2152     emit_long(base);
2153     }
2154     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2155    
2156     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2157     {
2158     int fi;
2159     switch(factor) {
2160     case 1: fi=0; break;
2161     case 2: fi=1; break;
2162     case 4: fi=2; break;
2163     case 8: fi=3; break;
2164     default:
2165     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2166     abort();
2167     }
2168     emit_byte(0x8b);
2169     emit_byte(0x04+8*d);
2170     emit_byte(0x05+8*index+64*fi);
2171     emit_long(base);
2172     }
2173     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2174    
2175     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2176     {
2177     int fi;
2178     switch(factor) {
2179     case 1: fi=0; break;
2180     case 2: fi=1; break;
2181     case 4: fi=2; break;
2182     case 8: fi=3; break;
2183     default:
2184     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2185     abort();
2186     }
2187     if (have_cmov) {
2188     emit_byte(0x0f);
2189     emit_byte(0x40+cond);
2190     emit_byte(0x04+8*d);
2191     emit_byte(0x05+8*index+64*fi);
2192     emit_long(base);
2193     }
2194     else { /* replacement using branch and mov */
2195     int uncc=(cond^1);
2196     emit_byte(0x70+uncc);
2197     emit_byte(7); /* skip next 7 bytes if not cc=true */
2198     emit_byte(0x8b);
2199     emit_byte(0x04+8*d);
2200     emit_byte(0x05+8*index+64*fi);
2201     emit_long(base);
2202     }
2203     }
2204     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2205    
2206     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2207     {
2208     if (have_cmov) {
2209     emit_byte(0x0f);
2210     emit_byte(0x40+cond);
2211     emit_byte(0x05+8*d);
2212     emit_long(mem);
2213     }
2214     else { /* replacement using branch and mov */
2215     int uncc=(cond^1);
2216     emit_byte(0x70+uncc);
2217     emit_byte(6); /* skip next 6 bytes if not cc=true */
2218     emit_byte(0x8b);
2219     emit_byte(0x05+8*d);
2220     emit_long(mem);
2221     }
2222     }
2223     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2224    
2225     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2226     {
2227 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2228 gbeauche 1.1 emit_byte(0x8b);
2229     emit_byte(0x40+8*d+s);
2230     emit_byte(offset);
2231     }
2232     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2233    
2234     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2235     {
2236 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2237 gbeauche 1.1 emit_byte(0x66);
2238     emit_byte(0x8b);
2239     emit_byte(0x40+8*d+s);
2240     emit_byte(offset);
2241     }
2242     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2243    
2244     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2245     {
2246 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2247 gbeauche 1.1 emit_byte(0x8a);
2248     emit_byte(0x40+8*d+s);
2249     emit_byte(offset);
2250     }
2251     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2252    
2253     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2254     {
2255     emit_byte(0x8b);
2256     emit_byte(0x80+8*d+s);
2257     emit_long(offset);
2258     }
2259     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2260    
2261     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2262     {
2263     emit_byte(0x66);
2264     emit_byte(0x8b);
2265     emit_byte(0x80+8*d+s);
2266     emit_long(offset);
2267     }
2268     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2269    
2270     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2271     {
2272     emit_byte(0x8a);
2273     emit_byte(0x80+8*d+s);
2274     emit_long(offset);
2275     }
2276     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2277    
2278     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2279     {
2280 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2281 gbeauche 1.1 emit_byte(0xc7);
2282     emit_byte(0x40+d);
2283     emit_byte(offset);
2284     emit_long(i);
2285     }
2286     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2287    
2288     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2289     {
2290 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2291 gbeauche 1.1 emit_byte(0x66);
2292     emit_byte(0xc7);
2293     emit_byte(0x40+d);
2294     emit_byte(offset);
2295     emit_word(i);
2296     }
2297     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2298    
2299     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2300     {
2301 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2302 gbeauche 1.1 emit_byte(0xc6);
2303     emit_byte(0x40+d);
2304     emit_byte(offset);
2305     emit_byte(i);
2306     }
2307     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2308    
2309     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2310     {
2311 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2312 gbeauche 1.1 emit_byte(0x89);
2313     emit_byte(0x40+8*s+d);
2314     emit_byte(offset);
2315     }
2316     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2317    
2318     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2319     {
2320 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2321 gbeauche 1.1 emit_byte(0x66);
2322     emit_byte(0x89);
2323     emit_byte(0x40+8*s+d);
2324     emit_byte(offset);
2325     }
2326     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2327    
2328     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2329     {
2330 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2331 gbeauche 1.1 emit_byte(0x88);
2332     emit_byte(0x40+8*s+d);
2333     emit_byte(offset);
2334     }
2335     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2336    
2337     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2338     {
2339     if (optimize_imm8 && isbyte(offset)) {
2340     emit_byte(0x8d);
2341     emit_byte(0x40+8*d+s);
2342     emit_byte(offset);
2343     }
2344     else {
2345     emit_byte(0x8d);
2346     emit_byte(0x80+8*d+s);
2347     emit_long(offset);
2348     }
2349     }
2350     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2351    
2352     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2353     {
2354     int fi;
2355    
2356     switch(factor) {
2357     case 1: fi=0; break;
2358     case 2: fi=1; break;
2359     case 4: fi=2; break;
2360     case 8: fi=3; break;
2361     default: abort();
2362     }
2363    
2364     if (optimize_imm8 && isbyte(offset)) {
2365     emit_byte(0x8d);
2366     emit_byte(0x44+8*d);
2367     emit_byte(0x40*fi+8*index+s);
2368     emit_byte(offset);
2369     }
2370     else {
2371     emit_byte(0x8d);
2372     emit_byte(0x84+8*d);
2373     emit_byte(0x40*fi+8*index+s);
2374     emit_long(offset);
2375     }
2376     }
2377     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2378    
2379     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2380     {
2381     int isebp=(s==5)?0x40:0;
2382     int fi;
2383    
2384     switch(factor) {
2385     case 1: fi=0; break;
2386     case 2: fi=1; break;
2387     case 4: fi=2; break;
2388     case 8: fi=3; break;
2389     default: abort();
2390     }
2391    
2392     emit_byte(0x8d);
2393     emit_byte(0x04+8*d+isebp);
2394     emit_byte(0x40*fi+8*index+s);
2395     if (isebp)
2396     emit_byte(0);
2397     }
2398     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2399    
2400     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2401     {
2402     if (optimize_imm8 && isbyte(offset)) {
2403     emit_byte(0x89);
2404     emit_byte(0x40+8*s+d);
2405     emit_byte(offset);
2406     }
2407     else {
2408     emit_byte(0x89);
2409     emit_byte(0x80+8*s+d);
2410     emit_long(offset);
2411     }
2412     }
2413     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2414    
2415     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2416     {
2417     emit_byte(0x66);
2418     emit_byte(0x89);
2419     emit_byte(0x80+8*s+d);
2420     emit_long(offset);
2421     }
2422     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2423    
2424     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2425     {
2426     if (optimize_imm8 && isbyte(offset)) {
2427     emit_byte(0x88);
2428     emit_byte(0x40+8*s+d);
2429     emit_byte(offset);
2430     }
2431     else {
2432     emit_byte(0x88);
2433     emit_byte(0x80+8*s+d);
2434     emit_long(offset);
2435     }
2436     }
2437     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2438    
2439     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2440     {
2441     emit_byte(0x0f);
2442     emit_byte(0xc8+r);
2443     }
2444     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2445    
2446     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2447     {
2448     emit_byte(0x66);
2449     emit_byte(0xc1);
2450     emit_byte(0xc0+r);
2451     emit_byte(0x08);
2452     }
2453     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2454    
2455     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2456     {
2457     emit_byte(0x89);
2458     emit_byte(0xc0+8*s+d);
2459     }
2460     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2461    
2462     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2463     {
2464     emit_byte(0x89);
2465     emit_byte(0x05+8*s);
2466     emit_long(d);
2467     }
2468     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2469    
2470     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2471     {
2472     emit_byte(0x66);
2473     emit_byte(0x89);
2474     emit_byte(0x05+8*s);
2475     emit_long(d);
2476     }
2477     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2478    
2479     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2480     {
2481     emit_byte(0x66);
2482     emit_byte(0x8b);
2483     emit_byte(0x05+8*d);
2484     emit_long(s);
2485     }
2486     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2487    
2488     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2489     {
2490     emit_byte(0x88);
2491 gbeauche 1.33 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2492 gbeauche 1.1 emit_long(d);
2493     }
2494     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2495    
2496     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2497     {
2498     emit_byte(0x8a);
2499     emit_byte(0x05+8*d);
2500     emit_long(s);
2501     }
2502     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2503    
2504     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2505     {
2506     emit_byte(0xb8+d);
2507     emit_long(s);
2508     }
2509     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2510    
2511     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2512     {
2513     emit_byte(0x66);
2514     emit_byte(0xb8+d);
2515     emit_word(s);
2516     }
2517     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2518    
2519     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2520     {
2521     emit_byte(0xb0+d);
2522     emit_byte(s);
2523     }
2524     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2525    
2526     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2527     {
2528     emit_byte(0x81);
2529     emit_byte(0x15);
2530     emit_long(d);
2531     emit_long(s);
2532     }
2533     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2534    
2535     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2536     {
2537     if (optimize_imm8 && isbyte(s)) {
2538     emit_byte(0x83);
2539     emit_byte(0x05);
2540     emit_long(d);
2541     emit_byte(s);
2542     }
2543     else {
2544     emit_byte(0x81);
2545     emit_byte(0x05);
2546     emit_long(d);
2547     emit_long(s);
2548     }
2549     }
2550     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2551    
2552     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2553     {
2554     emit_byte(0x66);
2555     emit_byte(0x81);
2556     emit_byte(0x05);
2557     emit_long(d);
2558     emit_word(s);
2559     }
2560     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2561    
2562     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2563     {
2564     emit_byte(0x80);
2565     emit_byte(0x05);
2566     emit_long(d);
2567     emit_byte(s);
2568     }
2569     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2570    
2571     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2572     {
2573 gbeauche 1.2 if (optimize_accum && isaccum(d))
2574     emit_byte(0xa9);
2575     else {
2576 gbeauche 1.1 emit_byte(0xf7);
2577     emit_byte(0xc0+d);
2578 gbeauche 1.2 }
2579 gbeauche 1.1 emit_long(i);
2580     }
2581     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2582    
2583     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2584     {
2585     emit_byte(0x85);
2586     emit_byte(0xc0+8*s+d);
2587     }
2588     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2589    
2590     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2591     {
2592     emit_byte(0x66);
2593     emit_byte(0x85);
2594     emit_byte(0xc0+8*s+d);
2595     }
2596     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2597    
2598     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2599     {
2600     emit_byte(0x84);
2601     emit_byte(0xc0+8*s+d);
2602     }
2603     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2604    
2605 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2606     {
2607     emit_byte(0x81);
2608     emit_byte(0xf0+d);
2609     emit_long(i);
2610     }
2611     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2612    
2613 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2614     {
2615     if (optimize_imm8 && isbyte(i)) {
2616 gbeauche 1.2 emit_byte(0x83);
2617     emit_byte(0xe0+d);
2618     emit_byte(i);
2619 gbeauche 1.1 }
2620     else {
2621 gbeauche 1.2 if (optimize_accum && isaccum(d))
2622     emit_byte(0x25);
2623     else {
2624     emit_byte(0x81);
2625     emit_byte(0xe0+d);
2626     }
2627     emit_long(i);
2628 gbeauche 1.1 }
2629     }
2630     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2631    
2632     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2633     {
2634 gbeauche 1.2 emit_byte(0x66);
2635     if (optimize_imm8 && isbyte(i)) {
2636     emit_byte(0x83);
2637     emit_byte(0xe0+d);
2638     emit_byte(i);
2639     }
2640     else {
2641     if (optimize_accum && isaccum(d))
2642     emit_byte(0x25);
2643     else {
2644     emit_byte(0x81);
2645     emit_byte(0xe0+d);
2646     }
2647     emit_word(i);
2648     }
2649 gbeauche 1.1 }
2650     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2651    
2652     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2653     {
2654     emit_byte(0x21);
2655     emit_byte(0xc0+8*s+d);
2656     }
2657     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2658    
2659     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2660     {
2661     emit_byte(0x66);
2662     emit_byte(0x21);
2663     emit_byte(0xc0+8*s+d);
2664     }
2665     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2666    
2667     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2668     {
2669     emit_byte(0x20);
2670     emit_byte(0xc0+8*s+d);
2671     }
2672     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2673    
2674     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2675     {
2676     if (optimize_imm8 && isbyte(i)) {
2677     emit_byte(0x83);
2678     emit_byte(0xc8+d);
2679     emit_byte(i);
2680     }
2681     else {
2682 gbeauche 1.2 if (optimize_accum && isaccum(d))
2683     emit_byte(0x0d);
2684     else {
2685 gbeauche 1.1 emit_byte(0x81);
2686     emit_byte(0xc8+d);
2687 gbeauche 1.2 }
2688 gbeauche 1.1 emit_long(i);
2689     }
2690     }
2691     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2692    
2693     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2694     {
2695     emit_byte(0x09);
2696     emit_byte(0xc0+8*s+d);
2697     }
2698     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2699    
2700     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2701     {
2702     emit_byte(0x66);
2703     emit_byte(0x09);
2704     emit_byte(0xc0+8*s+d);
2705     }
2706     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2707    
2708     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2709     {
2710     emit_byte(0x08);
2711     emit_byte(0xc0+8*s+d);
2712     }
2713     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2714    
2715     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2716     {
2717     emit_byte(0x11);
2718     emit_byte(0xc0+8*s+d);
2719     }
2720     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2721    
2722     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2723     {
2724     emit_byte(0x66);
2725     emit_byte(0x11);
2726     emit_byte(0xc0+8*s+d);
2727     }
2728     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2729    
2730     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2731     {
2732     emit_byte(0x10);
2733     emit_byte(0xc0+8*s+d);
2734     }
2735     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2736    
2737     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2738     {
2739     emit_byte(0x01);
2740     emit_byte(0xc0+8*s+d);
2741     }
2742     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2743    
2744     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2745     {
2746     emit_byte(0x66);
2747     emit_byte(0x01);
2748     emit_byte(0xc0+8*s+d);
2749     }
2750     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2751    
2752     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2753     {
2754     emit_byte(0x00);
2755     emit_byte(0xc0+8*s+d);
2756     }
2757     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2758    
2759     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2760     {
2761     if (isbyte(i)) {
2762     emit_byte(0x83);
2763     emit_byte(0xe8+d);
2764     emit_byte(i);
2765     }
2766     else {
2767 gbeauche 1.2 if (optimize_accum && isaccum(d))
2768     emit_byte(0x2d);
2769     else {
2770 gbeauche 1.1 emit_byte(0x81);
2771     emit_byte(0xe8+d);
2772 gbeauche 1.2 }
2773 gbeauche 1.1 emit_long(i);
2774     }
2775     }
2776     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2777    
2778     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2779     {
2780 gbeauche 1.2 if (optimize_accum && isaccum(d))
2781     emit_byte(0x2c);
2782     else {
2783 gbeauche 1.1 emit_byte(0x80);
2784     emit_byte(0xe8+d);
2785 gbeauche 1.2 }
2786 gbeauche 1.1 emit_byte(i);
2787     }
2788     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2789    
2790     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2791     {
2792     if (isbyte(i)) {
2793     emit_byte(0x83);
2794     emit_byte(0xc0+d);
2795     emit_byte(i);
2796     }
2797     else {
2798 gbeauche 1.2 if (optimize_accum && isaccum(d))
2799     emit_byte(0x05);
2800     else {
2801 gbeauche 1.1 emit_byte(0x81);
2802     emit_byte(0xc0+d);
2803 gbeauche 1.2 }
2804 gbeauche 1.1 emit_long(i);
2805     }
2806     }
2807     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2808    
2809     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2810     {
2811 gbeauche 1.2 emit_byte(0x66);
2812 gbeauche 1.1 if (isbyte(i)) {
2813     emit_byte(0x83);
2814     emit_byte(0xc0+d);
2815     emit_byte(i);
2816     }
2817     else {
2818 gbeauche 1.2 if (optimize_accum && isaccum(d))
2819     emit_byte(0x05);
2820     else {
2821 gbeauche 1.1 emit_byte(0x81);
2822     emit_byte(0xc0+d);
2823 gbeauche 1.2 }
2824 gbeauche 1.1 emit_word(i);
2825     }
2826     }
2827     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2828    
2829     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2830     {
2831 gbeauche 1.2 if (optimize_accum && isaccum(d))
2832     emit_byte(0x04);
2833     else {
2834     emit_byte(0x80);
2835     emit_byte(0xc0+d);
2836     }
2837 gbeauche 1.1 emit_byte(i);
2838     }
2839     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2840    
2841     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2842     {
2843     emit_byte(0x19);
2844     emit_byte(0xc0+8*s+d);
2845     }
2846     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2847    
2848     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2849     {
2850     emit_byte(0x66);
2851     emit_byte(0x19);
2852     emit_byte(0xc0+8*s+d);
2853     }
2854     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2855    
2856     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2857     {
2858     emit_byte(0x18);
2859     emit_byte(0xc0+8*s+d);
2860     }
2861     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2862    
2863     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2864     {
2865     emit_byte(0x29);
2866     emit_byte(0xc0+8*s+d);
2867     }
2868     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2869    
2870     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2871     {
2872     emit_byte(0x66);
2873     emit_byte(0x29);
2874     emit_byte(0xc0+8*s+d);
2875     }
2876     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2877    
2878     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2879     {
2880     emit_byte(0x28);
2881     emit_byte(0xc0+8*s+d);
2882     }
2883     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2884    
2885     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2886     {
2887     emit_byte(0x39);
2888     emit_byte(0xc0+8*s+d);
2889     }
2890     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2891    
2892     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2893     {
2894     if (optimize_imm8 && isbyte(i)) {
2895     emit_byte(0x83);
2896     emit_byte(0xf8+r);
2897     emit_byte(i);
2898     }
2899     else {
2900 gbeauche 1.2 if (optimize_accum && isaccum(r))
2901     emit_byte(0x3d);
2902     else {
2903 gbeauche 1.1 emit_byte(0x81);
2904     emit_byte(0xf8+r);
2905 gbeauche 1.2 }
2906 gbeauche 1.1 emit_long(i);
2907     }
2908     }
2909     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2910    
2911     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2912     {
2913     emit_byte(0x66);
2914     emit_byte(0x39);
2915     emit_byte(0xc0+8*s+d);
2916     }
2917     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2918    
2919 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2920     {
2921     emit_byte(0x80);
2922     emit_byte(0x3d);
2923     emit_long(d);
2924     emit_byte(s);
2925     }
2926     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2927    
2928 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2929     {
2930 gbeauche 1.2 if (optimize_accum && isaccum(d))
2931     emit_byte(0x3c);
2932     else {
2933 gbeauche 1.1 emit_byte(0x80);
2934     emit_byte(0xf8+d);
2935 gbeauche 1.2 }
2936 gbeauche 1.1 emit_byte(i);
2937     }
2938     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2939    
2940     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2941     {
2942     emit_byte(0x38);
2943     emit_byte(0xc0+8*s+d);
2944     }
2945     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2946    
2947     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2948     {
2949     int fi;
2950    
2951     switch(factor) {
2952     case 1: fi=0; break;
2953     case 2: fi=1; break;
2954     case 4: fi=2; break;
2955     case 8: fi=3; break;
2956     default: abort();
2957     }
2958     emit_byte(0x39);
2959     emit_byte(0x04+8*d);
2960     emit_byte(5+8*index+0x40*fi);
2961     emit_long(offset);
2962     }
2963     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2964    
2965     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2966     {
2967     emit_byte(0x31);
2968     emit_byte(0xc0+8*s+d);
2969     }
2970     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2971    
2972     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2973     {
2974     emit_byte(0x66);
2975     emit_byte(0x31);
2976     emit_byte(0xc0+8*s+d);
2977     }
2978     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2979    
2980     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2981     {
2982     emit_byte(0x30);
2983     emit_byte(0xc0+8*s+d);
2984     }
2985     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2986    
2987     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2988     {
2989     if (optimize_imm8 && isbyte(s)) {
2990     emit_byte(0x83);
2991     emit_byte(0x2d);
2992     emit_long(d);
2993     emit_byte(s);
2994     }
2995     else {
2996     emit_byte(0x81);
2997     emit_byte(0x2d);
2998     emit_long(d);
2999     emit_long(s);
3000     }
3001     }
3002     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
3003    
3004     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
3005     {
3006     if (optimize_imm8 && isbyte(s)) {
3007     emit_byte(0x83);
3008     emit_byte(0x3d);
3009     emit_long(d);
3010     emit_byte(s);
3011     }
3012     else {
3013     emit_byte(0x81);
3014     emit_byte(0x3d);
3015     emit_long(d);
3016     emit_long(s);
3017     }
3018     }
3019     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
3020    
3021     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
3022     {
3023     emit_byte(0x87);
3024     emit_byte(0xc0+8*r1+r2);
3025     }
3026     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
3027    
3028 gbeauche 1.36 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
3029     {
3030     emit_byte(0x86);
3031     emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
3032     }
3033     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
3034    
3035 gbeauche 1.1 /*************************************************************************
3036     * FIXME: mem access modes probably wrong *
3037     *************************************************************************/
3038    
3039     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
3040     {
3041     emit_byte(0x9c);
3042     }
3043     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
3044    
3045     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
3046     {
3047     emit_byte(0x9d);
3048     }
3049     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
3050 gbeauche 1.13
3051 gbeauche 1.34 /* Generate floating-point instructions */
3052     static inline void x86_fadd_m(MEMR s)
3053     {
3054     emit_byte(0xdc);
3055     emit_byte(0x05);
3056     emit_long(s);
3057     }
3058    
3059 gbeauche 1.13 #endif
3060 gbeauche 1.1
3061     /*************************************************************************
3062     * Unoptimizable stuff --- jump *
3063     *************************************************************************/
3064    
3065     static __inline__ void raw_call_r(R4 r)
3066     {
3067 gbeauche 1.20 #if USE_NEW_RTASM
3068     CALLsr(r);
3069     #else
3070 gbeauche 1.1 emit_byte(0xff);
3071     emit_byte(0xd0+r);
3072 gbeauche 1.20 #endif
3073 gbeauche 1.5 }
3074    
3075     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3076     {
3077 gbeauche 1.20 #if USE_NEW_RTASM
3078     CALLsm(base, X86_NOREG, r, m);
3079     #else
3080 gbeauche 1.5 int mu;
3081     switch(m) {
3082     case 1: mu=0; break;
3083     case 2: mu=1; break;
3084     case 4: mu=2; break;
3085     case 8: mu=3; break;
3086     default: abort();
3087     }
3088     emit_byte(0xff);
3089     emit_byte(0x14);
3090     emit_byte(0x05+8*r+0x40*mu);
3091     emit_long(base);
3092 gbeauche 1.20 #endif
3093 gbeauche 1.1 }
3094    
3095     static __inline__ void raw_jmp_r(R4 r)
3096     {
3097 gbeauche 1.20 #if USE_NEW_RTASM
3098     JMPsr(r);
3099     #else
3100 gbeauche 1.1 emit_byte(0xff);
3101     emit_byte(0xe0+r);
3102 gbeauche 1.20 #endif
3103 gbeauche 1.1 }
3104    
3105     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3106     {
3107 gbeauche 1.20 #if USE_NEW_RTASM
3108     JMPsm(base, X86_NOREG, r, m);
3109     #else
3110 gbeauche 1.1 int mu;
3111     switch(m) {
3112     case 1: mu=0; break;
3113     case 2: mu=1; break;
3114     case 4: mu=2; break;
3115     case 8: mu=3; break;
3116     default: abort();
3117     }
3118     emit_byte(0xff);
3119     emit_byte(0x24);
3120     emit_byte(0x05+8*r+0x40*mu);
3121     emit_long(base);
3122 gbeauche 1.20 #endif
3123 gbeauche 1.1 }
3124    
3125     static __inline__ void raw_jmp_m(uae_u32 base)
3126     {
3127     emit_byte(0xff);
3128     emit_byte(0x25);
3129     emit_long(base);
3130     }
3131    
3132    
3133     static __inline__ void raw_call(uae_u32 t)
3134     {
3135 gbeauche 1.20 #if USE_NEW_RTASM
3136     CALLm(t);
3137     #else
3138 gbeauche 1.1 emit_byte(0xe8);
3139     emit_long(t-(uae_u32)target-4);
3140 gbeauche 1.20 #endif
3141 gbeauche 1.1 }
3142    
3143     static __inline__ void raw_jmp(uae_u32 t)
3144     {
3145 gbeauche 1.20 #if USE_NEW_RTASM
3146     JMPm(t);
3147     #else
3148 gbeauche 1.1 emit_byte(0xe9);
3149     emit_long(t-(uae_u32)target-4);
3150 gbeauche 1.20 #endif
3151 gbeauche 1.1 }
3152    
3153     static __inline__ void raw_jl(uae_u32 t)
3154     {
3155     emit_byte(0x0f);
3156     emit_byte(0x8c);
3157 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3158 gbeauche 1.1 }
3159    
3160     static __inline__ void raw_jz(uae_u32 t)
3161     {
3162     emit_byte(0x0f);
3163     emit_byte(0x84);
3164 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3165 gbeauche 1.1 }
3166    
3167     static __inline__ void raw_jnz(uae_u32 t)
3168     {
3169     emit_byte(0x0f);
3170     emit_byte(0x85);
3171 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3172 gbeauche 1.1 }
3173    
3174     static __inline__ void raw_jnz_l_oponly(void)
3175     {
3176     emit_byte(0x0f);
3177     emit_byte(0x85);
3178     }
3179    
3180     static __inline__ void raw_jcc_l_oponly(int cc)
3181     {
3182     emit_byte(0x0f);
3183     emit_byte(0x80+cc);
3184     }
3185    
3186     static __inline__ void raw_jnz_b_oponly(void)
3187     {
3188     emit_byte(0x75);
3189     }
3190    
3191     static __inline__ void raw_jz_b_oponly(void)
3192     {
3193     emit_byte(0x74);
3194     }
3195    
3196     static __inline__ void raw_jcc_b_oponly(int cc)
3197     {
3198     emit_byte(0x70+cc);
3199     }
3200    
3201     static __inline__ void raw_jmp_l_oponly(void)
3202     {
3203     emit_byte(0xe9);
3204     }
3205    
3206     static __inline__ void raw_jmp_b_oponly(void)
3207     {
3208     emit_byte(0xeb);
3209     }
3210    
3211     static __inline__ void raw_ret(void)
3212     {
3213     emit_byte(0xc3);
3214     }
3215    
3216     static __inline__ void raw_nop(void)
3217     {
3218     emit_byte(0x90);
3219     }
3220    
3221 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3222     {
3223     /* Source: GNU Binutils 2.12.90.0.15 */
3224     /* Various efficient no-op patterns for aligning code labels.
3225     Note: Don't try to assemble the instructions in the comments.
3226     0L and 0w are not legal. */
3227     static const uae_u8 f32_1[] =
3228     {0x90}; /* nop */
3229     static const uae_u8 f32_2[] =
3230     {0x89,0xf6}; /* movl %esi,%esi */
3231     static const uae_u8 f32_3[] =
3232     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3233     static const uae_u8 f32_4[] =
3234     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3235     static const uae_u8 f32_5[] =
3236     {0x90, /* nop */
3237     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3238     static const uae_u8 f32_6[] =
3239     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3240     static const uae_u8 f32_7[] =
3241     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3242     static const uae_u8 f32_8[] =
3243     {0x90, /* nop */
3244     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3245     static const uae_u8 f32_9[] =
3246     {0x89,0xf6, /* movl %esi,%esi */
3247     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3248     static const uae_u8 f32_10[] =
3249     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3250     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3251     static const uae_u8 f32_11[] =
3252     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3253     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3254     static const uae_u8 f32_12[] =
3255     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3256     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3257     static const uae_u8 f32_13[] =
3258     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3259     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3260     static const uae_u8 f32_14[] =
3261     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3262     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3263     static const uae_u8 f32_15[] =
3264     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3265     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3266     static const uae_u8 f32_16[] =
3267     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3268     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3269     static const uae_u8 *const f32_patt[] = {
3270     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3271     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3272     };
3273 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3274 gbeauche 1.8
3275 gbeauche 1.21 #if defined(__x86_64__)
3276     /* The recommended way to pad 64bit code is to use NOPs preceded by
3277     maximally four 0x66 prefixes. Balance the size of nops. */
3278     if (nbytes == 0)
3279     return;
3280    
3281     int i;
3282     int nnops = (nbytes + 3) / 4;
3283     int len = nbytes / nnops;
3284     int remains = nbytes - nnops * len;
3285    
3286     for (i = 0; i < remains; i++) {
3287     emit_block(prefixes, len);
3288     raw_nop();
3289     }
3290     for (; i < nnops; i++) {
3291     emit_block(prefixes, len - 1);
3292     raw_nop();
3293     }
3294     #else
3295 gbeauche 1.8 int nloops = nbytes / 16;
3296     while (nloops-- > 0)
3297     emit_block(f32_16, sizeof(f32_16));
3298    
3299     nbytes %= 16;
3300     if (nbytes)
3301     emit_block(f32_patt[nbytes - 1], nbytes);
3302 gbeauche 1.21 #endif
3303 gbeauche 1.8 }
3304    
3305 gbeauche 1.1
3306     /*************************************************************************
3307     * Flag handling, to and fro UAE flag register *
3308     *************************************************************************/
3309    
3310 gbeauche 1.36 static __inline__ void raw_flags_evicted(int r)
3311 gbeauche 1.1 {
3312     //live.state[FLAGTMP].status=CLEAN;
3313     live.state[FLAGTMP].status=INMEM;
3314     live.state[FLAGTMP].realreg=-1;
3315     /* We just "evicted" FLAGTMP. */
3316     if (live.nat[r].nholds!=1) {
3317     /* Huh? */
3318     abort();
3319     }
3320     live.nat[r].nholds=0;
3321 gbeauche 1.36 }
3322    
3323     #define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */
3324     static __inline__ void raw_flags_to_reg_FLAGREG(int r)
3325     {
3326     raw_lahf(0); /* Most flags in AH */
3327     //raw_setcc(r,0); /* V flag in AL */
3328     raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3329    
3330     #if 1 /* Let's avoid those nasty partial register stalls */
3331     //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3332     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3333     raw_flags_evicted(r);
3334 gbeauche 1.1 #endif
3335     }
3336    
3337 gbeauche 1.36 #define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */
3338     static __inline__ void raw_reg_to_flags_FLAGREG(int r)
3339 gbeauche 1.1 {
3340     raw_cmp_b_ri(r,-127); /* set V */
3341     raw_sahf(0);
3342     }
3343    
3344 gbeauche 1.36 #define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
3345     static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
3346 gbeauche 1.24 {
3347     raw_mov_l_rr(tmp,s);
3348     raw_lahf(s); /* flags into ah */
3349     raw_and_l_ri(s,0xffffbfff);
3350     raw_and_l_ri(tmp,0x00004000);
3351     raw_xor_l_ri(tmp,0x00004000);
3352     raw_or_l(s,tmp);
3353     raw_sahf(s);
3354     }
3355    
/* LAHF/SETcc variant needs no run-time initialisation. */
static __inline__ void raw_flags_init_FLAGREG(void)
{
}
3357 gbeauche 1.1
3358 gbeauche 1.36 #define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */
3359     static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
3360 gbeauche 1.1 {
3361     raw_pushfl();
3362     raw_pop_l_r(r);
3363 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3364 gbeauche 1.36 raw_flags_evicted(r);
3365 gbeauche 1.1 }
3366    
3367 gbeauche 1.36 #define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */
3368     static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
3369 gbeauche 1.1 {
3370     raw_push_l_r(r);
3371     raw_popfl();
3372     }
3373    
3374 gbeauche 1.36 #define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */
3375     static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
3376 gbeauche 1.24 {
3377     raw_mov_l_rr(tmp,s);
3378     raw_pushfl();
3379     raw_pop_l_r(s);
3380     raw_and_l_ri(s,0xffffffbf);
3381     raw_and_l_ri(tmp,0x00000040);
3382     raw_xor_l_ri(tmp,0x00000040);
3383     raw_or_l(s,tmp);
3384     raw_push_l_r(s);
3385     raw_popfl();
3386     }
3387 gbeauche 1.36
/* PUSHF/POPF variant needs no run-time initialisation. */
static __inline__ void raw_flags_init_FLAGSTK(void)
{
}
3389    
3390     #if defined(__x86_64__)
3391     /* Try to use the LAHF/SETO method on x86_64 since it is faster.
3392     This can't be the default because some older CPUs don't support
3393     LAHF/SAHF in long mode. */
3394     static int FLAG_NREG1_FLAGGEN = 0;
3395     static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
3396     {
3397     if (have_lahf_lm) {
3398     // NOTE: the interpreter uses the normal EFLAGS layout
3399     // pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
3400     // sahf/lahf CF(8) ZF(14) SF(15) OF( 0)
3401     assert(r == 0);
3402     raw_setcc(r,0); /* V flag in AL */
3403     raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */
3404     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
3405     raw_lahf(0); /* most flags in AH */
3406     raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
3407     raw_flags_evicted(r);
3408     }
3409     else
3410     raw_flags_to_reg_FLAGSTK(r);
3411     }
3412    
3413     static int FLAG_NREG2_FLAGGEN = 0;
3414     static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
3415     {
3416     if (have_lahf_lm) {
3417     raw_xchg_b_rr(0,AH_INDEX);
3418     raw_cmp_b_ri(r,-120); /* set V */
3419     raw_sahf(0);
3420     }
3421     else
3422     raw_reg_to_flags_FLAGSTK(r);
3423     }
3424    
3425     static int FLAG_NREG3_FLAGGEN = 0;
3426     static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
3427     {
3428     if (have_lahf_lm)
3429     raw_flags_set_zero_FLAGREG(s, tmp);
3430     else
3431     raw_flags_set_zero_FLAGSTK(s, tmp);
3432     }
3433    
3434     static __inline__ void raw_flags_init_FLAGGEN(void)
3435     {
3436     if (have_lahf_lm) {
3437     FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
3438     FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
3439     FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGREG;
3440     }
3441     else {
3442     FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
3443     FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
3444     FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGSTK;
3445     }
3446     }
3447 gbeauche 1.1 #endif
3448    
/* Pick the flag save/restore implementation at compile time:
   FLAGREG if SAHF_SETO_PROFITABLE is defined, FLAGGEN on x86-64,
   FLAGSTK otherwise. */
#if defined(SAHF_SETO_PROFITABLE)
#define FLAG_SUFFIX FLAGREG
#elif defined(__x86_64__)
#define FLAG_SUFFIX FLAGGEN
#else
#define FLAG_SUFFIX FLAGSTK
#endif

/* Two-level paste so that FLAG_SUFFIX is expanded before being glued
   onto the base name with an underscore. */
#define FLAG_GLUE_2(base, suffix)	base ## _ ## suffix
#define FLAG_GLUE_1(base, suffix)	FLAG_GLUE_2(base, suffix)
#define FLAG_GLUE(base)			FLAG_GLUE_1(base, FLAG_SUFFIX)

/* Generic names mapped onto the selected implementation */
#define raw_flags_init		FLAG_GLUE(raw_flags_init)
#define FLAG_NREG1		FLAG_GLUE(FLAG_NREG1)
#define raw_flags_to_reg	FLAG_GLUE(raw_flags_to_reg)
#define FLAG_NREG2		FLAG_GLUE(FLAG_NREG2)
#define raw_reg_to_flags	FLAG_GLUE(raw_reg_to_flags)
#define FLAG_NREG3		FLAG_GLUE(FLAG_NREG3)
#define raw_flags_set_zero	FLAG_GLUE(raw_flags_set_zero)
3468    
3469 gbeauche 1.1 /* Apparently, there are enough instructions between flag store and
3470     flag reload to avoid the partial memory stall */
3471     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3472     {
3473     #if 1
3474 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3475 gbeauche 1.1 #else
3476 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3477     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3478 gbeauche 1.1 #endif
3479     }
3480    
3481     /* FLAGX is byte sized, and we *do* write it at that size */
3482     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3483     {
3484     if (live.nat[target].canbyte)
3485 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3486 gbeauche 1.1 else if (live.nat[target].canword)
3487 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3488 gbeauche 1.1 else
3489 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3490 gbeauche 1.1 }
3491    
3492 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3493     {
3494     if (off) raw_sub_l_ri(ESP_INDEX,off);
3495     }
3496    
3497 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3498     {
3499 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3500 gbeauche 1.1 }
3501    
3502     /*************************************************************************
3503     * Handling mistaken direct memory access *
3504     *************************************************************************/
3505    
3506     // gb-- I don't need that part for JIT Basilisk II
3507     #if defined(NATMEM_OFFSET) && 0
3508     #include <asm/sigcontext.h>
3509     #include <signal.h>
3510    
3511     #define SIG_READ 1
3512     #define SIG_WRITE 2
3513    
3514     static int in_handler=0;
3515     static uae_u8 veccode[256];
3516    
3517     static void vec(int x, struct sigcontext sc)
3518     {
3519     uae_u8* i=(uae_u8*)sc.eip;
3520     uae_u32 addr=sc.cr2;
3521     int r=-1;
3522     int size=4;
3523     int dir=-1;
3524     int len=0;
3525     int j;
3526    
3527     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3528     if (!canbang)
3529     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3530     if (in_handler)
3531     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3532    
3533     if (canbang && i>=compiled_code && i<=current_compile_p) {
3534     if (*i==0x66) {
3535     i++;
3536     size=2;
3537     len++;
3538     }
3539    
3540     switch(i[0]) {
3541     case 0x8a:
3542     if ((i[1]&0xc0)==0x80) {
3543     r=(i[1]>>3)&7;
3544     dir=SIG_READ;
3545     size=1;
3546     len+=6;
3547     break;
3548     }
3549     break;
3550     case 0x88:
3551     if ((i[1]&0xc0)==0x80) {
3552     r=(i[1]>>3)&7;
3553     dir=SIG_WRITE;
3554     size=1;
3555     len+=6;
3556     break;
3557     }
3558     break;
3559     case 0x8b:
3560     if ((i[1]&0xc0)==0x80) {
3561     r=(i[1]>>3)&7;
3562     dir=SIG_READ;
3563     len+=6;
3564     break;
3565     }
3566     if ((i[1]&0xc0)==0x40) {
3567     r=(i[1]>>3)&7;
3568     dir=SIG_READ;
3569     len+=3;
3570     break;
3571     }
3572     break;
3573     case 0x89:
3574     if ((i[1]&0xc0)==0x80) {
3575     r=(i[1]>>3)&7;
3576     dir=SIG_WRITE;
3577     len+=6;
3578     break;
3579     }
3580     if ((i[1]&0xc0)==0x40) {
3581     r=(i[1]>>3)&7;
3582     dir=SIG_WRITE;
3583     len+=3;
3584     break;
3585     }
3586     break;
3587     }
3588     }
3589    
3590     if (r!=-1) {
3591     void* pr=NULL;
3592     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3593    
3594     switch(r) {
3595     case 0: pr=&(sc.eax); break;
3596     case 1: pr=&(sc.ecx); break;
3597     case 2: pr=&(sc.edx); break;
3598     case 3: pr=&(sc.ebx); break;
3599     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3600     case 5: pr=(size>1)?
3601     (void*)(&(sc.ebp)):
3602     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3603     case 6: pr=(size>1)?
3604     (void*)(&(sc.esi)):
3605     (void*)(((uae_u8*)&(sc.edx))+1); break;
3606     case 7: pr=(size>1)?
3607     (void*)(&(sc.edi)):
3608     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3609     default: abort();
3610     }
3611     if (pr) {
3612     blockinfo* bi;
3613    
3614     if (currprefs.comp_oldsegv) {
3615     addr-=NATMEM_OFFSET;
3616    
3617     if ((addr>=0x10000000 && addr<0x40000000) ||
3618     (addr>=0x50000000)) {
3619     write_log("Suspicious address in %x SEGV handler.\n",addr);
3620     }
3621     if (dir==SIG_READ) {
3622     switch(size) {
3623     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3624     case 2: *((uae_u16*)pr)=get_word(addr); break;
3625     case 4: *((uae_u32*)pr)=get_long(addr); break;
3626     default: abort();
3627     }
3628     }
3629     else { /* write */
3630     switch(size) {
3631     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3632     case 2: put_word(addr,*((uae_u16*)pr)); break;
3633     case 4: put_long(addr,*((uae_u32*)pr)); break;
3634     default: abort();
3635     }
3636     }
3637     write_log("Handled one access!\n");
3638     fflush(stdout);
3639     segvcount++;
3640     sc.eip+=len;
3641     }
3642     else {
3643     void* tmp=target;
3644     int i;
3645     uae_u8 vecbuf[5];
3646    
3647     addr-=NATMEM_OFFSET;
3648    
3649     if ((addr>=0x10000000 && addr<0x40000000) ||
3650     (addr>=0x50000000)) {
3651     write_log("Suspicious address in %x SEGV handler.\n",addr);
3652     }
3653    
3654     target=(uae_u8*)sc.eip;
3655     for (i=0;i<5;i++)
3656     vecbuf[i]=target[i];
3657     emit_byte(0xe9);
3658 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3659 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3660    
3661     write_log("Handled one access!\n");
3662     fflush(stdout);
3663     segvcount++;
3664    
3665     target=veccode;
3666    
3667     if (dir==SIG_READ) {
3668     switch(size) {
3669     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3670     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3671     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3672     default: abort();
3673     }
3674     }
3675     else { /* write */
3676     switch(size) {
3677     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3678     case 2: put_word(addr,*((uae_u16*)pr)); break;
3679     case 4: put_long(addr,*((uae_u32*)pr)); break;
3680     default: abort();
3681     }
3682     }
3683     for (i=0;i<5;i++)
3684     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3685 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3686 gbeauche 1.1 emit_byte(0xe9);
3687 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3688 gbeauche 1.1 in_handler=1;
3689     target=tmp;
3690     }
3691     bi=active;
3692     while (bi) {
3693     if (bi->handler &&
3694     (uae_u8*)bi->direct_handler<=i &&
3695     (uae_u8*)bi->nexthandler>i) {
3696     write_log("deleted trigger (%p<%p<%p) %p\n",
3697     bi->handler,
3698     i,
3699     bi->nexthandler,
3700     bi->pc_p);
3701     invalidate_block(bi);
3702     raise_in_cl_list(bi);
3703     set_special(0);
3704     return;
3705     }
3706     bi=bi->next;
3707     }
3708     /* Not found in the active list. Might be a rom routine that
3709     is in the dormant list */
3710     bi=dormant;
3711     while (bi) {
3712     if (bi->handler &&
3713     (uae_u8*)bi->direct_handler<=i &&
3714     (uae_u8*)bi->nexthandler>i) {
3715     write_log("deleted trigger (%p<%p<%p) %p\n",
3716     bi->handler,
3717     i,
3718     bi->nexthandler,
3719     bi->pc_p);
3720     invalidate_block(bi);
3721     raise_in_cl_list(bi);
3722     set_special(0);
3723     return;
3724     }
3725     bi=bi->next;
3726     }
3727     write_log("Huh? Could not find trigger!\n");
3728     return;
3729     }
3730     }
3731     write_log("Can't handle access!\n");
3732     for (j=0;j<10;j++) {
3733     write_log("instruction byte %2d is %02x\n",j,i[j]);
3734     }
3735     write_log("Please send the above info (starting at \"fault address\") to\n"
3736     "bmeyer@csse.monash.edu.au\n"
3737     "This shouldn't happen ;-)\n");
3738     fflush(stdout);
3739     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3740     }
3741     #endif
3742    
3743    
3744     /*************************************************************************
3745     * Checking for CPU features *
3746     *************************************************************************/
3747    
3748 gbeauche 1.3 struct cpuinfo_x86 {
3749     uae_u8 x86; // CPU family
3750     uae_u8 x86_vendor; // CPU vendor
3751     uae_u8 x86_processor; // CPU canonical processor type
3752     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3753     uae_u32 x86_hwcap;
3754     uae_u8 x86_model;
3755     uae_u8 x86_mask;
3756     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3757     char x86_vendor_id[16];
3758     };
3759     struct cpuinfo_x86 cpuinfo;
3760    
/* CPU vendor identifiers */
enum {
	X86_VENDOR_INTEL	= 0,
	X86_VENDOR_CYRIX	= 1,
	X86_VENDOR_AMD		= 2,
	X86_VENDOR_UMC		= 3,
	X86_VENDOR_NEXGEN	= 4,
	X86_VENDOR_CENTAUR	= 5,
	X86_VENDOR_RISE		= 6,
	X86_VENDOR_TRANSMETA	= 7,
	X86_VENDOR_NSC		= 8,
	X86_VENDOR_UNKNOWN	= 0xff	/* vendor string not recognised */
};
3773    
/* Canonical processor types; the values index the per-processor tables,
   and X86_PROCESSOR_max is the number of entries. */
enum {
	X86_PROCESSOR_I386 = 0,		/* 80386 */
	X86_PROCESSOR_I486,		/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM,
	X86_PROCESSOR_PENTIUMPRO,
	X86_PROCESSOR_K6,
	X86_PROCESSOR_ATHLON,
	X86_PROCESSOR_PENTIUM4,
	X86_PROCESSOR_X86_64,
	X86_PROCESSOR_max		/* count, also used as "not determined yet" */
};
3785    
3786     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3787     "80386",
3788     "80486",
3789     "Pentium",
3790     "PentiumPro",
3791     "K6",
3792     "Athlon",
3793 gbeauche 1.16 "Pentium4",
3794 gbeauche 1.28 "x86-64"
3795 gbeauche 1.3 };
3796    
3797     static struct ptt {
3798     const int align_loop;
3799     const int align_loop_max_skip;
3800     const int align_jump;
3801     const int align_jump_max_skip;
3802     const int align_func;
3803     }
3804     x86_alignments[X86_PROCESSOR_max] = {
3805     { 4, 3, 4, 3, 4 },
3806     { 16, 15, 16, 15, 16 },
3807     { 16, 7, 16, 7, 16 },
3808     { 16, 15, 16, 7, 16 },
3809     { 32, 7, 32, 7, 32 },
3810 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3811 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3812     { 16, 7, 16, 7, 16 }
3813 gbeauche 1.3 };
3814 gbeauche 1.1
3815 gbeauche 1.3 static void
3816     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3817 gbeauche 1.1 {
3818 gbeauche 1.3 char *v = c->x86_vendor_id;
3819    
3820     if (!strcmp(v, "GenuineIntel"))
3821     c->x86_vendor = X86_VENDOR_INTEL;
3822     else if (!strcmp(v, "AuthenticAMD"))
3823     c->x86_vendor = X86_VENDOR_AMD;
3824     else if (!strcmp(v, "CyrixInstead"))
3825     c->x86_vendor = X86_VENDOR_CYRIX;
3826     else if (!strcmp(v, "Geode by NSC"))
3827     c->x86_vendor = X86_VENDOR_NSC;
3828     else if (!strcmp(v, "UMC UMC UMC "))
3829     c->x86_vendor = X86_VENDOR_UMC;
3830     else if (!strcmp(v, "CentaurHauls"))
3831     c->x86_vendor = X86_VENDOR_CENTAUR;
3832     else if (!strcmp(v, "NexGenDriven"))
3833     c->x86_vendor = X86_VENDOR_NEXGEN;
3834     else if (!strcmp(v, "RiseRiseRise"))
3835     c->x86_vendor = X86_VENDOR_RISE;
3836     else if (!strcmp(v, "GenuineTMx86") ||
3837     !strcmp(v, "TransmetaCPU"))
3838     c->x86_vendor = X86_VENDOR_TRANSMETA;
3839     else
3840     c->x86_vendor = X86_VENDOR_UNKNOWN;
3841     }
3842 gbeauche 1.1
3843 gbeauche 1.3 static void
3844     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3845     {
3846 gbeauche 1.27 const int CPUID_SPACE = 4096;
3847     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3848     if (cpuid_space == VM_MAP_FAILED)
3849     abort();
3850     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3851    
3852 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3853 gbeauche 1.3 uae_u8* tmp=get_target();
3854 gbeauche 1.1
3855 gbeauche 1.20 s_op = op;
3856 gbeauche 1.3 set_target(cpuid_space);
3857     raw_push_l_r(0); /* eax */
3858     raw_push_l_r(1); /* ecx */
3859     raw_push_l_r(2); /* edx */
3860     raw_push_l_r(3); /* ebx */
3861 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3862 gbeauche 1.3 raw_cpuid(0);
3863 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3864     raw_mov_l_mr((uintptr)&s_ebx,3);
3865     raw_mov_l_mr((uintptr)&s_ecx,1);
3866     raw_mov_l_mr((uintptr)&s_edx,2);
3867 gbeauche 1.3 raw_pop_l_r(3);
3868     raw_pop_l_r(2);
3869     raw_pop_l_r(1);
3870     raw_pop_l_r(0);
3871     raw_ret();
3872     set_target(tmp);
3873 gbeauche 1.1
3874 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3875 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3876     if (ebx != NULL) *ebx = s_ebx;
3877     if (ecx != NULL) *ecx = s_ecx;
3878     if (edx != NULL) *edx = s_edx;
3879 gbeauche 1.27
3880     vm_release(cpuid_space, CPUID_SPACE);
3881 gbeauche 1.1 }
3882    
3883 gbeauche 1.3 static void
3884     raw_init_cpu(void)
3885 gbeauche 1.1 {
3886 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3887    
3888     /* Defaults */
3889 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3890 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3891     c->cpuid_level = -1; /* CPUID not detected */
3892     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3893     c->x86_vendor_id[0] = '\0'; /* Unset */
3894     c->x86_hwcap = 0;
3895    
3896     /* Get vendor name */
3897     c->x86_vendor_id[12] = '\0';
3898     cpuid(0x00000000,
3899     (uae_u32 *)&c->cpuid_level,
3900     (uae_u32 *)&c->x86_vendor_id[0],
3901     (uae_u32 *)&c->x86_vendor_id[8],
3902     (uae_u32 *)&c->x86_vendor_id[4]);
3903     x86_get_cpu_vendor(c);
3904    
3905     /* Intel-defined flags: level 0x00000001 */
3906     c->x86_brand_id = 0;
3907     if ( c->cpuid_level >= 0x00000001 ) {
3908     uae_u32 tfms, brand_id;
3909     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3910     c->x86 = (tfms >> 8) & 15;
3911 gbeauche 1.29 if (c->x86 == 0xf)
3912     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3913 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3914 gbeauche 1.29 if (c->x86_model == 0xf)
3915     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3916 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3917     c->x86_mask = tfms & 15;
3918     } else {
3919     /* Have CPUID level 0 only - unheard of */
3920     c->x86 = 4;
3921     }
3922    
3923 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3924     uae_u32 xlvl;
3925     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3926     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3927     if ( xlvl >= 0x80000001 ) {
3928 gbeauche 1.28 uae_u32 features, extra_features;
3929     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3930 gbeauche 1.16 if (features & (1 << 29)) {
3931     /* Assume x86-64 if long mode is supported */
3932 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3933 gbeauche 1.16 }
3934 gbeauche 1.28 if (extra_features & (1 << 0))
3935     have_lahf_lm = true;
3936 gbeauche 1.16 }
3937     }
3938    
3939 gbeauche 1.3 /* Canonicalize processor ID */
3940     switch (c->x86) {
3941     case 3:
3942     c->x86_processor = X86_PROCESSOR_I386;
3943     break;
3944     case 4:
3945     c->x86_processor = X86_PROCESSOR_I486;
3946     break;
3947     case 5:
3948     if (c->x86_vendor == X86_VENDOR_AMD)
3949     c->x86_processor = X86_PROCESSOR_K6;
3950     else
3951     c->x86_processor = X86_PROCESSOR_PENTIUM;
3952     break;
3953     case 6:
3954     if (c->x86_vendor == X86_VENDOR_AMD)
3955     c->x86_processor = X86_PROCESSOR_ATHLON;
3956     else
3957     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3958     break;
3959     case 15:
3960 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3961     switch (c->x86_vendor) {
3962     case X86_VENDOR_INTEL:
3963     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3964     break;
3965     case X86_VENDOR_AMD:
3966     /* Assume a 32-bit Athlon processor if not in long mode */
3967     c->x86_processor = X86_PROCESSOR_ATHLON;
3968     break;
3969     }
3970     }
3971     break;
3972 gbeauche 1.3 }
3973     if (c->x86_processor == X86_PROCESSOR_max) {
3974 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3975     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3976 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3977     fprintf(stderr, " Model : %d\n", c->x86_model);
3978     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3979 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3980 gbeauche 1.3 if (c->x86_brand_id)
3981     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3982     }
3983    
3984     /* Have CMOV support? */
3985 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3986 gbeauche 1.38 #if defined(__x86_64__)
3987     if (!have_cmov) {
3988     write_log("x86-64 implementations are bound to have CMOV!\n");
3989     abort();
3990     }
3991     #endif
3992 gbeauche 1.3
3993     /* Can the host CPU suffer from partial register stalls? */
3994     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3995     #if 1
3996     /* It appears that partial register writes are a bad idea even on
3997 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3998     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3999 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
4000     have_rat_stall = true;
4001 gbeauche 1.1 #endif
4002 gbeauche 1.3
4003     /* Alignments */
4004     if (tune_alignment) {
4005     align_loops = x86_alignments[c->x86_processor].align_loop;
4006     align_jumps = x86_alignments[c->x86_processor].align_jump;
4007     }
4008    
4009     write_log("Max CPUID level=%d Processor is %s [%s]\n",
4010     c->cpuid_level, c->x86_vendor_id,
4011     x86_processor_string_table[c->x86_processor]);
4012 gbeauche 1.36
4013     raw_flags_init();
4014 gbeauche 1.1 }
4015    
4016 gbeauche 1.10 static bool target_check_bsf(void)
4017     {
4018     bool mismatch = false;
4019     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
4020     for (int g_CF = 0; g_CF <= 1; g_CF++) {
4021     for (int g_OF = 0; g_OF <= 1; g_OF++) {
4022     for (int g_SF = 0; g_SF <= 1; g_SF++) {
4023     for (int value = -1; value <= 1; value++) {
4024 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
4025     unsigned long tmp = value;
4026 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
4027 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
4028 gbeauche 1.10 int OF = (flags >> 11) & 1;
4029     int SF = (flags >> 7) & 1;
4030     int ZF = (flags >> 6) & 1;
4031     int CF = flags & 1;
4032     tmp = (value == 0);
4033     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
4034     mismatch = true;
4035     }
4036     }}}}
4037     if (mismatch)
4038     write_log("Target CPU defines all flags on BSF instruction\n");
4039     return !mismatch;
4040     }
4041    
4042 gbeauche 1.1
4043     /*************************************************************************
4044     * FPU stuff *
4045     *************************************************************************/
4046    
4047    
4048     static __inline__ void raw_fp_init(void)
4049     {
4050     int i;
4051    
4052     for (i=0;i<N_FREGS;i++)
4053     live.spos[i]=-2;
4054     live.tos=-1; /* Stack is empty */
4055     }
4056    
4057     static __inline__ void raw_fp_cleanup_drop(void)
4058     {
4059     #if 0
4060     /* using FINIT instead of popping all the entries.
4061     Seems to have side effects --- there is display corruption in
4062     Quake when this is used */
4063     if (live.tos>1) {
4064     emit_byte(0x9b);
4065     emit_byte(0xdb);
4066     emit_byte(0xe3);
4067     live.tos=-1;
4068     }
4069     #endif
4070     while (live.tos>=1) {
4071     emit_byte(0xde);
4072     emit_byte(0xd9);
4073     live.tos-=2;
4074     }
4075     while (live.tos>=0) {
4076     emit_byte(0xdd);
4077     emit_byte(0xd8);
4078     live.tos--;
4079     }
4080     raw_fp_init();
4081     }
4082    
4083     static __inline__ void make_tos(int r)
4084     {
4085     int p,q;
4086    
4087     if (live.spos[r]<0) { /* Register not yet on stack */
4088     emit_byte(0xd9);
4089     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
4090     live.tos++;
4091     live.spos[r]=live.tos;
4092     live.onstack[live.tos]=r;
4093     return;
4094     }
4095     /* Register is on stack */
4096     if (live.tos==live.spos[r])
4097     return;
4098     p=live.spos[r];
4099     q=live.onstack[live.tos];
4100    
4101     emit_byte(0xd9);
4102     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
4103     live.onstack[live.tos]=r;
4104     live.spos[r]=live.tos;
4105     live.onstack[p]=q;
4106     live.spos[q]=p;
4107     }
4108    
4109     static __inline__ void make_tos2(int r, int r2)
4110     {
4111     int q;
4112    
4113     make_tos(r2); /* Put the reg that's supposed to end up in position2
4114     on top */
4115    
4116     if (live.spos[r]<0) { /* Register not yet on stack */
4117     make_tos(r); /* This will extend the stack */
4118     return;
4119     }
4120     /* Register is on stack */
4121     emit_byte(0xd9);
4122     emit_byte(0xc9); /* Move r2 into position 2 */
4123    
4124     q=live.onstack[live.tos-1];
4125     live.onstack[live.tos]=q;
4126     live.spos[q]=live.tos;
4127     live.onstack[live.tos-1]=r2;
4128     live.spos[r2]=live.tos-1;
4129    
4130     make_tos(r); /* And r into 1 */
4131     }
4132    
4133     static __inline__ int stackpos(int r)
4134     {
4135     if (live.spos[r]<0)
4136     abort();
4137     if (live.tos<live.spos[r]) {
4138     printf("Looking for spos for fnreg %d\n",r);
4139     abort();
4140     }
4141     return live.tos-live.spos[r];
4142     }
4143    
4144     static __inline__ void usereg(int r)
4145     {
4146     if (live.spos[r]<0)
4147     make_tos(r);
4148     }
4149    
4150     /* This is called with one FP value in a reg *above* tos, which it will
4151     pop off the stack if necessary */
4152     static __inline__ void tos_make(int r)
4153     {
4154     if (live.spos[r]<0) {
4155     live.tos++;
4156     live.spos[r]=live.tos;
4157     live.onstack[live.tos]=r;
4158     return;
4159     }
4160     emit_byte(0xdd);
4161     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
4162     and pop it*/
4163     }
4164 gbeauche 1.23
4165     /* FP helper functions */
4166     #if USE_NEW_RTASM
4167     #define DEFINE_OP(NAME, GEN) \
4168     static inline void raw_##NAME(uint32 m) \
4169     { \
4170     GEN(m, X86_NOREG, X86_NOREG, 1); \
4171     }
4172     DEFINE_OP(fstl, FSTLm);
4173     DEFINE_OP(fstpl, FSTPLm);
4174     DEFINE_OP(fldl, FLDLm);
4175     DEFINE_OP(fildl, FILDLm);
4176     DEFINE_OP(fistl, FISTLm);
4177     DEFINE_OP(flds, FLDSm);
4178     DEFINE_OP(fsts, FSTSm);
4179     DEFINE_OP(fstpt, FSTPTm);
4180     DEFINE_OP(fldt, FLDTm);
4181     #else
4182     #define DEFINE_OP(NAME, OP1, OP2) \
4183     static inline void raw_##NAME(uint32 m) \
4184     { \
4185     emit_byte(OP1); \
4186     emit_byte(OP2); \
4187     emit_long(m); \
4188     }
4189     DEFINE_OP(fstl, 0xdd, 0x15);
4190     DEFINE_OP(fstpl, 0xdd, 0x1d);
4191     DEFINE_OP(fldl, 0xdd, 0x05);
4192     DEFINE_OP(fildl, 0xdb, 0x05);
4193     DEFINE_OP(fistl, 0xdb, 0x15);
4194     DEFINE_OP(flds, 0xd9, 0x05);
4195     DEFINE_OP(fsts, 0xd9, 0x15);
4196     DEFINE_OP(fstpt, 0xdb, 0x3d);
4197     DEFINE_OP(fldt, 0xdb, 0x2d);
4198     #endif
4199     #undef DEFINE_OP
4200    
4201 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4202     {
4203     make_tos(r);
4204 gbeauche 1.23 raw_fstl(m);
4205 gbeauche 1.1 }
4206     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4207    
4208     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4209     {
4210     make_tos(r);
4211 gbeauche 1.23 raw_fstpl(m);
4212 gbeauche 1.1 live.onstack[live.tos]=-1;
4213     live.tos--;
4214     live.spos[r]=-2;
4215     }
4216     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4217    
4218     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4219     {
4220 gbeauche 1.23 raw_fldl(m);
4221 gbeauche 1.1 tos_make(r);
4222     }
4223     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4224    
4225     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4226     {
4227 gbeauche 1.23 raw_fildl(m);
4228 gbeauche 1.1 tos_make(r);
4229     }
4230     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4231    
4232     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4233     {
4234     make_tos(r);
4235 gbeauche 1.23 raw_fistl(m);
4236 gbeauche 1.1 }
4237     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4238    
4239     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4240     {
4241 gbeauche 1.23 raw_flds(m);
4242 gbeauche 1.1 tos_make(r);
4243     }
4244     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4245    
4246     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4247     {
4248     make_tos(r);
4249 gbeauche 1.23 raw_fsts(m);
4250 gbeauche 1.1 }
4251     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4252    
4253     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4254     {
4255     int rs;
4256    
4257     /* Stupid x87 can't write a long double to mem without popping the
4258     stack! */
4259     usereg(r);
4260     rs=stackpos(r);
4261     emit_byte(0xd9); /* Get a copy to the top of stack */
4262     emit_byte(0xc0+rs);
4263    
4264 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4265 gbeauche 1.1 }
4266     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4267    
4268     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4269     {
4270     int rs;
4271    
4272     make_tos(r);
4273 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4274 gbeauche 1.1 live.onstack[live.tos]=-1;
4275     live.tos--;
4276     live.spos[r]=-2;
4277     }
4278     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4279    
4280     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4281     {
4282 gbeauche 1.23 raw_fldt(m);
4283 gbeauche 1.1 tos_make(r);
4284     }
4285     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4286    
4287     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4288     {
4289     emit_byte(0xd9);
4290     emit_byte(0xeb);
4291     tos_make(r);
4292     }
4293     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4294    
4295     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4296     {
4297     emit_byte(0xd9);
4298     emit_byte(0xec);
4299     tos_make(r);
4300     }
4301     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4302    
4303     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4304     {
4305     emit_byte(0xd9);
4306     emit_byte(0xea);
4307     tos_make(r);
4308     }
4309     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4310    
4311     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4312     {
4313     emit_byte(0xd9);
4314     emit_byte(0xed);
4315     tos_make(r);
4316     }
4317     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4318    
4319     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4320     {
4321     emit_byte(0xd9);
4322     emit_byte(0xe8);
4323     tos_make(r);
4324     }
4325     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4326    
4327     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4328     {
4329     emit_byte(0xd9);
4330     emit_byte(0xee);
4331     tos_make(r);
4332     }
4333     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4334    
4335     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4336     {
4337     int ds;
4338    
4339     usereg(s);
4340     ds=stackpos(s);
4341     if (ds==0 && live.spos[d]>=0) {
4342     /* source is on top of stack, and we already have the dest */
4343     int dd=stackpos(d);
4344     emit_byte(0xdd);
4345     emit_byte(0xd0+dd);
4346     }
4347     else {
4348     emit_byte(0xd9);
4349     emit_byte(0xc0+ds); /* duplicate source on tos */
4350     tos_make(d); /* store to destination, pop if necessary */
4351     }
4352     }
4353     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4354    
4355     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4356     {
4357     emit_byte(0xd9);
4358     emit_byte(0xa8+index);
4359     emit_long(base);
4360     }
4361     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4362    
4363    
4364     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4365     {
4366     int ds;
4367    
4368     if (d!=s) {
4369     usereg(s);
4370     ds=stackpos(s);
4371     emit_byte(0xd9);
4372     emit_byte(0xc0+ds); /* duplicate source */
4373     emit_byte(0xd9);
4374     emit_byte(0xfa); /* take square root */
4375     tos_make(d); /* store to destination */
4376     }
4377     else {
4378     make_tos(d);
4379     emit_byte(0xd9);
4380     emit_byte(0xfa); /* take square root */
4381     }
4382     }
4383     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4384    
4385     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4386     {
4387     int ds;
4388    
4389     if (d!=s) {
4390     usereg(s);
4391     ds=stackpos(s);
4392     emit_byte(0xd9);
4393     emit_byte(0xc0+ds); /* duplicate source */
4394     emit_byte(0xd9);
4395     emit_byte(0xe1); /* take fabs */
4396     tos_make(d); /* store to destination */
4397     }
4398     else {
4399     make_tos(d);
4400     emit_byte(0xd9);
4401     emit_byte(0xe1); /* take fabs */
4402     }
4403     }
4404     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4405    
4406     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4407     {
4408     int ds;
4409    
4410     if (d!=s) {
4411     usereg(s);
4412     ds=stackpos(s);
4413     emit_byte(0xd9);
4414     emit_byte(0xc0+ds); /* duplicate source */
4415     emit_byte(0xd9);
4416     emit_byte(0xfc); /* take frndint */
4417     tos_make(d); /* store to destination */
4418     }
4419     else {
4420     make_tos(d);
4421     emit_byte(0xd9);
4422     emit_byte(0xfc); /* take frndint */
4423     }
4424     }
4425     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4426    
4427     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4428     {
4429     int ds;
4430    
4431     if (d!=s) {
4432     usereg(s);
4433     ds=stackpos(s);
4434     emit_byte(0xd9);
4435     emit_byte(0xc0+ds); /* duplicate source */
4436     emit_byte(0xd9);
4437     emit_byte(0xff); /* take cos */
4438     tos_make(d); /* store to destination */
4439     }
4440     else {
4441     make_tos(d);
4442     emit_byte(0xd9);
4443     emit_byte(0xff); /* take cos */
4444     }
4445     }
4446     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4447    
4448     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4449     {
4450     int ds;
4451    
4452     if (d!=s) {
4453     usereg(s);
4454     ds=stackpos(s);
4455     emit_byte(0xd9);
4456     emit_byte(0xc0+ds); /* duplicate source */
4457     emit_byte(0xd9);
4458     emit_byte(0xfe); /* take sin */
4459     tos_make(d); /* store to destination */
4460     }
4461     else {
4462     make_tos(d);
4463     emit_byte(0xd9);
4464     emit_byte(0xfe); /* take sin */
4465     }
4466     }
4467     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4468    
4469 gbeauche 1.34 static const double one=1;
4470 gbeauche 1.1 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4471     {
4472     int ds;
4473    
4474     usereg(s);
4475     ds=stackpos(s);
4476     emit_byte(0xd9);
4477     emit_byte(0xc0+ds); /* duplicate source */
4478    
4479     emit_byte(0xd9);
4480     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4481     emit_byte(0xd9);
4482     emit_byte(0xfc); /* rndint */
4483     emit_byte(0xd9);
4484     emit_byte(0xc9); /* swap top two elements */
4485     emit_byte(0xd8);
4486     emit_byte(0xe1); /* subtract rounded from original */
4487     emit_byte(0xd9);
4488     emit_byte(0xf0); /* f2xm1 */
4489 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4490 gbeauche 1.1 emit_byte(0xd9);
4491     emit_byte(0xfd); /* and scale it */
4492     emit_byte(0xdd);
4493     emit_byte(0xd9); /* take he rounded value off */
4494     tos_make(d); /* store to destination */
4495     }
4496     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4497    
4498     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4499     {
4500     int ds;
4501    
4502     usereg(s);
4503     ds=stackpos(s);
4504     emit_byte(0xd9);
4505     emit_byte(0xc0+ds); /* duplicate source */
4506     emit_byte(0xd9);
4507     emit_byte(0xea); /* fldl2e */
4508     emit_byte(0xde);
4509     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4510    
4511     emit_byte(0xd9);
4512     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4513     emit_byte(0xd9);
4514     emit_byte(0xfc); /* rndint */
4515     emit_byte(0xd9);
4516     emit_byte(0xc9); /* swap top two elements */
4517     emit_byte(0xd8);
4518     emit_byte(0xe1); /* subtract rounded from original */
4519     emit_byte(0xd9);
4520     emit_byte(0xf0); /* f2xm1 */
4521 gbeauche 1.34 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4522 gbeauche 1.1 emit_byte(0xd9);
4523     emit_byte(0xfd); /* and scale it */
4524     emit_byte(0xdd);
4525     emit_byte(0xd9); /* take he rounded value off */
4526     tos_make(d); /* store to destination */
4527     }
4528     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4529    
4530     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4531     {
4532     int ds;
4533    
4534     usereg(s);
4535     ds=stackpos(s);
4536     emit_byte(0xd9);
4537     emit_byte(0xc0+ds); /* duplicate source */
4538     emit_byte(0xd9);
4539     emit_byte(0xe8); /* push '1' */
4540     emit_byte(0xd9);
4541     emit_byte(0xc9); /* swap top two */
4542     emit_byte(0xd9);
4543     emit_byte(0xf1); /* take 1*log2(x) */
4544     tos_make(d); /* store to destination */
4545     }
4546     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4547    
4548    
4549     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4550     {
4551     int ds;
4552    
4553     if (d!=s) {
4554     usereg(s);
4555     ds=stackpos(s);
4556     emit_byte(0xd9);
4557     emit_byte(0xc0+ds); /* duplicate source */
4558     emit_byte(0xd9);
4559     emit_byte(0xe0); /* take fchs */
4560     tos_make(d); /* store to destination */
4561     }
4562     else {
4563     make_tos(d);
4564     emit_byte(0xd9);
4565     emit_byte(0xe0); /* take fchs */
4566     }
4567     }
4568     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4569    
4570     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4571     {
4572     int ds;
4573    
4574     usereg(s);
4575     usereg(d);
4576    
4577     if (live.spos[s]==live.tos) {
4578     /* Source is on top of stack */
4579     ds=stackpos(d);
4580     emit_byte(0xdc);
4581     emit_byte(0xc0+ds); /* add source to dest*/
4582     }
4583     else {
4584     make_tos(d);
4585     ds=stackpos(s);
4586    
4587     emit_byte(0xd8);
4588     emit_byte(0xc0+ds); /* add source to dest*/
4589     }
4590     }
4591     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4592    
4593     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4594     {
4595     int ds;
4596    
4597     usereg(s);
4598     usereg(d);
4599    
4600     if (live.spos[s]==live.tos) {
4601     /* Source is on top of stack */
4602     ds=stackpos(d);
4603     emit_byte(0xdc);
4604     emit_byte(0xe8+ds); /* sub source from dest*/
4605     }
4606     else {
4607     make_tos(d);
4608     ds=stackpos(s);
4609    
4610     emit_byte(0xd8);
4611     emit_byte(0xe0+ds); /* sub src from dest */
4612     }
4613     }
4614     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4615    
4616     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4617     {
4618     int ds;
4619    
4620     usereg(s);
4621     usereg(d);
4622    
4623     make_tos(d);
4624     ds=stackpos(s);
4625    
4626     emit_byte(0xdd);
4627     emit_byte(0xe0+ds); /* cmp dest with source*/
4628     }
4629     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4630    
4631     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4632     {
4633     int ds;
4634    
4635     usereg(s);
4636     usereg(d);
4637    
4638     if (live.spos[s]==live.tos) {
4639     /* Source is on top of stack */
4640     ds=stackpos(d);
4641     emit_byte(0xdc);
4642     emit_byte(0xc8+ds); /* mul dest by source*/
4643     }
4644     else {
4645     make_tos(d);
4646     ds=stackpos(s);
4647    
4648     emit_byte(0xd8);
4649     emit_byte(0xc8+ds); /* mul dest by source*/
4650     }
4651     }
4652     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4653    
4654     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4655     {
4656     int ds;
4657    
4658     usereg(s);
4659     usereg(d);
4660    
4661     if (live.spos[s]==live.tos) {
4662     /* Source is on top of stack */
4663     ds=stackpos(d);
4664     emit_byte(0xdc);
4665     emit_byte(0xf8+ds); /* div dest by source */
4666     }
4667     else {
4668     make_tos(d);
4669     ds=stackpos(s);
4670    
4671     emit_byte(0xd8);
4672     emit_byte(0xf0+ds); /* div dest by source*/
4673     }
4674     }
4675     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4676    
4677     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4678     {
4679     int ds;
4680    
4681     usereg(s);
4682     usereg(d);
4683    
4684     make_tos2(d,s);
4685     ds=stackpos(s);
4686    
4687     if (ds!=1) {
4688     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4689     abort();
4690     }
4691     emit_byte(0xd9);
4692     emit_byte(0xf8); /* take rem from dest by source */
4693     }
4694     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4695    
4696     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4697     {
4698     int ds;
4699    
4700     usereg(s);
4701     usereg(d);
4702    
4703     make_tos2(d,s);
4704     ds=stackpos(s);
4705    
4706     if (ds!=1) {
4707     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4708     abort();
4709     }
4710     emit_byte(0xd9);
4711     emit_byte(0xf5); /* take rem1 from dest by source */
4712     }
4713     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4714    
4715    
4716     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4717     {
4718     make_tos(r);
4719     emit_byte(0xd9); /* ftst */
4720     emit_byte(0xe4);
4721     }
4722     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4723    
4724     /* %eax register is clobbered if target processor doesn't support fucomi */
4725     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4726     #define FFLAG_NREG EAX_INDEX
4727    
4728     static __inline__ void raw_fflags_into_flags(int r)
4729     {
4730     int p;
4731    
4732     usereg(r);
4733     p=stackpos(r);
4734    
4735     emit_byte(0xd9);
4736     emit_byte(0xee); /* Push 0 */
4737     emit_byte(0xd9);
4738     emit_byte(0xc9+p); /* swap top two around */
4739     if (have_cmov) {
4740     // gb-- fucomi is for P6 cores only, not K6-2 then...
4741     emit_byte(0xdb);
4742     emit_byte(0xe9+p); /* fucomi them */
4743     }
4744     else {
4745     emit_byte(0xdd);
4746     emit_byte(0xe1+p); /* fucom them */
4747     emit_byte(0x9b);
4748     emit_byte(0xdf);
4749     emit_byte(0xe0); /* fstsw ax */
4750     raw_sahf(0); /* sahf */
4751     }
4752     emit_byte(0xdd);
4753     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4754     }