ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.33
Committed: 2006-02-06T23:06:54Z (18 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.32: +7 -3 lines
Log Message:
Fix SAHF_SETO_PROFITABLE code for x86-64 platforms.

This was only an experiment. Improvement was marginal: only +3% on AMD64
(an Athlon 64 3200+). However, it may be interesting to test it on EM64T
(e.g. newer P4s) since an older P3/800, hence in 32-bit mode, got a +15%
improvement in Speedometer 4 benchmarks.

Rationale: lahf/seto sequences avoid loads/stores to the stack (push/pop)
and were thus hoped to be faster.

Anyhow, SAHF_SETO_PROFITABLE can only be enabled manually at this time.
Edit your generated Makefile for testing, but first make sure your CPU
supports lahf in 64-bit mode (lahf_lm flag in /proc/cpuinfo).

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Register numbers used by the code generator: EAX..EDI are 0..7, and
   R8..R15 (8..15) are only defined when building for x86-64. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.33 /* XXX this has to match X86_Reg8H_Base + 4 */
/* High-byte registers: 0x10+4 plus the index of the parent register. */
56     #define AH_INDEX (0x10+4+EAX_INDEX)
57     #define CH_INDEX (0x10+4+ECX_INDEX)
58     #define DH_INDEX (0x10+4+EDX_INDEX)
59     #define BH_INDEX (0x10+4+EBX_INDEX)
60 gbeauche 1.1
/* Fixed register roles: call result, first two call arguments (which
   depend on compiler / target), PC preload temporaries, the shift-count
   register, the 32x32 multiply result pair, and stack alignment constants. */
61     /* The register in which subroutines return an integer return value */
62 gbeauche 1.20 #define REG_RESULT EAX_INDEX
63 gbeauche 1.1
64     /* The registers subroutines take their first and second argument in */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     /* Handle the _fastcall parameters of ECX and EDX */
67 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
68     #define REG_PAR2 EDX_INDEX
69     #elif defined(__x86_64__)
70     #define REG_PAR1 EDI_INDEX
71     #define REG_PAR2 ESI_INDEX
72 gbeauche 1.1 #else
73 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
74     #define REG_PAR2 EDX_INDEX
75 gbeauche 1.1 #endif
76    
77 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
78 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
80 gbeauche 1.1 #else
81 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82 gbeauche 1.1 #endif
83    
84 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
85 gbeauche 1.1 -1 if any reg will do */
86 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
88 gbeauche 1.1
/* NOTE(review): STACK_OFFSET is the size of one pointer; presumably it
   accounts for the return address pushed by a call -- confirm at use sites. */
89 gbeauche 1.31 #define STACK_ALIGN 16
90     #define STACK_OFFSET sizeof(void *)
91    
/* Register lists, each terminated by -1.  always_used holds only index 4
   (ESP_INDEX).  can_byte / can_word list the register indices accepted for
   byte and word operands: on x86-64 every index except 4; on 32-bit,
   0..3 for bytes and 0..3,5..7 for words. */
92 gbeauche 1.1 uae_s8 always_used[]={4,-1};
93 gbeauche 1.20 #if defined(__x86_64__)
94     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96     #else
97 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
98     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 gbeauche 1.20 #endif
100 gbeauche 1.1
/* call_saved[i] is non-zero when register index i is treated as saved
   across calls.  The USE_OPTIMIZED_CALLS variant is guarded by #error and
   therefore cannot currently be built; the default table marks only
   index 4 (ESP_INDEX). */
101 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
102     /* Make sure interpretive core does not use cpuopti */
103     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 gbeauche 1.20 #error FIXME: code not ready
105 gbeauche 1.17 #else
106 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
107     by the caller */
108 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 gbeauche 1.17 #endif
110 gbeauche 1.1
111     /* This *should* be the same as call_saved. But:
112     - We might not really know which registers are saved, and which aren't,
113     so we need to preserve some, but don't want to rely on everyone else
114     also saving those registers
115     - Special registers (such as the stack pointer) should not be "preserved"
116     by pushing, even though they are "saved" across function calls
117     */
/* need_to_preserve[i] is non-zero for register index i that must be
   preserved.  x86-64 table: indices 3 (EBX), 5 (EBP) and 11..15.
   32-bit table: indices 3 (EBX), 5 (EBP), 6 (ESI), 7 (EDI). */
118 gbeauche 1.21 #if defined(__x86_64__)
119 gbeauche 1.32 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
121     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 gbeauche 1.21 #else
123 gbeauche 1.32 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 gbeauche 1.21 #endif
126 gbeauche 1.1
127     /* Whether classes of instructions do or don't clobber the native flags */
/* A macro defined to nothing means that instruction class leaves the flags
   alone; the others expand to a clobber_flags() call. */
128     #define CLOBBER_MOV
129     #define CLOBBER_LEA
130     #define CLOBBER_CMOV
131     #define CLOBBER_POP
132     #define CLOBBER_PUSH
133     #define CLOBBER_SUB clobber_flags()
134     #define CLOBBER_SBB clobber_flags()
135     #define CLOBBER_CMP clobber_flags()
136     #define CLOBBER_ADD clobber_flags()
137     #define CLOBBER_ADC clobber_flags()
138     #define CLOBBER_AND clobber_flags()
139     #define CLOBBER_OR clobber_flags()
140     #define CLOBBER_XOR clobber_flags()
141    
142     #define CLOBBER_ROL clobber_flags()
143     #define CLOBBER_ROR clobber_flags()
144     #define CLOBBER_SHLL clobber_flags()
145     #define CLOBBER_SHRL clobber_flags()
146     #define CLOBBER_SHRA clobber_flags()
147     #define CLOBBER_TEST clobber_flags()
148     #define CLOBBER_CL16
149     #define CLOBBER_CL8
150 gbeauche 1.20 #define CLOBBER_SE32
151 gbeauche 1.1 #define CLOBBER_SE16
152     #define CLOBBER_SE8
153 gbeauche 1.20 #define CLOBBER_ZE32
154 gbeauche 1.1 #define CLOBBER_ZE16
155     #define CLOBBER_ZE8
156     #define CLOBBER_SW16 clobber_flags()
157     #define CLOBBER_SW32
158     #define CLOBBER_SETCC
159     #define CLOBBER_MUL clobber_flags()
160     #define CLOBBER_BT clobber_flags()
161     #define CLOBBER_BSF clobber_flags()
162    
163 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
/* NOTE(review): despite the FIXME above, USE_NEW_RTASM is forced to 1 on
   x86-64 builds by the lines below; other targets keep whatever value (if
   any) was defined earlier. */
164 gbeauche 1.20 #if defined(__x86_64__)
165     #define USE_NEW_RTASM 1
166     #endif
167    
168     #if USE_NEW_RTASM
169 gbeauche 1.13
/* Configuration knobs set before including codegen_x86.h, followed by the
   glue macros that route its x86_emit_* / x86_get_target hooks to this
   file's emit_* / get_target functions and its failure hook to jit_fail. */
170     #if defined(__x86_64__)
171     #define X86_TARGET_64BIT 1
172     #endif
173     #define X86_FLAT_REGISTERS 0
174 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
175     #define X86_OPTIMIZE_ROTSHI 1
176 gbeauche 1.13 #include "codegen_x86.h"
177    
178     #define x86_emit_byte(B) emit_byte(B)
179     #define x86_emit_word(W) emit_word(W)
180     #define x86_emit_long(L) emit_long(L)
181 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
182 gbeauche 1.13 #define x86_get_target() get_target()
183     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
184    
/* jit_fail: report a fatal code-generator error on stderr (function, file,
   line and message) and terminate the process with abort().  It is the
   target of the x86_emit_failure() macro.  Does not return. */
185     static void jit_fail(const char *msg, const char *file, int line, const char *function)
186     {
187     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
188     function, file, line, msg);
189     abort();
190     }
191    
/* raw_push_l_r: emit a push of register r -- PUSHQr on x86-64 builds,
   PUSHLr otherwise. */
192     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
193     {
194 gbeauche 1.20 #if defined(__x86_64__)
195     PUSHQr(r);
196     #else
197 gbeauche 1.13 PUSHLr(r);
198 gbeauche 1.20 #endif
199 gbeauche 1.13 }
200     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
201    
/* raw_pop_l_r: emit a pop into register r -- POPQr on x86-64 builds,
   POPLr otherwise. */
202     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
203     {
204 gbeauche 1.20 #if defined(__x86_64__)
205     POPQr(r);
206     #else
207 gbeauche 1.13 POPLr(r);
208 gbeauche 1.20 #endif
209 gbeauche 1.13 }
210     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
211    
/* raw_pop_l_m: emit a pop into the memory operand at displacement d (no
   base or index register) -- POPQm on x86-64 builds, POPLm otherwise. */
212 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
213     {
214     #if defined(__x86_64__)
215     POPQm(d, X86_NOREG, X86_NOREG, 1);
216     #else
217     POPLm(d, X86_NOREG, X86_NOREG, 1);
218     #endif
219     }
220     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
221    
/* raw_bt_l_ri: emit BTLir -- bit test of register r, immediate bit index i. */
222 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
223     {
224     BTLir(i, r);
225     }
226     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
227    
/* raw_bt_l_rr: emit BTLrr -- bit test of register r, bit index taken from
   register b. */
228     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
229     {
230     BTLrr(b, r);
231     }
232     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
233    
/* raw_btc_l_ri: emit BTCLir -- bit test-and-complement of register r,
   immediate bit index i. */
234     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
235     {
236     BTCLir(i, r);
237     }
238     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
239    
/* raw_btc_l_rr: emit BTCLrr -- bit test-and-complement of register r, bit
   index taken from register b. */
240     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
241     {
242     BTCLrr(b, r);
243     }
244     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
245    
/* raw_btr_l_ri: emit BTRLir -- bit test-and-reset of register r, immediate
   bit index i. */
246     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
247     {
248     BTRLir(i, r);
249     }
250     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
251    
/* raw_btr_l_rr: emit BTRLrr -- bit test-and-reset of register r, bit index
   taken from register b. */
252     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
253     {
254     BTRLrr(b, r);
255     }
256     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
257    
/* raw_bts_l_ri: emit BTSLir -- bit test-and-set of register r, immediate
   bit index i. */
258     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
259     {
260     BTSLir(i, r);
261     }
262     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
263    
/* raw_bts_l_rr: emit BTSLrr -- bit test-and-set of register r, bit index
   taken from register b. */
264     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
265     {
266     BTSLrr(b, r);
267     }
268     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
269    
/* raw_sub_w_ri: emit SUBWir -- word subtract of immediate i from register d. */
270     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
271     {
272     SUBWir(i, d);
273     }
274     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
275    
/* raw_mov_l_rm: emit MOVLmr -- long load into register d from the memory
   operand at displacement s (no base or index register). */
276     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
277     {
278     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
279     }
280     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
281    
/* raw_mov_l_mi: emit MOVLim -- long store of immediate s to the memory
   operand at displacement d (no base or index register). */
282     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
283     {
284     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
285     }
286     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
287    
/* raw_mov_w_mi: emit MOVWim -- word store of immediate s to the memory
   operand at displacement d (no base or index register). */
288     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
289     {
290     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
291     }
292     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
293    
/* raw_mov_b_mi: emit MOVBim -- byte store of immediate s to the memory
   operand at displacement d (no base or index register). */
294     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
295     {
296     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
297     }
298     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
299    
/* raw_rol_b_mi: emit ROLBim -- byte rotate-left by immediate i of the
   memory operand at displacement d (no base or index register). */
300     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
301     {
302     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
303     }
304     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
305    
/* raw_rol_b_ri: emit ROLBir -- byte rotate-left of register r by immediate i. */
306     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
307     {
308     ROLBir(i, r);
309     }
310     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
311    
/* raw_rol_w_ri: emit ROLWir -- word rotate-left of register r by immediate i. */
312     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
313     {
314     ROLWir(i, r);
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
317    
/* raw_rol_l_ri: emit ROLLir -- long rotate-left of register r by immediate i. */
318     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
319     {
320     ROLLir(i, r);
321     }
322     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
323    
/* raw_rol_l_rr: emit ROLLrr -- long rotate-left of register d by the count
   held in register r. */
324     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
325     {
326     ROLLrr(r, d);
327     }
328     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
329    
/* raw_rol_w_rr: emit ROLWrr -- word rotate-left of register d by the count
   held in register r. */
330     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
331     {
332     ROLWrr(r, d);
333     }
334     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
335    
/* raw_rol_b_rr: emit ROLBrr -- byte rotate-left of register d by the count
   held in register r. */
336     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
337     {
338     ROLBrr(r, d);
339     }
340     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
341    
/* raw_shll_l_rr: emit SHLLrr -- long shift-left of register d by the count
   held in register r. */
342     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
343     {
344     SHLLrr(r, d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
347    
/* raw_shll_w_rr: emit SHLWrr -- word shift-left of register d by the count
   held in register r. */
348     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
349     {
350     SHLWrr(r, d);
351     }
352     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
353    
/* raw_shll_b_rr: emit SHLBrr -- byte shift-left of register d by the count
   held in register r. */
354     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
355     {
356     SHLBrr(r, d);
357     }
358     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
359    
/* raw_ror_b_ri: emit RORBir -- byte rotate-right of register r by immediate i. */
360     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
361     {
362     RORBir(i, r);
363     }
364     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
365    
/* raw_ror_w_ri: emit RORWir -- word rotate-right of register r by immediate i. */
366     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
367     {
368     RORWir(i, r);
369     }
370     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371    
/* raw_or_l_rm: emit ORLmr -- long OR into register d of the memory operand
   at displacement s (no base or index register). */
372     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
373     {
374     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
375     }
376     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
377    
/* raw_ror_l_ri: emit RORLir -- long rotate-right of register r by immediate i. */
378     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
379     {
380     RORLir(i, r);
381     }
382     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
383    
/* raw_ror_l_rr: emit RORLrr -- long rotate-right of register d by the count
   held in register r. */
384     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
385     {
386     RORLrr(r, d);
387     }
388     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
389    
/* raw_ror_w_rr: emit RORWrr -- word rotate-right of register d by the count
   held in register r. */
390     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
391     {
392     RORWrr(r, d);
393     }
394     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
395    
/* raw_ror_b_rr: emit RORBrr -- byte rotate-right of register d by the count
   held in register r. */
396     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
397     {
398     RORBrr(r, d);
399     }
400     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
401    
/* raw_shrl_l_rr: emit SHRLrr -- long logical shift-right of register d by
   the count held in register r. */
402     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
403     {
404     SHRLrr(r, d);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
407    
/* raw_shrl_w_rr: emit SHRWrr -- word logical shift-right of register d by
   the count held in register r. */
408     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
409     {
410     SHRWrr(r, d);
411     }
412     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
413    
/* raw_shrl_b_rr: emit SHRBrr -- byte logical shift-right of register d by
   the count held in register r. */
414     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
415     {
416     SHRBrr(r, d);
417     }
418     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
419    
/* raw_shra_l_rr: emit SARLrr -- long arithmetic shift-right of register d
   by the count held in register r. */
420     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
421     {
422 gbeauche 1.14 SARLrr(r, d);
423 gbeauche 1.13 }
424     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
425    
/* raw_shra_w_rr: emit SARWrr -- word arithmetic shift-right of register d
   by the count held in register r. */
426     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
427     {
428 gbeauche 1.14 SARWrr(r, d);
429 gbeauche 1.13 }
430     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
431    
/* raw_shra_b_rr: emit SARBrr -- byte arithmetic shift-right of register d
   by the count held in register r. */
432     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
433     {
434 gbeauche 1.14 SARBrr(r, d);
435 gbeauche 1.13 }
436     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
437    
/* raw_shll_l_ri: emit SHLLir -- long shift-left of register r by immediate i. */
438     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
439     {
440     SHLLir(i, r);
441     }
442     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
443    
/* raw_shll_w_ri: emit SHLWir -- word shift-left of register r by immediate i. */
444     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
445     {
446     SHLWir(i, r);
447     }
448     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
449    
/* raw_shll_b_ri: emit SHLBir -- byte shift-left of register r by immediate i. */
450     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
451     {
452     SHLBir(i, r);
453     }
454     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
455    
/* raw_shrl_l_ri: emit SHRLir -- long logical shift-right of register r by
   immediate i. */
456     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
457     {
458     SHRLir(i, r);
459     }
460     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
461    
/* raw_shrl_w_ri: emit SHRWir -- word logical shift-right of register r by
   immediate i. */
462     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
463     {
464     SHRWir(i, r);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
467    
/* raw_shrl_b_ri: emit SHRBir -- byte logical shift-right of register r by
   immediate i. */
468     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
469     {
470     SHRBir(i, r);
471     }
472     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
473    
/* raw_shra_l_ri: emit SARLir -- long arithmetic shift-right of register r
   by immediate i. */
474     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
475     {
476 gbeauche 1.14 SARLir(i, r);
477 gbeauche 1.13 }
478     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
479    
/* raw_shra_w_ri: emit SARWir -- word arithmetic shift-right of register r
   by immediate i. */
480     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
481     {
482 gbeauche 1.14 SARWir(i, r);
483 gbeauche 1.13 }
484     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
485    
/* raw_shra_b_ri: emit SARBir -- byte arithmetic shift-right of register r
   by immediate i. */
486     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
487     {
488 gbeauche 1.14 SARBir(i, r);
489 gbeauche 1.13 }
490     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
491    
/* raw_sahf: emit SAHF.  The dummy_ah parameter is not referenced in the
   body; presumably it only tells the register allocator that AH is read --
   verify against the callers. */
492     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
493     {
494     SAHF();
495     }
496     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
497    
/* raw_cpuid: emit CPUID.  The dummy_eax parameter is not referenced in the
   body. */
498     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
499     {
500     CPUID();
501     }
502     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
503    
/* raw_lahf: emit LAHF.  The dummy_ah parameter is not referenced in the
   body; presumably it only tells the register allocator that AH is
   written -- verify against the callers. */
504     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
505     {
506     LAHF();
507     }
508     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
509    
/* raw_setcc: emit SETCCir -- set byte register d according to condition
   code cc. */
510     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
511     {
512     SETCCir(cc, d);
513     }
514     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
515    
/* raw_setcc_m: emit SETCCim -- set the byte at displacement d (no base or
   index register) according to condition code cc. */
516     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
517     {
518     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
519     }
520     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
521    
/* raw_cmov_l_rr: conditional register-to-register long move of s into d
   under condition cc.
   - have_cmov set: emit a single CMOVLrr.
   - otherwise: emit a short conditional jump on cc^1 with displacement 2,
     followed by a plain MOVLrr.  On x86-64 builds this fallback logs a
     message and aborts before emitting anything.
   NOTE(review): the literal 2 is presumably the encoded length of the
   following MOVLrr, and cc^1 the inverted condition -- confirm against the
   encoder in codegen_x86.h. */
522     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
523     {
524 gbeauche 1.15 if (have_cmov)
525     CMOVLrr(cc, s, d);
526     else { /* replacement using branch and mov */
527     #if defined(__x86_64__)
528     write_log("x86-64 implementations are bound to have CMOV!\n");
529     abort();
530     #endif
531     JCCSii(cc^1, 2);
532     MOVLrr(s, d);
533     }
534 gbeauche 1.13 }
535     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
536    
/* raw_bsf_l_rr: emit BSFLrr -- bit-scan-forward of register s, result in
   register d. */
537     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
538     {
539     BSFLrr(s, d);
540     }
541     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
542    
/* raw_sign_extend_32_rr: emit MOVSLQrr from register s to register d.
   NOTE(review): MOVSLQ is presumably only encodable on x86-64 targets, yet
   this definition is not guarded by __x86_64__ -- confirm. */
543 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
544     {
545     MOVSLQrr(s, d);
546     }
547     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
548    
/* raw_sign_extend_16_rr: emit MOVSWLrr -- sign-extend the word in register
   s into long register d. */
549 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
550     {
551     MOVSWLrr(s, d);
552     }
553     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
554    
/* raw_sign_extend_8_rr: emit MOVSBLrr -- sign-extend the byte in register
   s into long register d. */
555     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
556     {
557     MOVSBLrr(s, d);
558     }
559     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
560    
/* raw_zero_extend_16_rr: emit MOVZWLrr -- zero-extend the word in register
   s into long register d. */
561     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
562     {
563     MOVZWLrr(s, d);
564     }
565     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
566    
/* raw_zero_extend_8_rr: emit MOVZBLrr -- zero-extend the byte in register
   s into long register d. */
567     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
568     {
569     MOVZBLrr(s, d);
570     }
571     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
572    
/* raw_imul_32_32: emit IMULLrr -- two-operand long signed multiply of
   register d by register s. */
573     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
574     {
575 gbeauche 1.14 IMULLrr(s, d);
576 gbeauche 1.13 }
577     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
578    
/* raw_imul_64_32: emit the one-operand IMULLr(s).  The caller must pass
   d == MUL_NREG1 and s == MUL_NREG2; any other pairing is logged and the
   process aborts. */
579     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
580     {
581 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
582     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
583 gbeauche 1.13 abort();
584 gbeauche 1.14 }
585     IMULLr(s);
586 gbeauche 1.13 }
587     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
588    
/* raw_mul_64_32: emit the one-operand MULLr(s).  The caller must pass
   d == MUL_NREG1 and s == MUL_NREG2; any other pairing is logged and the
   process aborts. */
589     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
590     {
591 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
592     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
593 gbeauche 1.13 abort();
594 gbeauche 1.14 }
595     MULLr(s);
596 gbeauche 1.13 }
597     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
598    
/* raw_mul_32_32: not implemented for this target -- no code is emitted and
   any call is fatal.  The failure is logged before aborting, in the same
   way raw_imul_64_32 and raw_mul_64_32 report bad operands, so that it can
   be identified instead of appearing as a bare abort(). */
599     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
600     {
write_log("raw_mul_32_32 is not implemented on x86: d=%d, s=%d\n",d,s);
601 gbeauche 1.14 abort(); /* never returns */
602 gbeauche 1.13 }
603     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
604    
/* raw_mov_b_rr: emit MOVBrr -- byte move from register s to register d. */
605     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
606     {
607     MOVBrr(s, d);
608     }
609     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
610    
/* raw_mov_w_rr: emit MOVWrr -- word move from register s to register d. */
611     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
612     {
613     MOVWrr(s, d);
614     }
615     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
616    
/* raw_mov_l_rrm_indexed: emit MOVLmr -- long load into d from the operand
   (baser, index, factor) with displacement 0. */
617     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
618     {
619     MOVLmr(0, baser, index, factor, d);
620     }
621     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
622    
/* raw_mov_w_rrm_indexed: emit MOVWmr -- word load into d from the operand
   (baser, index, factor) with displacement 0. */
623     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
624     {
625     MOVWmr(0, baser, index, factor, d);
626     }
627     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
628    
/* raw_mov_b_rrm_indexed: emit MOVBmr -- byte load into d from the operand
   (baser, index, factor) with displacement 0. */
629     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
630     {
631     MOVBmr(0, baser, index, factor, d);
632     }
633     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
634    
/* raw_mov_l_mrr_indexed: emit MOVLrm -- long store of register s to the
   operand (baser, index, factor) with displacement 0. */
635     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
636     {
637     MOVLrm(s, 0, baser, index, factor);
638     }
639     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
640    
/* raw_mov_w_mrr_indexed: emit MOVWrm -- word store of register s to the
   operand (baser, index, factor) with displacement 0. */
641     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
642     {
643     MOVWrm(s, 0, baser, index, factor);
644     }
645     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
646    
/* raw_mov_b_mrr_indexed: emit MOVBrm -- byte store of register s to the
   operand (baser, index, factor) with displacement 0. */
647     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
648     {
649     MOVBrm(s, 0, baser, index, factor);
650     }
651     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
652    
/* raw_mov_l_bmrr_indexed: emit MOVLrm -- long store of register s to the
   operand (baser, index, factor) with displacement base. */
653     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
654     {
655     MOVLrm(s, base, baser, index, factor);
656     }
657     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
658    
/* raw_mov_w_bmrr_indexed: emit MOVWrm -- word store of register s to the
   operand (baser, index, factor) with displacement base. */
659     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
660     {
661     MOVWrm(s, base, baser, index, factor);
662     }
663     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
664    
/* raw_mov_b_bmrr_indexed: emit MOVBrm -- byte store of register s to the
   operand (baser, index, factor) with displacement base. */
665     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
666     {
667     MOVBrm(s, base, baser, index, factor);
668     }
669     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
670    
/* raw_mov_l_brrm_indexed: emit MOVLmr -- long load into d from the operand
   (baser, index, factor) with displacement base. */
671     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
672     {
673     MOVLmr(base, baser, index, factor, d);
674     }
675     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
676    
/* raw_mov_w_brrm_indexed: emit MOVWmr -- word load into d from the operand
   (baser, index, factor) with displacement base. */
677     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
678     {
679     MOVWmr(base, baser, index, factor, d);
680     }
681     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
682    
/* raw_mov_b_brrm_indexed: emit MOVBmr -- byte load into d from the operand
   (baser, index, factor) with displacement base. */
683     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
684     {
685     MOVBmr(base, baser, index, factor, d);
686     }
687     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
688    
/* raw_mov_l_rm_indexed: emit MOVLmr -- long load into d from displacement
   base plus (index, factor), with no base register. */
689     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
690     {
691     MOVLmr(base, X86_NOREG, index, factor, d);
692     }
693     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
694    
/* raw_cmov_l_rm_indexed: conditional long load into d from displacement
   base plus (index, factor), no base register, under condition cond.
   - have_cmov set: emit a single CMOVLmr.
   - otherwise: emit a short conditional jump on cond^1 with displacement 7,
     followed by a plain MOVLmr.  On x86-64 builds this fallback logs a
     message and aborts before emitting anything.
   NOTE(review): the literal 7 is presumably the encoded length of the
   following MOVLmr, and cond^1 the inverted condition -- confirm against
   the encoder in codegen_x86.h. */
695     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
696     {
697 gbeauche 1.15 if (have_cmov)
698     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
699     else { /* replacement using branch and mov */
700     #if defined(__x86_64__)
701     write_log("x86-64 implementations are bound to have CMOV!\n");
702     abort();
703     #endif
704     JCCSii(cond^1, 7);
705     MOVLmr(base, X86_NOREG, index, factor, d);
706     }
707 gbeauche 1.13 }
708     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
709    
/* raw_cmov_l_rm: conditional long load into d from the memory operand at
   displacement mem (no base or index register) under condition cond.
   - have_cmov set: emit a single CMOVLmr.
   - otherwise: emit a short conditional jump on cond^1 with displacement 6,
     followed by a plain MOVLmr.  On x86-64 builds this fallback logs a
     message and aborts before emitting anything.
   NOTE(review): the literal 6 is presumably the encoded length of the
   following MOVLmr, and cond^1 the inverted condition -- confirm against
   the encoder in codegen_x86.h. */
710     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
711     {
712 gbeauche 1.15 if (have_cmov)
713     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
714     else { /* replacement using branch and mov */
715     #if defined(__x86_64__)
716     write_log("x86-64 implementations are bound to have CMOV!\n");
717     abort();
718     #endif
719     JCCSii(cond^1, 6);
720     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
721     }
722 gbeauche 1.13 }
723     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
724    
/* raw_mov_l_rR: emit MOVLmr -- long load into d from base register s plus
   displacement offset (no index register). */
725     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
726     {
727     MOVLmr(offset, s, X86_NOREG, 1, d);
728     }
729     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
730    
/* raw_mov_w_rR: emit MOVWmr -- word load into d from base register s plus
   displacement offset (no index register). */
731     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
732     {
733     MOVWmr(offset, s, X86_NOREG, 1, d);
734     }
735     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
736    
/* raw_mov_b_rR: emit MOVBmr -- byte load into d from base register s plus
   displacement offset (no index register). */
737     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
738     {
739     MOVBmr(offset, s, X86_NOREG, 1, d);
740     }
741     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
742    
/* raw_mov_l_brR: emit MOVLmr -- long load into d from base register s plus
   displacement offset.  The body is identical to raw_mov_l_rR. */
743     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
744     {
745     MOVLmr(offset, s, X86_NOREG, 1, d);
746     }
747     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
748    
/* raw_mov_w_brR: emit MOVWmr -- word load into d from base register s plus
   displacement offset.  The body is identical to raw_mov_w_rR. */
749     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
750     {
751     MOVWmr(offset, s, X86_NOREG, 1, d);
752     }
753     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
754    
/* raw_mov_b_brR: emit MOVBmr -- byte load into d from base register s plus
   displacement offset.  The body is identical to raw_mov_b_rR. */
755     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
756     {
757     MOVBmr(offset, s, X86_NOREG, 1, d);
758     }
759     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
760    
/* raw_mov_l_Ri: emit MOVLim -- long store of immediate i to base register d
   plus displacement offset (no index register). */
761     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
762     {
763     MOVLim(i, offset, d, X86_NOREG, 1);
764     }
765     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
766    
/* raw_mov_w_Ri: emit MOVWim -- word store of immediate i to base register d
   plus displacement offset (no index register). */
767     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
768     {
769     MOVWim(i, offset, d, X86_NOREG, 1);
770     }
771     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
772    
/* raw_mov_b_Ri: emit MOVBim -- byte store of immediate i to base register d
   plus displacement offset (no index register). */
773     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
774     {
775     MOVBim(i, offset, d, X86_NOREG, 1);
776     }
777     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
778    
/* raw_mov_l_Rr: emit MOVLrm -- long store of register s to base register d
   plus displacement offset (no index register). */
779     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
780     {
781     MOVLrm(s, offset, d, X86_NOREG, 1);
782     }
783     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
784    
/* raw_mov_w_Rr: emit MOVWrm -- word store of register s to base register d
   plus displacement offset (no index register). */
785     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
786     {
787     MOVWrm(s, offset, d, X86_NOREG, 1);
788     }
789     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
790    
/* raw_mov_b_Rr: emit MOVBrm -- byte store of register s to base register d
   plus displacement offset (no index register). */
791     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
792     {
793     MOVBrm(s, offset, d, X86_NOREG, 1);
794     }
795     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
796    
/* raw_lea_l_brr: emit LEALmr -- load into d the effective address of base
   register s plus displacement offset (no index register). */
797     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
798     {
799     LEALmr(offset, s, X86_NOREG, 1, d);
800     }
801     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
802    
/* raw_lea_l_brr_indexed: emit LEALmr -- load into d the effective address
   of (s, index, factor) plus displacement offset. */
803     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
804     {
805     LEALmr(offset, s, index, factor, d);
806     }
807     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
808    
/* raw_lea_l_rr_indexed: emit LEALmr -- load into d the effective address
   of (s, index, factor) with displacement 0. */
809     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
810     {
811     LEALmr(0, s, index, factor, d);
812     }
813     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
814    
/* raw_mov_l_bRr: emit MOVLrm -- long store of register s to base register d
   plus displacement offset.  The body is identical to raw_mov_l_Rr. */
815     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
816     {
817     MOVLrm(s, offset, d, X86_NOREG, 1);
818     }
819     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
820    
/* raw_mov_w_bRr: emit MOVWrm -- word store of register s to base register d
   plus displacement offset.  The body is identical to raw_mov_w_Rr. */
821     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
822     {
823     MOVWrm(s, offset, d, X86_NOREG, 1);
824     }
825     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
826    
/* raw_mov_b_bRr: emit MOVBrm -- byte store of register s to base register d
   plus displacement offset.  The body is identical to raw_mov_b_Rr. */
827     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
828     {
829     MOVBrm(s, offset, d, X86_NOREG, 1);
830     }
831     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
832    
/* raw_bswap_32: emit BSWAPLr -- byte-swap the long in register r. */
833     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
834     {
835     BSWAPLr(r);
836     }
837     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
838    
/* raw_bswap_16: swap the two bytes of the word in register r, implemented
   as a word rotate-left by 8 (ROLWir).  Unlike raw_bswap_32 the LOWFUNC
   header declares WRITE as its first descriptor. */
839     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
840     {
841     ROLWir(8, r);
842     }
843     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
844    
/* raw_mov_l_rr: emit MOVLrr -- long move from register s to register d. */
845     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
846     {
847     MOVLrr(s, d);
848     }
849     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
850    
/* raw_mov_l_mr: emit MOVLrm -- long store of register s to the memory
   operand at displacement d (no base or index register). */
851     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
852     {
853     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
854     }
855     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
856    
/* raw_mov_w_mr: emit MOVWrm -- word store of register s to the memory
   operand at displacement d (no base or index register). */
857     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
858     {
859     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
860     }
861     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
862    
/* raw_mov_w_rm: emit MOVWmr -- word load into register d from the memory
   operand at displacement s (no base or index register). */
863     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
864     {
865     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
866     }
867     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
868    
/* raw_mov_b_mr: emit MOVBrm -- byte store of register s to the memory
   operand at displacement d (no base or index register). */
869     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
870     {
871     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
872     }
873     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
874    
/* raw_mov_b_rm: emit MOVBmr -- byte load into register d from the memory
   operand at displacement s (no base or index register). */
875     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
876     {
877     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
878     }
879     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
880    
/* raw_mov_l_ri: emit MOVLir -- load long immediate s into register d. */
881     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
882     {
883     MOVLir(s, d);
884     }
885     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
886    
/* raw_mov_w_ri: emit MOVWir -- load word immediate s into register d. */
887     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
888     {
889     MOVWir(s, d);
890     }
891     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
892    
/* raw_mov_b_ri: emit MOVBir -- load byte immediate s into register d. */
893     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
894     {
895     MOVBir(s, d);
896     }
897     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
898    
/* raw_adc_l_mi: emit ADCLim -- long add-with-carry of immediate s to the
   memory operand at displacement d (no base or index register). */
899     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
900     {
901     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
902     }
903     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
904    
/* raw_add_l_mi: emit ADDLim -- long add of immediate s to the memory
   operand at displacement d (no base or index register).  Note that d is
   declared IMM here, whereas raw_adc_l_mi declares it MEMRW. */
905     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
906     {
907     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
908     }
909     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
910    
911     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
912     {
913     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
914     }
915     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
916    
917     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
918     {
919     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
920     }
921     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
922    
923     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
924     {
925     TESTLir(i, d);
926     }
927     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
928    
929     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
930     {
931     TESTLrr(s, d);
932     }
933     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
934    
935     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
936     {
937     TESTWrr(s, d);
938     }
939     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
940    
941     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
942     {
943     TESTBrr(s, d);
944     }
945     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
946    
947 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
948     {
949     XORLir(i, d);
950     }
951     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
952    
953 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
954     {
955     ANDLir(i, d);
956     }
957     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
958    
959     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
960     {
961     ANDWir(i, d);
962     }
963     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
964    
965     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
966     {
967     ANDLrr(s, d);
968     }
969     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
970    
971     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
972     {
973     ANDWrr(s, d);
974     }
975     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
976    
977     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
978     {
979     ANDBrr(s, d);
980     }
981     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
982    
983     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
984     {
985     ORLir(i, d);
986     }
987     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
988    
989     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
990     {
991     ORLrr(s, d);
992     }
993     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
994    
995     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
996     {
997     ORWrr(s, d);
998     }
999     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1000    
1001     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1002     {
1003     ORBrr(s, d);
1004     }
1005     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1006    
1007     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1008     {
1009     ADCLrr(s, d);
1010     }
1011     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1012    
1013     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1014     {
1015     ADCWrr(s, d);
1016     }
1017     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1018    
1019     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1020     {
1021     ADCBrr(s, d);
1022     }
1023     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1024    
1025     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1026     {
1027     ADDLrr(s, d);
1028     }
1029     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1030    
1031     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1032     {
1033     ADDWrr(s, d);
1034     }
1035     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1036    
1037     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1038     {
1039     ADDBrr(s, d);
1040     }
1041     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1042    
1043     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1044     {
1045     SUBLir(i, d);
1046     }
1047     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1048    
1049     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1050     {
1051     SUBBir(i, d);
1052     }
1053     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1054    
1055     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1056     {
1057     ADDLir(i, d);
1058     }
1059     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1060    
1061     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1062     {
1063     ADDWir(i, d);
1064     }
1065     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1066    
1067     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1068     {
1069     ADDBir(i, d);
1070     }
1071     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1072    
1073     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1074     {
1075     SBBLrr(s, d);
1076     }
1077     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1078    
1079     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1080     {
1081     SBBWrr(s, d);
1082     }
1083     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1084    
1085     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1086     {
1087     SBBBrr(s, d);
1088     }
1089     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1090    
1091     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1092     {
1093     SUBLrr(s, d);
1094     }
1095     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1096    
1097     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1098     {
1099     SUBWrr(s, d);
1100     }
1101     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1102    
1103     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1104     {
1105     SUBBrr(s, d);
1106     }
1107     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1108    
1109     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1110     {
1111     CMPLrr(s, d);
1112     }
1113     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1114    
1115     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1116     {
1117     CMPLir(i, r);
1118     }
1119     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1120    
1121     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1122     {
1123     CMPWrr(s, d);
1124     }
1125     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1126    
1127     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1128     {
1129     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1130     }
1131     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1132    
1133     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1134     {
1135     CMPBir(i, d);
1136     }
1137     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1138    
1139     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1140     {
1141     CMPBrr(s, d);
1142     }
1143     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1144    
1145     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1146     {
1147     CMPLmr(offset, X86_NOREG, index, factor, d);
1148     }
1149     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1150    
1151     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1152     {
1153     XORLrr(s, d);
1154     }
1155     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1156    
1157     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1158     {
1159     XORWrr(s, d);
1160     }
1161     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1162    
1163     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1164     {
1165     XORBrr(s, d);
1166     }
1167     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1168    
1169     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1170     {
1171     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1172     }
1173     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1174    
1175     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1176     {
1177     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1178     }
1179     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1180    
1181     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1182     {
1183     XCHGLrr(r2, r1);
1184     }
1185     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1186    
1187     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1188     {
1189 gbeauche 1.18 PUSHF();
1190 gbeauche 1.13 }
1191     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1192    
1193     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1194     {
1195 gbeauche 1.18 POPF();
1196 gbeauche 1.13 }
1197     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1198    
1199     #else
1200    
/* Switches selecting shorter encodings in the hand-written emitters below. */
const bool optimize_accum	= true;	/* short accumulator opcode when the operand is EAX (see isaccum) */
const bool optimize_imm8	= true;	/* NOTE(review): not referenced in this part of the file; presumably gates imm8 forms */
const bool optimize_shift_once	= true;	/* shift/rotate-by-1 opcode form when the count is 1 */
1204    
1205     /*************************************************************************
1206     * Actual encoding of the instructions on the target CPU *
1207     *************************************************************************/
1208    
1209 gbeauche 1.2 static __inline__ int isaccum(int r)
1210     {
1211     return (r == EAX_INDEX);
1212     }
1213    
1214 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1215     {
1216     return (x>=-128 && x<=127);
1217     }
1218    
1219     static __inline__ int isword(uae_s32 x)
1220     {
1221     return (x>=-32768 && x<=32767);
1222     }
1223    
1224     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1225     {
1226     emit_byte(0x50+r);
1227     }
1228     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1229    
1230     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1231     {
1232     emit_byte(0x58+r);
1233     }
1234     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1235    
1236 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1237     {
1238     emit_byte(0x8f);
1239     emit_byte(0x05);
1240     emit_long(d);
1241     }
1242     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1243    
1244 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1245     {
1246     emit_byte(0x0f);
1247     emit_byte(0xba);
1248     emit_byte(0xe0+r);
1249     emit_byte(i);
1250     }
1251     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1252    
1253     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1254     {
1255     emit_byte(0x0f);
1256     emit_byte(0xa3);
1257     emit_byte(0xc0+8*b+r);
1258     }
1259     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1260    
1261     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1262     {
1263     emit_byte(0x0f);
1264     emit_byte(0xba);
1265     emit_byte(0xf8+r);
1266     emit_byte(i);
1267     }
1268     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1269    
1270     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1271     {
1272     emit_byte(0x0f);
1273     emit_byte(0xbb);
1274     emit_byte(0xc0+8*b+r);
1275     }
1276     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1277    
1278    
1279     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1280     {
1281     emit_byte(0x0f);
1282     emit_byte(0xba);
1283     emit_byte(0xf0+r);
1284     emit_byte(i);
1285     }
1286     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1287    
1288     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1289     {
1290     emit_byte(0x0f);
1291     emit_byte(0xb3);
1292     emit_byte(0xc0+8*b+r);
1293     }
1294     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1295    
1296     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1297     {
1298     emit_byte(0x0f);
1299     emit_byte(0xba);
1300     emit_byte(0xe8+r);
1301     emit_byte(i);
1302     }
1303     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1304    
1305     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1306     {
1307     emit_byte(0x0f);
1308     emit_byte(0xab);
1309     emit_byte(0xc0+8*b+r);
1310     }
1311     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1312    
1313     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1314     {
1315     emit_byte(0x66);
1316     if (isbyte(i)) {
1317     emit_byte(0x83);
1318     emit_byte(0xe8+d);
1319     emit_byte(i);
1320     }
1321     else {
1322 gbeauche 1.2 if (optimize_accum && isaccum(d))
1323     emit_byte(0x2d);
1324     else {
1325 gbeauche 1.1 emit_byte(0x81);
1326     emit_byte(0xe8+d);
1327 gbeauche 1.2 }
1328 gbeauche 1.1 emit_word(i);
1329     }
1330     }
1331     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1332    
1333    
1334     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1335     {
1336     emit_byte(0x8b);
1337     emit_byte(0x05+8*d);
1338     emit_long(s);
1339     }
1340     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1341    
1342     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1343     {
1344     emit_byte(0xc7);
1345     emit_byte(0x05);
1346     emit_long(d);
1347     emit_long(s);
1348     }
1349     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1350    
1351     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1352     {
1353     emit_byte(0x66);
1354     emit_byte(0xc7);
1355     emit_byte(0x05);
1356     emit_long(d);
1357     emit_word(s);
1358     }
1359     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1360    
1361     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1362     {
1363     emit_byte(0xc6);
1364     emit_byte(0x05);
1365     emit_long(d);
1366     emit_byte(s);
1367     }
1368     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1369    
1370     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1371     {
1372     if (optimize_shift_once && (i == 1)) {
1373     emit_byte(0xd0);
1374     emit_byte(0x05);
1375     emit_long(d);
1376     }
1377     else {
1378     emit_byte(0xc0);
1379     emit_byte(0x05);
1380     emit_long(d);
1381     emit_byte(i);
1382     }
1383     }
1384     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1385    
1386     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1387     {
1388     if (optimize_shift_once && (i == 1)) {
1389     emit_byte(0xd0);
1390     emit_byte(0xc0+r);
1391     }
1392     else {
1393     emit_byte(0xc0);
1394     emit_byte(0xc0+r);
1395     emit_byte(i);
1396     }
1397     }
1398     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1399    
1400     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1401     {
1402     emit_byte(0x66);
1403     emit_byte(0xc1);
1404     emit_byte(0xc0+r);
1405     emit_byte(i);
1406     }
1407     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1408    
1409     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1410     {
1411     if (optimize_shift_once && (i == 1)) {
1412     emit_byte(0xd1);
1413     emit_byte(0xc0+r);
1414     }
1415     else {
1416     emit_byte(0xc1);
1417     emit_byte(0xc0+r);
1418     emit_byte(i);
1419     }
1420     }
1421     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1422    
1423     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1424     {
1425     emit_byte(0xd3);
1426     emit_byte(0xc0+d);
1427     }
1428     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1429    
1430     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1431     {
1432     emit_byte(0x66);
1433     emit_byte(0xd3);
1434     emit_byte(0xc0+d);
1435     }
1436     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1437    
1438     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1439     {
1440     emit_byte(0xd2);
1441     emit_byte(0xc0+d);
1442     }
1443     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1444    
1445     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1446     {
1447     emit_byte(0xd3);
1448     emit_byte(0xe0+d);
1449     }
1450     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1451    
1452     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1453     {
1454     emit_byte(0x66);
1455     emit_byte(0xd3);
1456     emit_byte(0xe0+d);
1457     }
1458     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1459    
1460     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1461     {
1462     emit_byte(0xd2);
1463     emit_byte(0xe0+d);
1464     }
1465     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1466    
1467     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1468     {
1469     if (optimize_shift_once && (i == 1)) {
1470     emit_byte(0xd0);
1471     emit_byte(0xc8+r);
1472     }
1473     else {
1474     emit_byte(0xc0);
1475     emit_byte(0xc8+r);
1476     emit_byte(i);
1477     }
1478     }
1479     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1480    
1481     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1482     {
1483     emit_byte(0x66);
1484     emit_byte(0xc1);
1485     emit_byte(0xc8+r);
1486     emit_byte(i);
1487     }
1488     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1489    
1490     // gb-- used for making an fpcr value in compemu_fpp.cpp
1491     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1492     {
1493     emit_byte(0x0b);
1494     emit_byte(0x05+8*d);
1495     emit_long(s);
1496     }
1497     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1498    
1499     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1500     {
1501     if (optimize_shift_once && (i == 1)) {
1502     emit_byte(0xd1);
1503     emit_byte(0xc8+r);
1504     }
1505     else {
1506     emit_byte(0xc1);
1507     emit_byte(0xc8+r);
1508     emit_byte(i);
1509     }
1510     }
1511     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1512    
1513     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1514     {
1515     emit_byte(0xd3);
1516     emit_byte(0xc8+d);
1517     }
1518     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1519    
1520     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1521     {
1522     emit_byte(0x66);
1523     emit_byte(0xd3);
1524     emit_byte(0xc8+d);
1525     }
1526     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1527    
1528     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1529     {
1530     emit_byte(0xd2);
1531     emit_byte(0xc8+d);
1532     }
1533     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1534    
1535     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1536     {
1537     emit_byte(0xd3);
1538     emit_byte(0xe8+d);
1539     }
1540     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1541    
1542     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1543     {
1544     emit_byte(0x66);
1545     emit_byte(0xd3);
1546     emit_byte(0xe8+d);
1547     }
1548     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1549    
1550     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1551     {
1552     emit_byte(0xd2);
1553     emit_byte(0xe8+d);
1554     }
1555     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1556    
1557     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1558     {
1559     emit_byte(0xd3);
1560     emit_byte(0xf8+d);
1561     }
1562     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1563    
1564     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1565     {
1566     emit_byte(0x66);
1567     emit_byte(0xd3);
1568     emit_byte(0xf8+d);
1569     }
1570     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1571    
1572     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1573     {
1574     emit_byte(0xd2);
1575     emit_byte(0xf8+d);
1576     }
1577     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1578    
1579     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1580     {
1581     if (optimize_shift_once && (i == 1)) {
1582     emit_byte(0xd1);
1583     emit_byte(0xe0+r);
1584     }
1585     else {
1586     emit_byte(0xc1);
1587     emit_byte(0xe0+r);
1588     emit_byte(i);
1589     }
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1594     {
1595     emit_byte(0x66);
1596     emit_byte(0xc1);
1597     emit_byte(0xe0+r);
1598     emit_byte(i);
1599     }
1600     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1601    
1602     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1603     {
1604     if (optimize_shift_once && (i == 1)) {
1605     emit_byte(0xd0);
1606     emit_byte(0xe0+r);
1607     }
1608     else {
1609     emit_byte(0xc0);
1610     emit_byte(0xe0+r);
1611     emit_byte(i);
1612     }
1613     }
1614     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1615    
1616     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1617     {
1618     if (optimize_shift_once && (i == 1)) {
1619     emit_byte(0xd1);
1620     emit_byte(0xe8+r);
1621     }
1622     else {
1623     emit_byte(0xc1);
1624     emit_byte(0xe8+r);
1625     emit_byte(i);
1626     }
1627     }
1628     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1629    
1630     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1631     {
1632     emit_byte(0x66);
1633     emit_byte(0xc1);
1634     emit_byte(0xe8+r);
1635     emit_byte(i);
1636     }
1637     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1638    
1639     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1640     {
1641     if (optimize_shift_once && (i == 1)) {
1642     emit_byte(0xd0);
1643     emit_byte(0xe8+r);
1644     }
1645     else {
1646     emit_byte(0xc0);
1647     emit_byte(0xe8+r);
1648     emit_byte(i);
1649     }
1650     }
1651     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1652    
1653     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1654     {
1655     if (optimize_shift_once && (i == 1)) {
1656     emit_byte(0xd1);
1657     emit_byte(0xf8+r);
1658     }
1659     else {
1660     emit_byte(0xc1);
1661     emit_byte(0xf8+r);
1662     emit_byte(i);
1663     }
1664     }
1665     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1666    
1667     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1668     {
1669     emit_byte(0x66);
1670     emit_byte(0xc1);
1671     emit_byte(0xf8+r);
1672     emit_byte(i);
1673     }
1674     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1675    
1676     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1677     {
1678     if (optimize_shift_once && (i == 1)) {
1679     emit_byte(0xd0);
1680     emit_byte(0xf8+r);
1681     }
1682     else {
1683     emit_byte(0xc0);
1684     emit_byte(0xf8+r);
1685     emit_byte(i);
1686     }
1687     }
1688     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1689    
1690     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1691     {
1692     emit_byte(0x9e);
1693     }
1694     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1695    
1696     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1697     {
1698     emit_byte(0x0f);
1699     emit_byte(0xa2);
1700     }
1701     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1702    
1703     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1704     {
1705     emit_byte(0x9f);
1706     }
1707     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1708    
1709     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1710     {
1711     emit_byte(0x0f);
1712     emit_byte(0x90+cc);
1713     emit_byte(0xc0+d);
1714     }
1715     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1716    
1717     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1718     {
1719     emit_byte(0x0f);
1720     emit_byte(0x90+cc);
1721     emit_byte(0x05);
1722     emit_long(d);
1723     }
1724     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1725    
1726     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1727     {
1728     if (have_cmov) {
1729     emit_byte(0x0f);
1730     emit_byte(0x40+cc);
1731     emit_byte(0xc0+8*d+s);
1732     }
1733     else { /* replacement using branch and mov */
1734     int uncc=(cc^1);
1735     emit_byte(0x70+uncc);
1736     emit_byte(2); /* skip next 2 bytes if not cc=true */
1737     emit_byte(0x89);
1738     emit_byte(0xc0+8*s+d);
1739     }
1740     }
1741     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1742    
1743     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1744     {
1745     emit_byte(0x0f);
1746     emit_byte(0xbc);
1747     emit_byte(0xc0+8*d+s);
1748     }
1749     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1750    
1751     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1752     {
1753     emit_byte(0x0f);
1754     emit_byte(0xbf);
1755     emit_byte(0xc0+8*d+s);
1756     }
1757     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1758    
1759     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1760     {
1761     emit_byte(0x0f);
1762     emit_byte(0xbe);
1763     emit_byte(0xc0+8*d+s);
1764     }
1765     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1766    
1767     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1768     {
1769     emit_byte(0x0f);
1770     emit_byte(0xb7);
1771     emit_byte(0xc0+8*d+s);
1772     }
1773     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1774    
1775     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1776     {
1777     emit_byte(0x0f);
1778     emit_byte(0xb6);
1779     emit_byte(0xc0+8*d+s);
1780     }
1781     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1782    
1783     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1784     {
1785     emit_byte(0x0f);
1786     emit_byte(0xaf);
1787     emit_byte(0xc0+8*d+s);
1788     }
1789     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1790    
1791     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1792     {
1793     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1794     abort();
1795     emit_byte(0xf7);
1796     emit_byte(0xea);
1797     }
1798     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1799    
1800     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1801     {
1802     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1803     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1804     abort();
1805     }
1806     emit_byte(0xf7);
1807     emit_byte(0xe2);
1808     }
1809     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1810    
1811     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1812     {
1813     abort(); /* %^$&%^$%#^ x86! */
1814     emit_byte(0x0f);
1815     emit_byte(0xaf);
1816     emit_byte(0xc0+8*d+s);
1817     }
1818     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1819    
1820     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1821     {
1822     emit_byte(0x88);
1823     emit_byte(0xc0+8*s+d);
1824     }
1825     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1826    
1827     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1828     {
1829     emit_byte(0x66);
1830     emit_byte(0x89);
1831     emit_byte(0xc0+8*s+d);
1832     }
1833     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1834    
1835     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1836     {
1837     int isebp=(baser==5)?0x40:0;
1838     int fi;
1839    
1840     switch(factor) {
1841     case 1: fi=0; break;
1842     case 2: fi=1; break;
1843     case 4: fi=2; break;
1844     case 8: fi=3; break;
1845     default: abort();
1846     }
1847    
1848    
1849     emit_byte(0x8b);
1850     emit_byte(0x04+8*d+isebp);
1851     emit_byte(baser+8*index+0x40*fi);
1852     if (isebp)
1853     emit_byte(0x00);
1854     }
1855     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1856    
1857     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1858     {
1859     int fi;
1860     int isebp;
1861    
1862     switch(factor) {
1863     case 1: fi=0; break;
1864     case 2: fi=1; break;
1865     case 4: fi=2; break;
1866     case 8: fi=3; break;
1867     default: abort();
1868     }
1869     isebp=(baser==5)?0x40:0;
1870    
1871     emit_byte(0x66);
1872     emit_byte(0x8b);
1873     emit_byte(0x04+8*d+isebp);
1874     emit_byte(baser+8*index+0x40*fi);
1875     if (isebp)
1876     emit_byte(0x00);
1877     }
1878     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1879    
1880     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1881     {
1882     int fi;
1883     int isebp;
1884    
1885     switch(factor) {
1886     case 1: fi=0; break;
1887     case 2: fi=1; break;
1888     case 4: fi=2; break;
1889     case 8: fi=3; break;
1890     default: abort();
1891     }
1892     isebp=(baser==5)?0x40:0;
1893    
1894     emit_byte(0x8a);
1895     emit_byte(0x04+8*d+isebp);
1896     emit_byte(baser+8*index+0x40*fi);
1897     if (isebp)
1898     emit_byte(0x00);
1899     }
1900     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1901    
1902     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1903     {
1904     int fi;
1905     int isebp;
1906    
1907     switch(factor) {
1908     case 1: fi=0; break;
1909     case 2: fi=1; break;
1910     case 4: fi=2; break;
1911     case 8: fi=3; break;
1912     default: abort();
1913     }
1914    
1915    
1916     isebp=(baser==5)?0x40:0;
1917    
1918     emit_byte(0x89);
1919     emit_byte(0x04+8*s+isebp);
1920     emit_byte(baser+8*index+0x40*fi);
1921     if (isebp)
1922     emit_byte(0x00);
1923     }
1924     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1925    
1926     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1927     {
1928     int fi;
1929     int isebp;
1930    
1931     switch(factor) {
1932     case 1: fi=0; break;
1933     case 2: fi=1; break;
1934     case 4: fi=2; break;
1935     case 8: fi=3; break;
1936     default: abort();
1937     }
1938     isebp=(baser==5)?0x40:0;
1939    
1940     emit_byte(0x66);
1941     emit_byte(0x89);
1942     emit_byte(0x04+8*s+isebp);
1943     emit_byte(baser+8*index+0x40*fi);
1944     if (isebp)
1945     emit_byte(0x00);
1946     }
1947     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1948    
1949     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1950     {
1951     int fi;
1952     int isebp;
1953    
1954     switch(factor) {
1955     case 1: fi=0; break;
1956     case 2: fi=1; break;
1957     case 4: fi=2; break;
1958     case 8: fi=3; break;
1959     default: abort();
1960     }
1961     isebp=(baser==5)?0x40:0;
1962    
1963     emit_byte(0x88);
1964     emit_byte(0x04+8*s+isebp);
1965     emit_byte(baser+8*index+0x40*fi);
1966     if (isebp)
1967     emit_byte(0x00);
1968     }
1969     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1970    
1971     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1972     {
1973     int fi;
1974    
1975     switch(factor) {
1976     case 1: fi=0; break;
1977     case 2: fi=1; break;
1978     case 4: fi=2; break;
1979     case 8: fi=3; break;
1980     default: abort();
1981     }
1982    
1983     emit_byte(0x89);
1984     emit_byte(0x84+8*s);
1985     emit_byte(baser+8*index+0x40*fi);
1986     emit_long(base);
1987     }
1988     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1989    
1990     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1991     {
1992     int fi;
1993    
1994     switch(factor) {
1995     case 1: fi=0; break;
1996     case 2: fi=1; break;
1997     case 4: fi=2; break;
1998     case 8: fi=3; break;
1999     default: abort();
2000     }
2001    
2002     emit_byte(0x66);
2003     emit_byte(0x89);
2004     emit_byte(0x84+8*s);
2005     emit_byte(baser+8*index+0x40*fi);
2006     emit_long(base);
2007     }
2008     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2009    
2010     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2011     {
2012     int fi;
2013    
2014     switch(factor) {
2015     case 1: fi=0; break;
2016     case 2: fi=1; break;
2017     case 4: fi=2; break;
2018     case 8: fi=3; break;
2019     default: abort();
2020     }
2021    
2022     emit_byte(0x88);
2023     emit_byte(0x84+8*s);
2024     emit_byte(baser+8*index+0x40*fi);
2025     emit_long(base);
2026     }
2027     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2028    
/* Emit a 32-bit load into register d from memory at base + baser + index*factor.
 * Encoding: 0x8B (MOV r32,r/m32), ModRM mod=10 reg=d rm=100 (SIB + disp32),
 * SIB scale=fi index=index base=baser, then the displacement `base`.
 * factor must be 1, 2, 4 or 8; any other value aborts. */
2029     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2030     {
2031     int fi;
2032    
2033     switch(factor) {
2034     case 1: fi=0; break;
2035     case 2: fi=1; break;
2036     case 4: fi=2; break;
2037     case 8: fi=3; break;
2038     default: abort();
2039     }
2040    
2041     emit_byte(0x8b);
2042     emit_byte(0x84+8*d);
2043     emit_byte(baser+8*index+0x40*fi);
2044     emit_long(base);
2045     }
2046     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2047    
/* Emit a 16-bit load into register d from memory at base + baser + index*factor.
 * Encoding: 0x66 operand-size prefix, 0x8B (MOV r16,r/m16), ModRM mod=10
 * reg=d rm=100 (SIB + disp32), SIB scale=fi index=index base=baser, disp.
 * factor must be 1, 2, 4 or 8; any other value aborts. */
2048     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2049     {
2050     int fi;
2051    
2052     switch(factor) {
2053     case 1: fi=0; break;
2054     case 2: fi=1; break;
2055     case 4: fi=2; break;
2056     case 8: fi=3; break;
2057     default: abort();
2058     }
2059    
2060     emit_byte(0x66);
2061     emit_byte(0x8b);
2062     emit_byte(0x84+8*d);
2063     emit_byte(baser+8*index+0x40*fi);
2064     emit_long(base);
2065     }
2066     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2067    
/* Emit an 8-bit load into register d from memory at base + baser + index*factor.
 * Encoding: 0x8A (MOV r8,r/m8), ModRM mod=10 reg=d rm=100 (SIB + disp32),
 * SIB scale=fi index=index base=baser, then the displacement `base`.
 * factor must be 1, 2, 4 or 8; any other value aborts. */
2068     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2069     {
2070     int fi;
2071    
2072     switch(factor) {
2073     case 1: fi=0; break;
2074     case 2: fi=1; break;
2075     case 4: fi=2; break;
2076     case 8: fi=3; break;
2077     default: abort();
2078     }
2079    
2080     emit_byte(0x8a);
2081     emit_byte(0x84+8*d);
2082     emit_byte(baser+8*index+0x40*fi);
2083     emit_long(base);
2084     }
2085     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2086    
/* Emit a 32-bit load into register d from memory at base + index*factor
 * (no base register).
 * Encoding: 0x8B (MOV r32,r/m32), ModRM mod=00 reg=d rm=100 (SIB follows),
 * SIB scale=fi index=index base=101, which with mod=00 means "disp32, no
 * base register", then the displacement `base`.
 * A factor other than 1, 2, 4 or 8 is reported on stderr and aborts. */
2087     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2088     {
2089     int fi;
2090     switch(factor) {
2091     case 1: fi=0; break;
2092     case 2: fi=1; break;
2093     case 4: fi=2; break;
2094     case 8: fi=3; break;
2095     default:
2096     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2097     abort();
2098     }
2099     emit_byte(0x8b);
2100     emit_byte(0x04+8*d);
2101     emit_byte(0x05+8*index+64*fi);
2102     emit_long(base);
2103     }
2104     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2105    
/* Emit a conditional 32-bit load into register d from memory at
 * base + index*factor (no base register), taken when condition `cond` holds.
 * With have_cmov: 0x0F 0x40+cond (CMOVcc r32,r/m32), ModRM mod=00 reg=d
 * rm=100, SIB scale=fi index=index base=101 (disp32, no base), disp32.
 * Without it: a short Jcc on the inverted condition (cond^1) that skips the
 * 7-byte plain MOV (opcode + ModRM + SIB + disp32) that follows.
 * A factor other than 1, 2, 4 or 8 is reported on stderr and aborts; the
 * diagnostic names this function rather than the unconditional variant. */
2106     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2107     {
2108     int fi;
2109     switch(factor) {
2110     case 1: fi=0; break;
2111     case 2: fi=1; break;
2112     case 4: fi=2; break;
2113     case 8: fi=3; break;
2114     default:
2115     fprintf(stderr,"Bad factor %d in cmov_l_rm_indexed!\n",factor);
2116     abort();
2117     }
2118     if (have_cmov) {
2119     emit_byte(0x0f);
2120     emit_byte(0x40+cond);
2121     emit_byte(0x04+8*d);
2122     emit_byte(0x05+8*index+64*fi);
2123     emit_long(base);
2124     }
2125     else { /* replacement using branch and mov */
2126     int uncc=(cond^1);
2127     emit_byte(0x70+uncc);
2128     emit_byte(7); /* skip next 7 bytes if not cc=true */
2129     emit_byte(0x8b);
2130     emit_byte(0x04+8*d);
2131     emit_byte(0x05+8*index+64*fi);
2132     emit_long(base);
2133     }
2134     }
2135     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2136    
/* Emit a conditional 32-bit load into register d from address `mem`,
 * taken when condition `cond` holds.
 * With have_cmov: 0x0F 0x40+cond (CMOVcc r32,r/m32), ModRM mod=00 reg=d
 * rm=101 (disp32-only addressing), disp32.
 * Without it: a short Jcc on the inverted condition (cond^1) that skips the
 * 6-byte plain MOV (opcode + ModRM + disp32) that follows. */
2137     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2138     {
2139     if (have_cmov) {
2140     emit_byte(0x0f);
2141     emit_byte(0x40+cond);
2142     emit_byte(0x05+8*d);
2143     emit_long(mem);
2144     }
2145     else { /* replacement using branch and mov */
2146     int uncc=(cond^1);
2147     emit_byte(0x70+uncc);
2148     emit_byte(6); /* skip next 6 bytes if not cc=true */
2149     emit_byte(0x8b);
2150     emit_byte(0x05+8*d);
2151     emit_long(mem);
2152     }
2153     }
2154     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2155    
/* Emit a 32-bit load into register d from memory at s + offset (8-bit disp).
 * Encoding: 0x8B (MOV r32,r/m32), ModRM mod=01 reg=d rm=s, disp8.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2156     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2157     {
2158 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2159 gbeauche 1.1 emit_byte(0x8b);
2160     emit_byte(0x40+8*d+s);
2161     emit_byte(offset);
2162     }
2163     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2164    
/* Emit a 16-bit load into register d from memory at s + offset (8-bit disp).
 * Encoding: 0x66 prefix, 0x8B (MOV r16,r/m16), ModRM mod=01 reg=d rm=s, disp8.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2165     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2166     {
2167 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2168 gbeauche 1.1 emit_byte(0x66);
2169     emit_byte(0x8b);
2170     emit_byte(0x40+8*d+s);
2171     emit_byte(offset);
2172     }
2173     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2174    
/* Emit an 8-bit load into register d from memory at s + offset (8-bit disp).
 * Encoding: 0x8A (MOV r8,r/m8), ModRM mod=01 reg=d rm=s, disp8.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2175     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2176     {
2177 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2178 gbeauche 1.1 emit_byte(0x8a);
2179     emit_byte(0x40+8*d+s);
2180     emit_byte(offset);
2181     }
2182     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2183    
/* Emit a 32-bit load into register d from memory at s + offset (32-bit disp).
 * Encoding: 0x8B (MOV r32,r/m32), ModRM mod=10 reg=d rm=s, disp32. */
2184     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2185     {
2186     emit_byte(0x8b);
2187     emit_byte(0x80+8*d+s);
2188     emit_long(offset);
2189     }
2190     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2191    
/* Emit a 16-bit load into register d from memory at s + offset (32-bit disp).
 * Encoding: 0x66 prefix, 0x8B (MOV r16,r/m16), ModRM mod=10 reg=d rm=s, disp32. */
2192     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2193     {
2194     emit_byte(0x66);
2195     emit_byte(0x8b);
2196     emit_byte(0x80+8*d+s);
2197     emit_long(offset);
2198     }
2199     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2200    
/* Emit an 8-bit load into register d from memory at s + offset (32-bit disp).
 * Encoding: 0x8A (MOV r8,r/m8), ModRM mod=10 reg=d rm=s, disp32. */
2201     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2202     {
2203     emit_byte(0x8a);
2204     emit_byte(0x80+8*d+s);
2205     emit_long(offset);
2206     }
2207     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2208    
/* Emit a store of the 32-bit immediate i to memory at d + offset (8-bit disp).
 * Encoding: 0xC7 /0 (MOV r/m32,imm32), ModRM mod=01 reg=0 rm=d, disp8, imm32.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2209     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2210     {
2211 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2212 gbeauche 1.1 emit_byte(0xc7);
2213     emit_byte(0x40+d);
2214     emit_byte(offset);
2215     emit_long(i);
2216     }
2217     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2218    
/* Emit a store of the 16-bit immediate i to memory at d + offset (8-bit disp).
 * Encoding: 0x66 prefix, 0xC7 /0 (MOV r/m16,imm16), ModRM mod=01 reg=0 rm=d,
 * disp8, imm16 (via emit_word).
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2219     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2220     {
2221 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2222 gbeauche 1.1 emit_byte(0x66);
2223     emit_byte(0xc7);
2224     emit_byte(0x40+d);
2225     emit_byte(offset);
2226     emit_word(i);
2227     }
2228     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2229    
/* Emit a store of the 8-bit immediate i to memory at d + offset (8-bit disp).
 * Encoding: 0xC6 /0 (MOV r/m8,imm8), ModRM mod=01 reg=0 rm=d, disp8, imm8.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2230     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2231     {
2232 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2233 gbeauche 1.1 emit_byte(0xc6);
2234     emit_byte(0x40+d);
2235     emit_byte(offset);
2236     emit_byte(i);
2237     }
2238     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2239    
/* Emit a 32-bit store of register s to memory at d + offset (8-bit disp).
 * Encoding: 0x89 (MOV r/m32,r32), ModRM mod=01 reg=s rm=d, disp8.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2240     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2241     {
2242 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2243 gbeauche 1.1 emit_byte(0x89);
2244     emit_byte(0x40+8*s+d);
2245     emit_byte(offset);
2246     }
2247     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2248    
/* Emit a 16-bit store of register s to memory at d + offset (8-bit disp).
 * Encoding: 0x66 prefix, 0x89 (MOV r/m16,r16), ModRM mod=01 reg=s rm=d, disp8.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2249     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2250     {
2251 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2252 gbeauche 1.1 emit_byte(0x66);
2253     emit_byte(0x89);
2254     emit_byte(0x40+8*s+d);
2255     emit_byte(offset);
2256     }
2257     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2258    
/* Emit an 8-bit store of register s to memory at d + offset (8-bit disp).
 * Encoding: 0x88 (MOV r/m8,r8), ModRM mod=01 reg=s rm=d, disp8.
 * The Dif() guard (defined elsewhere; presumably a debug-only check) aborts
 * when offset does not satisfy isbyte(). */
2259     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2260     {
2261 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2262 gbeauche 1.1 emit_byte(0x88);
2263     emit_byte(0x40+8*s+d);
2264     emit_byte(offset);
2265     }
2266     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2267    
/* Emit LEA d, [s + offset] (opcode 0x8D).
 * Uses the short form (ModRM mod=01, disp8) when optimize_imm8 is set and
 * offset satisfies isbyte(); otherwise the long form (mod=10, disp32). */
2268     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2269     {
2270     if (optimize_imm8 && isbyte(offset)) {
2271     emit_byte(0x8d);
2272     emit_byte(0x40+8*d+s);
2273     emit_byte(offset);
2274     }
2275     else {
2276     emit_byte(0x8d);
2277     emit_byte(0x80+8*d+s);
2278     emit_long(offset);
2279     }
2280     }
2281     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2282    
/* Emit LEA d, [s + index*factor + offset] (opcode 0x8D).
 * ModRM has rm=100 so a SIB byte (scale=fi index=index base=s) follows.
 * Uses mod=01 with disp8 when optimize_imm8 is set and offset satisfies
 * isbyte(); otherwise mod=10 with disp32.
 * factor must be 1, 2, 4 or 8; any other value aborts. */
2283     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2284     {
2285     int fi;
2286    
2287     switch(factor) {
2288     case 1: fi=0; break;
2289     case 2: fi=1; break;
2290     case 4: fi=2; break;
2291     case 8: fi=3; break;
2292     default: abort();
2293     }
2294    
2295     if (optimize_imm8 && isbyte(offset)) {
2296     emit_byte(0x8d);
2297     emit_byte(0x44+8*d);
2298     emit_byte(0x40*fi+8*index+s);
2299     emit_byte(offset);
2300     }
2301     else {
2302     emit_byte(0x8d);
2303     emit_byte(0x84+8*d);
2304     emit_byte(0x40*fi+8*index+s);
2305     emit_long(offset);
2306     }
2307     }
2308     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2309    
/* Emit LEA d, [s + index*factor] (opcode 0x8D) with no displacement.
 * ModRM has rm=100 so a SIB byte (scale=fi index=index base=s) follows.
 * Register number 5 (EBP) cannot be a SIB base with mod=00 (that encoding
 * means "disp32, no base"), so for s==5 mod=01 is used with a zero disp8.
 * factor must be 1, 2, 4 or 8; any other value aborts. */
2310     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2311     {
2312     int isebp=(s==5)?0x40:0;
2313     int fi;
2314    
2315     switch(factor) {
2316     case 1: fi=0; break;
2317     case 2: fi=1; break;
2318     case 4: fi=2; break;
2319     case 8: fi=3; break;
2320     default: abort();
2321     }
2322    
2323     emit_byte(0x8d);
2324     emit_byte(0x04+8*d+isebp);
2325     emit_byte(0x40*fi+8*index+s);
2326     if (isebp)
2327     emit_byte(0);
2328     }
2329     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2330    
/* Emit a 32-bit store of register s to memory at d + offset.
 * Encoding: 0x89 (MOV r/m32,r32) with reg=s rm=d; ModRM mod=01 + disp8 when
 * optimize_imm8 is set and offset satisfies isbyte(), else mod=10 + disp32. */
2331     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2332     {
2333     if (optimize_imm8 && isbyte(offset)) {
2334     emit_byte(0x89);
2335     emit_byte(0x40+8*s+d);
2336     emit_byte(offset);
2337     }
2338     else {
2339     emit_byte(0x89);
2340     emit_byte(0x80+8*s+d);
2341     emit_long(offset);
2342     }
2343     }
2344     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2345    
/* Emit a 16-bit store of register s to memory at d + offset.
 * Encoding: 0x66 prefix, 0x89 (MOV r/m16,r16), ModRM mod=10 reg=s rm=d, disp32.
 * Unlike raw_mov_l_bRr and raw_mov_b_bRr, this always uses the disp32 form. */
2346     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2347     {
2348     emit_byte(0x66);
2349     emit_byte(0x89);
2350     emit_byte(0x80+8*s+d);
2351     emit_long(offset);
2352     }
2353     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2354    
/* Emit an 8-bit store of register s to memory at d + offset.
 * Encoding: 0x88 (MOV r/m8,r8) with reg=s rm=d; ModRM mod=01 + disp8 when
 * optimize_imm8 is set and offset satisfies isbyte(), else mod=10 + disp32. */
2355     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2356     {
2357     if (optimize_imm8 && isbyte(offset)) {
2358     emit_byte(0x88);
2359     emit_byte(0x40+8*s+d);
2360     emit_byte(offset);
2361     }
2362     else {
2363     emit_byte(0x88);
2364     emit_byte(0x80+8*s+d);
2365     emit_long(offset);
2366     }
2367     }
2368     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2369    
/* Emit BSWAP r (0x0F 0xC8+r): reverse the byte order of 32-bit register r. */
2370     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2371     {
2372     emit_byte(0x0f);
2373     emit_byte(0xc8+r);
2374     }
2375     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2376    
/* Swap the two bytes of the low 16 bits of register r by emitting a 16-bit
 * rotate by 8: 0x66 prefix, 0xC1 /0 (ROL r/m16,imm8), ModRM mod=11 reg=0
 * rm=r, imm8=8. Being a rotate, this instruction modifies CPU flags. */
2377     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2378     {
2379     emit_byte(0x66);
2380     emit_byte(0xc1);
2381     emit_byte(0xc0+r);
2382     emit_byte(0x08);
2383     }
2384     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2385    
/* Emit a 32-bit register move d = s.
 * Encoding: 0x89 (MOV r/m32,r32), ModRM mod=11 reg=s rm=d. */
2386     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2387     {
2388     emit_byte(0x89);
2389     emit_byte(0xc0+8*s+d);
2390     }
2391     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2392    
/* Emit a 32-bit store of register s to the address d.
 * Encoding: 0x89 (MOV r/m32,r32), ModRM mod=00 reg=s rm=101 (disp32 only),
 * then the 32-bit address. */
2393     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2394     {
2395     emit_byte(0x89);
2396     emit_byte(0x05+8*s);
2397     emit_long(d);
2398     }
2399     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2400    
/* Emit a 16-bit store of register s to the address d.
 * Encoding: 0x66 prefix, 0x89 (MOV r/m16,r16), ModRM mod=00 reg=s rm=101
 * (disp32 only), then the 32-bit address. */
2401     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2402     {
2403     emit_byte(0x66);
2404     emit_byte(0x89);
2405     emit_byte(0x05+8*s);
2406     emit_long(d);
2407     }
2408     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2409    
/* Emit a 16-bit load into register d from the address s.
 * Encoding: 0x66 prefix, 0x8B (MOV r16,r/m16), ModRM mod=00 reg=d rm=101
 * (disp32 only), then the 32-bit address. */
2410     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2411     {
2412     emit_byte(0x66);
2413     emit_byte(0x8b);
2414     emit_byte(0x05+8*d);
2415     emit_long(s);
2416     }
2417     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2418    
/* Emit an 8-bit store of register s to the address d.
 * Encoding: 0x88 (MOV r/m8,r8), ModRM mod=00 reg=(s&0xf) rm=101 (disp32
 * only), then the 32-bit address. Only the low four bits of s enter the
 * reg field, so an id such as 0x10+4 encodes as register number 4. */
2419     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2420     {
2421     emit_byte(0x88);
2422 gbeauche 1.33 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2423 gbeauche 1.1 emit_long(d);
2424     }
2425     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2426    
/* Emit an 8-bit load into register d from the address s.
 * Encoding: 0x8A (MOV r8,r/m8), ModRM mod=00 reg=d rm=101 (disp32 only),
 * then the 32-bit address. */
2427     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2428     {
2429     emit_byte(0x8a);
2430     emit_byte(0x05+8*d);
2431     emit_long(s);
2432     }
2433     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2434    
/* Emit a load of the 32-bit immediate s into register d.
 * Encoding: 0xB8+d (MOV r32,imm32) followed by imm32. */
2435     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2436     {
2437     emit_byte(0xb8+d);
2438     emit_long(s);
2439     }
2440     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2441    
/* Emit a load of the 16-bit immediate s into register d.
 * Encoding: 0x66 prefix, 0xB8+d (MOV r16,imm16), imm16 (via emit_word). */
2442     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2443     {
2444     emit_byte(0x66);
2445     emit_byte(0xb8+d);
2446     emit_word(s);
2447     }
2448     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2449    
/* Emit a load of the 8-bit immediate s into byte register d.
 * Encoding: 0xB0+d (MOV r8,imm8) followed by imm8. */
2450     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2451     {
2452     emit_byte(0xb0+d);
2453     emit_byte(s);
2454     }
2455     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2456    
/* Emit ADC dword [d], imm32 (add with carry to the 32-bit value at address d).
 * Encoding: 0x81 /2, ModRM 0x15 = mod=00 reg=010 rm=101 (disp32 only),
 * the 32-bit address, then imm32. */
2457     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2458     {
2459     emit_byte(0x81);
2460     emit_byte(0x15);
2461     emit_long(d);
2462     emit_long(s);
2463     }
2464     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2465    
/* Emit ADD dword [d], s (add an immediate to the 32-bit value at address d).
 * ModRM 0x05 = mod=00 reg=000 (ADD) rm=101 (disp32 only).
 * Uses 0x83 with a sign-extended imm8 when optimize_imm8 is set and s
 * satisfies isbyte(); otherwise 0x81 with a full imm32. */
2466     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2467     {
2468     if (optimize_imm8 && isbyte(s)) {
2469     emit_byte(0x83);
2470     emit_byte(0x05);
2471     emit_long(d);
2472     emit_byte(s);
2473     }
2474     else {
2475     emit_byte(0x81);
2476     emit_byte(0x05);
2477     emit_long(d);
2478     emit_long(s);
2479     }
2480     }
2481     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2482    
/* Emit ADD word [d], imm16 (add an immediate to the 16-bit value at address d).
 * Encoding: 0x66 prefix, 0x81 /0, ModRM 0x05 (mod=00 reg=000 rm=101),
 * the 32-bit address, then imm16 (via emit_word). */
2483     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2484     {
2485     emit_byte(0x66);
2486     emit_byte(0x81);
2487     emit_byte(0x05);
2488     emit_long(d);
2489     emit_word(s);
2490     }
2491     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2492    
/* Emit ADD byte [d], imm8 (add an immediate to the 8-bit value at address d).
 * Encoding: 0x80 /0, ModRM 0x05 (mod=00 reg=000 rm=101), the 32-bit
 * address, then imm8. */
2493     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2494     {
2495     emit_byte(0x80);
2496     emit_byte(0x05);
2497     emit_long(d);
2498     emit_byte(s);
2499     }
2500     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2501    
/* Emit TEST d, imm32 (AND without storing the result; only flags change).
 * When optimize_accum is set and isaccum(d) holds, the one-byte accumulator
 * opcode 0xA9 (TEST EAX,imm32) is used; otherwise 0xF7 /0 with ModRM mod=11
 * rm=d. The imm32 follows in both cases. */
2502     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2503     {
2504 gbeauche 1.2 if (optimize_accum && isaccum(d))
2505     emit_byte(0xa9);
2506     else {
2507 gbeauche 1.1 emit_byte(0xf7);
2508     emit_byte(0xc0+d);
2509 gbeauche 1.2 }
2510 gbeauche 1.1 emit_long(i);
2511     }
2512     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2513    
/* Emit a 32-bit TEST d, s (flags from d AND s; no register is written).
 * Encoding: 0x85 (TEST r/m32,r32), ModRM mod=11 reg=s rm=d. */
2514     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2515     {
2516     emit_byte(0x85);
2517     emit_byte(0xc0+8*s+d);
2518     }
2519     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2520    
/* Emit a 16-bit TEST d, s (flags from d AND s; no register is written).
 * Encoding: 0x66 prefix, 0x85 (TEST r/m16,r16), ModRM mod=11 reg=s rm=d. */
2521     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2522     {
2523     emit_byte(0x66);
2524     emit_byte(0x85);
2525     emit_byte(0xc0+8*s+d);
2526     }
2527     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2528    
/* Emit an 8-bit TEST d, s (flags from d AND s; no register is written).
 * Encoding: 0x84 (TEST r/m8,r8), ModRM mod=11 reg=s rm=d. */
2529     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2530     {
2531     emit_byte(0x84);
2532     emit_byte(0xc0+8*s+d);
2533     }
2534     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2535    
/* Emit XOR d, imm32.
 * Encoding: 0x81 /6, ModRM 0xF0+d = mod=11 reg=110 rm=d, then imm32.
 * Always uses the imm32 form (no imm8 or accumulator short forms). */
2536 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2537     {
2538     emit_byte(0x81);
2539     emit_byte(0xf0+d);
2540     emit_long(i);
2541     }
2542     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2543    
/* Emit a 32-bit AND d, i. Three encodings, chosen in this order:
 *  - 0x83 /4 with sign-extended imm8, when optimize_imm8 && isbyte(i);
 *  - 0x25 (AND EAX,imm32), when optimize_accum && isaccum(d);
 *  - 0x81 /4 with ModRM 0xE0+d (mod=11 reg=100 rm=d) and imm32. */
2544 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2545     {
2546     if (optimize_imm8 && isbyte(i)) {
2547 gbeauche 1.2 emit_byte(0x83);
2548     emit_byte(0xe0+d);
2549     emit_byte(i);
2550 gbeauche 1.1 }
2551     else {
2552 gbeauche 1.2 if (optimize_accum && isaccum(d))
2553     emit_byte(0x25);
2554     else {
2555     emit_byte(0x81);
2556     emit_byte(0xe0+d);
2557     }
2558     emit_long(i);
2559 gbeauche 1.1 }
2560     }
2561     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2562    
/* Emit a 16-bit AND d, i. The 0x66 operand-size prefix is emitted first,
 * then one of three encodings, chosen in this order:
 *  - 0x83 /4 with sign-extended imm8, when optimize_imm8 && isbyte(i);
 *  - 0x25 (AND AX,imm16), when optimize_accum && isaccum(d);
 *  - 0x81 /4 with ModRM 0xE0+d (mod=11 reg=100 rm=d) and imm16. */
2563     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2564     {
2565 gbeauche 1.2 emit_byte(0x66);
2566     if (optimize_imm8 && isbyte(i)) {
2567     emit_byte(0x83);
2568     emit_byte(0xe0+d);
2569     emit_byte(i);
2570     }
2571     else {
2572     if (optimize_accum && isaccum(d))
2573     emit_byte(0x25);
2574     else {
2575     emit_byte(0x81);
2576     emit_byte(0xe0+d);
2577     }
2578     emit_word(i);
2579     }
2580 gbeauche 1.1 }
2581     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2582    
/* Emit a 32-bit AND d, s (d &= s).
 * Encoding: 0x21 (AND r/m32,r32), ModRM mod=11 reg=s rm=d. */
2583     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2584     {
2585     emit_byte(0x21);
2586     emit_byte(0xc0+8*s+d);
2587     }
2588     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2589    
/* Emit a 16-bit AND d, s (d &= s).
 * Encoding: 0x66 prefix, 0x21 (AND r/m16,r16), ModRM mod=11 reg=s rm=d. */
2590     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2591     {
2592     emit_byte(0x66);
2593     emit_byte(0x21);
2594     emit_byte(0xc0+8*s+d);
2595     }
2596     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2597    
/* Emit an 8-bit AND d, s (d &= s).
 * Encoding: 0x20 (AND r/m8,r8), ModRM mod=11 reg=s rm=d. */
2598     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2599     {
2600     emit_byte(0x20);
2601     emit_byte(0xc0+8*s+d);
2602     }
2603     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2604    
/* Emit a 32-bit OR d, i. Three encodings, chosen in this order:
 *  - 0x83 /1 with sign-extended imm8, when optimize_imm8 && isbyte(i);
 *  - 0x0D (OR EAX,imm32), when optimize_accum && isaccum(d);
 *  - 0x81 /1 with ModRM 0xC8+d (mod=11 reg=001 rm=d) and imm32. */
2605     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2606     {
2607     if (optimize_imm8 && isbyte(i)) {
2608     emit_byte(0x83);
2609     emit_byte(0xc8+d);
2610     emit_byte(i);
2611     }
2612     else {
2613 gbeauche 1.2 if (optimize_accum && isaccum(d))
2614     emit_byte(0x0d);
2615     else {
2616 gbeauche 1.1 emit_byte(0x81);
2617     emit_byte(0xc8+d);
2618 gbeauche 1.2 }
2619 gbeauche 1.1 emit_long(i);
2620     }
2621     }
2622     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2623    
/* Emit a 32-bit OR d, s (d |= s).
 * Encoding: 0x09 (OR r/m32,r32), ModRM mod=11 reg=s rm=d. */
2624     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2625     {
2626     emit_byte(0x09);
2627     emit_byte(0xc0+8*s+d);
2628     }
2629     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2630    
/* Emit a 16-bit OR d, s (d |= s).
 * Encoding: 0x66 prefix, 0x09 (OR r/m16,r16), ModRM mod=11 reg=s rm=d. */
2631     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2632     {
2633     emit_byte(0x66);
2634     emit_byte(0x09);
2635     emit_byte(0xc0+8*s+d);
2636     }
2637     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2638    
/* Emit an 8-bit OR d, s (d |= s).
 * Encoding: 0x08 (OR r/m8,r8), ModRM mod=11 reg=s rm=d. */
2639     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2640     {
2641     emit_byte(0x08);
2642     emit_byte(0xc0+8*s+d);
2643     }
2644     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2645    
/* Emit a 32-bit ADC d, s (d += s + carry flag).
 * Encoding: 0x11 (ADC r/m32,r32), ModRM mod=11 reg=s rm=d. */
2646     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2647     {
2648     emit_byte(0x11);
2649     emit_byte(0xc0+8*s+d);
2650     }
2651     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2652    
/* Emit a 16-bit ADC d, s (d += s + carry flag).
 * Encoding: 0x66 prefix, 0x11 (ADC r/m16,r16), ModRM mod=11 reg=s rm=d. */
2653     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2654     {
2655     emit_byte(0x66);
2656     emit_byte(0x11);
2657     emit_byte(0xc0+8*s+d);
2658     }
2659     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2660    
/* Emit an 8-bit ADC d, s (d += s + carry flag).
 * Encoding: 0x10 (ADC r/m8,r8), ModRM mod=11 reg=s rm=d. */
2661     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2662     {
2663     emit_byte(0x10);
2664     emit_byte(0xc0+8*s+d);
2665     }
2666     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2667    
/* Emit a 32-bit ADD d, s (d += s).
 * Encoding: 0x01 (ADD r/m32,r32), ModRM mod=11 reg=s rm=d. */
2668     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2669     {
2670     emit_byte(0x01);
2671     emit_byte(0xc0+8*s+d);
2672     }
2673     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2674    
/* Emit a 16-bit ADD d, s (d += s).
 * Encoding: 0x66 prefix, 0x01 (ADD r/m16,r16), ModRM mod=11 reg=s rm=d. */
2675     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2676     {
2677     emit_byte(0x66);
2678     emit_byte(0x01);
2679     emit_byte(0xc0+8*s+d);
2680     }
2681     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2682    
/* Emit an 8-bit ADD d, s (d += s).
 * Encoding: 0x00 (ADD r/m8,r8), ModRM mod=11 reg=s rm=d. */
2683     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2684     {
2685     emit_byte(0x00);
2686     emit_byte(0xc0+8*s+d);
2687     }
2688     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2689    
/* Emit a 32-bit SUB d, i. Three encodings, chosen in this order:
 *  - 0x83 /5 with sign-extended imm8, whenever isbyte(i) holds (this test is
 *    not gated on optimize_imm8, unlike e.g. raw_and_l_ri);
 *  - 0x2D (SUB EAX,imm32), when optimize_accum && isaccum(d);
 *  - 0x81 /5 with ModRM 0xE8+d (mod=11 reg=101 rm=d) and imm32. */
2690     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2691     {
2692     if (isbyte(i)) {
2693     emit_byte(0x83);
2694     emit_byte(0xe8+d);
2695     emit_byte(i);
2696     }
2697     else {
2698 gbeauche 1.2 if (optimize_accum && isaccum(d))
2699     emit_byte(0x2d);
2700     else {
2701 gbeauche 1.1 emit_byte(0x81);
2702     emit_byte(0xe8+d);
2703 gbeauche 1.2 }
2704 gbeauche 1.1 emit_long(i);
2705     }
2706     }
2707     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2708    
/* Emit an 8-bit SUB d, imm8.
 * When optimize_accum is set and isaccum(d) holds, the accumulator opcode
 * 0x2C (SUB AL,imm8) is used; otherwise 0x80 /5 with ModRM 0xE8+d
 * (mod=11 reg=101 rm=d). The imm8 follows in both cases. */
2709     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2710     {
2711 gbeauche 1.2 if (optimize_accum && isaccum(d))
2712     emit_byte(0x2c);
2713     else {
2714 gbeauche 1.1 emit_byte(0x80);
2715     emit_byte(0xe8+d);
2716 gbeauche 1.2 }
2717 gbeauche 1.1 emit_byte(i);
2718     }
2719     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2720    
/* Emit a 32-bit ADD d, i. Three encodings, chosen in this order:
 *  - 0x83 /0 with sign-extended imm8, whenever isbyte(i) holds (this test is
 *    not gated on optimize_imm8, unlike e.g. raw_and_l_ri);
 *  - 0x05 (ADD EAX,imm32), when optimize_accum && isaccum(d);
 *  - 0x81 /0 with ModRM 0xC0+d (mod=11 reg=000 rm=d) and imm32. */
2721     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2722     {
2723     if (isbyte(i)) {
2724     emit_byte(0x83);
2725     emit_byte(0xc0+d);
2726     emit_byte(i);
2727     }
2728     else {
2729 gbeauche 1.2 if (optimize_accum && isaccum(d))
2730     emit_byte(0x05);
2731     else {
2732 gbeauche 1.1 emit_byte(0x81);
2733     emit_byte(0xc0+d);
2734 gbeauche 1.2 }
2735 gbeauche 1.1 emit_long(i);
2736     }
2737     }
2738     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2739    
/* Emit a 16-bit ADD d, i. The 0x66 operand-size prefix is emitted first,
 * then one of three encodings, chosen in this order:
 *  - 0x83 /0 with sign-extended imm8, whenever isbyte(i) holds (not gated
 *    on optimize_imm8);
 *  - 0x05 (ADD AX,imm16), when optimize_accum && isaccum(d);
 *  - 0x81 /0 with ModRM 0xC0+d (mod=11 reg=000 rm=d) and imm16. */
2740     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2741     {
2742 gbeauche 1.2 emit_byte(0x66);
2743 gbeauche 1.1 if (isbyte(i)) {
2744     emit_byte(0x83);
2745     emit_byte(0xc0+d);
2746     emit_byte(i);
2747     }
2748     else {
2749 gbeauche 1.2 if (optimize_accum && isaccum(d))
2750     emit_byte(0x05);
2751     else {
2752 gbeauche 1.1 emit_byte(0x81);
2753     emit_byte(0xc0+d);
2754 gbeauche 1.2 }
2755 gbeauche 1.1 emit_word(i);
2756     }
2757     }
2758     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2759    
/* Emit an 8-bit ADD d, imm8.
 * When optimize_accum is set and isaccum(d) holds, the accumulator opcode
 * 0x04 (ADD AL,imm8) is used; otherwise 0x80 /0 with ModRM 0xC0+d
 * (mod=11 reg=000 rm=d). The imm8 follows in both cases. */
2760     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2761     {
2762 gbeauche 1.2 if (optimize_accum && isaccum(d))
2763     emit_byte(0x04);
2764     else {
2765     emit_byte(0x80);
2766     emit_byte(0xc0+d);
2767     }
2768 gbeauche 1.1 emit_byte(i);
2769     }
2770     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2771    
/* Emit a 32-bit SBB d, s (d -= s + carry flag).
 * Encoding: 0x19 (SBB r/m32,r32), ModRM mod=11 reg=s rm=d. */
2772     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2773     {
2774     emit_byte(0x19);
2775     emit_byte(0xc0+8*s+d);
2776     }
2777     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2778    
/* Emit a 16-bit SBB d, s (d -= s + carry flag).
 * Encoding: 0x66 prefix, 0x19 (SBB r/m16,r16), ModRM mod=11 reg=s rm=d. */
2779     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2780     {
2781     emit_byte(0x66);
2782     emit_byte(0x19);
2783     emit_byte(0xc0+8*s+d);
2784     }
2785     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2786    
/* Emit an 8-bit SBB d, s (d -= s + carry flag).
 * Encoding: 0x18 (SBB r/m8,r8), ModRM mod=11 reg=s rm=d. */
2787     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2788     {
2789     emit_byte(0x18);
2790     emit_byte(0xc0+8*s+d);
2791     }
2792     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2793    
/* Emit a 32-bit SUB d, s (d -= s).
 * Encoding: 0x29 (SUB r/m32,r32), ModRM mod=11 reg=s rm=d. */
2794     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2795     {
2796     emit_byte(0x29);
2797     emit_byte(0xc0+8*s+d);
2798     }
2799     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2800    
/* Emit a 16-bit SUB d, s (d -= s).
 * Encoding: 0x66 prefix, 0x29 (SUB r/m16,r16), ModRM mod=11 reg=s rm=d. */
2801     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2802     {
2803     emit_byte(0x66);
2804     emit_byte(0x29);
2805     emit_byte(0xc0+8*s+d);
2806     }
2807     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2808    
/* Emit an 8-bit SUB d, s (d -= s).
 * Encoding: 0x28 (SUB r/m8,r8), ModRM mod=11 reg=s rm=d. */
2809     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2810     {
2811     emit_byte(0x28);
2812     emit_byte(0xc0+8*s+d);
2813     }
2814     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2815    
/* Emit a 32-bit CMP d, s (flags from d - s; no register is written).
 * Encoding: 0x39 (CMP r/m32,r32), ModRM mod=11 reg=s rm=d. */
2816     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2817     {
2818     emit_byte(0x39);
2819     emit_byte(0xc0+8*s+d);
2820     }
2821     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2822    
/* Emit a 32-bit CMP r, i (flags from r - i). Three encodings, in this order:
 *  - 0x83 /7 with sign-extended imm8, when optimize_imm8 && isbyte(i);
 *  - 0x3D (CMP EAX,imm32), when optimize_accum && isaccum(r);
 *  - 0x81 /7 with ModRM 0xF8+r (mod=11 reg=111 rm=r) and imm32. */
2823     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2824     {
2825     if (optimize_imm8 && isbyte(i)) {
2826     emit_byte(0x83);
2827     emit_byte(0xf8+r);
2828     emit_byte(i);
2829     }
2830     else {
2831 gbeauche 1.2 if (optimize_accum && isaccum(r))
2832     emit_byte(0x3d);
2833     else {
2834 gbeauche 1.1 emit_byte(0x81);
2835     emit_byte(0xf8+r);
2836 gbeauche 1.2 }
2837 gbeauche 1.1 emit_long(i);
2838     }
2839     }
2840     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2841    
/* Emit a 16-bit CMP d, s (flags from d - s; no register is written).
 * Encoding: 0x66 prefix, 0x39 (CMP r/m16,r16), ModRM mod=11 reg=s rm=d. */
2842     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2843     {
2844     emit_byte(0x66);
2845     emit_byte(0x39);
2846     emit_byte(0xc0+8*s+d);
2847     }
2848     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2849    
/* Emit CMP byte [d], imm8 (flags from the byte at address d minus s).
 * Encoding: 0x80 /7, ModRM 0x3D = mod=00 reg=111 rm=101 (disp32 only),
 * the 32-bit address, then imm8.
 * The closing LENDFUNC names raw_cmp_b_mi so it matches its LOWFUNC. */
2850 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2851     {
2852     emit_byte(0x80);
2853     emit_byte(0x3d);
2854     emit_long(d);
2855     emit_byte(s);
2856     }
2857     LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2858    
/* Emit an 8-bit CMP d, imm8 (flags from d - i).
 * When optimize_accum is set and isaccum(d) holds, the accumulator opcode
 * 0x3C (CMP AL,imm8) is used; otherwise 0x80 /7 with ModRM 0xF8+d
 * (mod=11 reg=111 rm=d). The imm8 follows in both cases. */
2859 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2860     {
2861 gbeauche 1.2 if (optimize_accum && isaccum(d))
2862     emit_byte(0x3c);
2863     else {
2864 gbeauche 1.1 emit_byte(0x80);
2865     emit_byte(0xf8+d);
2866 gbeauche 1.2 }
2867 gbeauche 1.1 emit_byte(i);
2868     }
2869     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2870    
/* Emit an 8-bit CMP d, s (flags from d - s; no register is written).
 * Encoding: 0x38 (CMP r/m8,r8), ModRM mod=11 reg=s rm=d. */
2871     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2872     {
2873     emit_byte(0x38);
2874     emit_byte(0xc0+8*s+d);
2875     }
2876     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2877    
/* Emit a 32-bit CMP of the memory operand at offset + index*factor against
 * register d (opcode 0x39 = CMP r/m32,r32, so flags come from mem - d).
 * ModRM mod=00 reg=d rm=100 (SIB follows); SIB scale=fi index=index
 * base=101, which with mod=00 means "disp32, no base register"; disp32.
 * factor must be 1, 2, 4 or 8; any other value aborts. */
2878     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2879     {
2880     int fi;
2881    
2882     switch(factor) {
2883     case 1: fi=0; break;
2884     case 2: fi=1; break;
2885     case 4: fi=2; break;
2886     case 8: fi=3; break;
2887     default: abort();
2888     }
2889     emit_byte(0x39);
2890     emit_byte(0x04+8*d);
2891     emit_byte(5+8*index+0x40*fi);
2892     emit_long(offset);
2893     }
2894     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2895    
/* Emit a 32-bit XOR d, s (d ^= s).
 * Encoding: 0x31 (XOR r/m32,r32), ModRM mod=11 reg=s rm=d. */
2896     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2897     {
2898     emit_byte(0x31);
2899     emit_byte(0xc0+8*s+d);
2900     }
2901     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2902    
/* Emit a 16-bit XOR d, s (d ^= s).
 * Encoding: 0x66 prefix, 0x31 (XOR r/m16,r16), ModRM mod=11 reg=s rm=d. */
2903     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2904     {
2905     emit_byte(0x66);
2906     emit_byte(0x31);
2907     emit_byte(0xc0+8*s+d);
2908     }
2909     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2910    
/* Emit an 8-bit XOR d, s (d ^= s).
 * Encoding: 0x30 (XOR r/m8,r8), ModRM mod=11 reg=s rm=d. */
2911     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2912     {
2913     emit_byte(0x30);
2914     emit_byte(0xc0+8*s+d);
2915     }
2916     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2917    
/* Emit SUB dword [d], s (subtract an immediate from the 32-bit value at
 * address d). ModRM 0x2D = mod=00 reg=101 (SUB) rm=101 (disp32 only).
 * Uses 0x83 with a sign-extended imm8 when optimize_imm8 is set and s
 * satisfies isbyte(); otherwise 0x81 with a full imm32. */
2918     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2919     {
2920     if (optimize_imm8 && isbyte(s)) {
2921     emit_byte(0x83);
2922     emit_byte(0x2d);
2923     emit_long(d);
2924     emit_byte(s);
2925     }
2926     else {
2927     emit_byte(0x81);
2928     emit_byte(0x2d);
2929     emit_long(d);
2930     emit_long(s);
2931     }
2932     }
2933     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2934    
/* Emit CMP dword [d], s (flags from the 32-bit value at address d minus s).
 * ModRM 0x3D = mod=00 reg=111 (CMP) rm=101 (disp32 only).
 * Uses 0x83 with a sign-extended imm8 when optimize_imm8 is set and s
 * satisfies isbyte(); otherwise 0x81 with a full imm32. */
2935     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2936     {
2937     if (optimize_imm8 && isbyte(s)) {
2938     emit_byte(0x83);
2939     emit_byte(0x3d);
2940     emit_long(d);
2941     emit_byte(s);
2942     }
2943     else {
2944     emit_byte(0x81);
2945     emit_byte(0x3d);
2946     emit_long(d);
2947     emit_long(s);
2948     }
2949     }
2950     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2951    
/* Emit a 32-bit XCHG exchanging registers r1 and r2.
 * Encoding: 0x87 (XCHG r/m32,r32), ModRM mod=11 reg=r1 rm=r2. */
2952     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2953     {
2954     emit_byte(0x87);
2955     emit_byte(0xc0+8*r1+r2);
2956     }
2957     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2958    
2959     /*************************************************************************
2960     * FIXME: mem access modes probably wrong *
2961     *************************************************************************/
2962    
/* Emit PUSHF (opcode 0x9C): push the flags register onto the stack. */
2963     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2964     {
2965     emit_byte(0x9c);
2966     }
2967     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2968    
/* Emit POPF (opcode 0x9D): pop the flags register from the stack. */
2969     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2970     {
2971     emit_byte(0x9d);
2972     }
2973     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2974 gbeauche 1.13
2975     #endif
2976 gbeauche 1.1
2977     /*************************************************************************
2978     * Unoptimizable stuff --- jump *
2979     *************************************************************************/
2980    
/* Emit an indirect near CALL through register r.
 * With USE_NEW_RTASM the CALLsr() macro is used; otherwise the raw bytes
 * 0xFF /2 with ModRM 0xD0+r (mod=11 reg=010 rm=r). */
2981     static __inline__ void raw_call_r(R4 r)
2982     {
2983 gbeauche 1.20 #if USE_NEW_RTASM
2984     CALLsr(r);
2985     #else
2986 gbeauche 1.1 emit_byte(0xff);
2987     emit_byte(0xd0+r);
2988 gbeauche 1.20 #endif
2989 gbeauche 1.5 }
2990    
/* Emit an indirect near CALL through the memory word at base + r*m.
 * With USE_NEW_RTASM the CALLsm() macro is used with no base register.
 * Otherwise: 0xFF /2 with ModRM 0x14 (mod=00 reg=010 rm=100, SIB follows),
 * SIB scale=mu index=r base=101 (disp32, no base register), then disp32.
 * In that path m must be 1, 2, 4 or 8; any other value aborts. */
2991     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2992     {
2993 gbeauche 1.20 #if USE_NEW_RTASM
2994     CALLsm(base, X86_NOREG, r, m);
2995     #else
2996 gbeauche 1.5 int mu;
2997     switch(m) {
2998     case 1: mu=0; break;
2999     case 2: mu=1; break;
3000     case 4: mu=2; break;
3001     case 8: mu=3; break;
3002     default: abort();
3003     }
3004     emit_byte(0xff);
3005     emit_byte(0x14);
3006     emit_byte(0x05+8*r+0x40*mu);
3007     emit_long(base);
3008 gbeauche 1.20 #endif
3009 gbeauche 1.1 }
3010    
/* Emit an indirect near JMP through register r.
 * With USE_NEW_RTASM the JMPsr() macro is used; otherwise the raw bytes
 * 0xFF /4 with ModRM 0xE0+r (mod=11 reg=100 rm=r). */
3011     static __inline__ void raw_jmp_r(R4 r)
3012     {
3013 gbeauche 1.20 #if USE_NEW_RTASM
3014     JMPsr(r);
3015     #else
3016 gbeauche 1.1 emit_byte(0xff);
3017     emit_byte(0xe0+r);
3018 gbeauche 1.20 #endif
3019 gbeauche 1.1 }
3020    
/* Emit an indirect near JMP through the memory word at base + r*m.
 * With USE_NEW_RTASM the JMPsm() macro is used with no base register.
 * Otherwise: 0xFF /4 with ModRM 0x24 (mod=00 reg=100 rm=100, SIB follows),
 * SIB scale=mu index=r base=101 (disp32, no base register), then disp32.
 * In that path m must be 1, 2, 4 or 8; any other value aborts. */
3021     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3022     {
3023 gbeauche 1.20 #if USE_NEW_RTASM
3024     JMPsm(base, X86_NOREG, r, m);
3025     #else
3026 gbeauche 1.1 int mu;
3027     switch(m) {
3028     case 1: mu=0; break;
3029     case 2: mu=1; break;
3030     case 4: mu=2; break;
3031     case 8: mu=3; break;
3032     default: abort();
3033     }
3034     emit_byte(0xff);
3035     emit_byte(0x24);
3036     emit_byte(0x05+8*r+0x40*mu);
3037     emit_long(base);
3038 gbeauche 1.20 #endif
3039 gbeauche 1.1 }
3040    
/* Emit an indirect near JMP through the memory word at address base.
 * Encoding: 0xFF /4 with ModRM 0x25 (mod=00 reg=100 rm=101, disp32 only),
 * then the 32-bit address. */
3041     static __inline__ void raw_jmp_m(uae_u32 base)
3042     {
3043     emit_byte(0xff);
3044     emit_byte(0x25);
3045     emit_long(base);
3046     }
3047    
3048    
/* Emit a direct near CALL to address t.
 * With USE_NEW_RTASM the CALLm() macro is used. Otherwise opcode 0xE8 is
 * followed by rel32 = t - target - 4; target is presumably the current
 * emission pointer (just past the opcode), so the displacement is relative
 * to the end of the instruction -- confirm against emit_byte().
 * target is converted with (uintptr), as raw_jl/raw_jz/raw_jnz do, instead
 * of being narrowed to uae_u32, which is not a valid pointer conversion on
 * 64-bit hosts. The value passed to emit_long() keeps the same low 32 bits. */
3049     static __inline__ void raw_call(uae_u32 t)
3050     {
3051 gbeauche 1.20 #if USE_NEW_RTASM
3052     CALLm(t);
3053     #else
3054 gbeauche 1.1 emit_byte(0xe8);
3055     emit_long(t-(uintptr)target-4);
3056 gbeauche 1.20 #endif
3057 gbeauche 1.1 }
3058    
/* Emit a direct near JMP to address t.
 * With USE_NEW_RTASM the JMPm() macro is used. Otherwise opcode 0xE9 is
 * followed by rel32 = t - target - 4; target is presumably the current
 * emission pointer (just past the opcode), so the displacement is relative
 * to the end of the instruction -- confirm against emit_byte().
 * target is converted with (uintptr), as raw_jl/raw_jz/raw_jnz do, instead
 * of being narrowed to uae_u32, which is not a valid pointer conversion on
 * 64-bit hosts. The value passed to emit_long() keeps the same low 32 bits. */
3059     static __inline__ void raw_jmp(uae_u32 t)
3060     {
3061 gbeauche 1.20 #if USE_NEW_RTASM
3062     JMPm(t);
3063     #else
3064 gbeauche 1.1 emit_byte(0xe9);
3065     emit_long(t-(uintptr)target-4);
3066 gbeauche 1.20 #endif
3067 gbeauche 1.1 }
3068    
/* Emit a near JL (jump if less) to address t.
 * Encoding: 0x0F 0x8C followed by rel32 = t - target - 4; target is
 * presumably the current emission pointer after the two opcode bytes. */
3069     static __inline__ void raw_jl(uae_u32 t)
3070     {
3071     emit_byte(0x0f);
3072     emit_byte(0x8c);
3073 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3074 gbeauche 1.1 }
3075    
/* Emit a near JZ (jump if zero) to address t.
 * Encoding: 0x0F 0x84 followed by rel32 = t - target - 4; target is
 * presumably the current emission pointer after the two opcode bytes. */
3076     static __inline__ void raw_jz(uae_u32 t)
3077     {
3078     emit_byte(0x0f);
3079     emit_byte(0x84);
3080 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3081 gbeauche 1.1 }
3082    
/* Emit a near JNZ (jump if not zero) to address t.
 * Encoding: 0x0F 0x85 followed by rel32 = t - target - 4; target is
 * presumably the current emission pointer after the two opcode bytes. */
3083     static __inline__ void raw_jnz(uae_u32 t)
3084     {
3085     emit_byte(0x0f);
3086     emit_byte(0x85);
3087 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3088 gbeauche 1.1 }
3089    
/* Emit only the two opcode bytes of a near JNZ (0x0F 0x85); no rel32
 * displacement is emitted here, so the caller must supply it. */
3090     static __inline__ void raw_jnz_l_oponly(void)
3091     {
3092     emit_byte(0x0f);
3093     emit_byte(0x85);
3094     }
3095    
/* Emit only the two opcode bytes of a near Jcc for condition code cc
 * (0x0F 0x80+cc); no rel32 displacement is emitted here, so the caller
 * must supply it. */
3096     static __inline__ void raw_jcc_l_oponly(int cc)
3097     {
3098     emit_byte(0x0f);
3099     emit_byte(0x80+cc);
3100     }
3101    
/* Emit only the opcode byte of a short JNZ (0x75); the rel8 displacement
 * is not emitted here. */
3102     static __inline__ void raw_jnz_b_oponly(void)
3103     {
3104     emit_byte(0x75);
3105     }
3106    
/* Emit only the opcode byte of a short JZ (0x74); the rel8 displacement
 * is not emitted here. */
3107     static __inline__ void raw_jz_b_oponly(void)
3108     {
3109     emit_byte(0x74);
3110     }
3111    
/* Emit only the opcode byte of a short Jcc for condition code cc (0x70+cc);
 * the rel8 displacement is not emitted here. */
3112     static __inline__ void raw_jcc_b_oponly(int cc)
3113     {
3114     emit_byte(0x70+cc);
3115     }
3116    
/* Emit only the opcode byte of a near JMP rel32 (0xE9); the displacement
 * is not emitted here. */
3117     static __inline__ void raw_jmp_l_oponly(void)
3118     {
3119     emit_byte(0xe9);
3120     }
3121    
/* Emit only the opcode byte of a short JMP rel8 (0xEB); the displacement
 * is not emitted here. */
3122     static __inline__ void raw_jmp_b_oponly(void)
3123     {
3124     emit_byte(0xeb);
3125     }
3126    
/* Emit a near RET (opcode 0xC3). */
3127     static __inline__ void raw_ret(void)
3128     {
3129     emit_byte(0xc3);
3130     }
3131    
/* Emit a one-byte NOP (opcode 0x90). */
3132     static __inline__ void raw_nop(void)
3133     {
3134     emit_byte(0x90);
3135     }
3136    
3137 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3138     {
3139     /* Source: GNU Binutils 2.12.90.0.15 */
3140     /* Various efficient no-op patterns for aligning code labels.
3141     Note: Don't try to assemble the instructions in the comments.
3142     0L and 0w are not legal. */
3143     static const uae_u8 f32_1[] =
3144     {0x90}; /* nop */
3145     static const uae_u8 f32_2[] =
3146     {0x89,0xf6}; /* movl %esi,%esi */
3147     static const uae_u8 f32_3[] =
3148     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3149     static const uae_u8 f32_4[] =
3150     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3151     static const uae_u8 f32_5[] =
3152     {0x90, /* nop */
3153     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3154     static const uae_u8 f32_6[] =
3155     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3156     static const uae_u8 f32_7[] =
3157     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3158     static const uae_u8 f32_8[] =
3159     {0x90, /* nop */
3160     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3161     static const uae_u8 f32_9[] =
3162     {0x89,0xf6, /* movl %esi,%esi */
3163     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3164     static const uae_u8 f32_10[] =
3165     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3166     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3167     static const uae_u8 f32_11[] =
3168     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3169     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170     static const uae_u8 f32_12[] =
3171     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3172     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3173     static const uae_u8 f32_13[] =
3174     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3175     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3176     static const uae_u8 f32_14[] =
3177     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3178     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3179     static const uae_u8 f32_15[] =
3180     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3181     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3182     static const uae_u8 f32_16[] =
3183     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3184     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3185     static const uae_u8 *const f32_patt[] = {
3186     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3187     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3188     };
3189 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3190 gbeauche 1.8
3191 gbeauche 1.21 #if defined(__x86_64__)
3192     /* The recommended way to pad 64bit code is to use NOPs preceded by
3193     maximally four 0x66 prefixes. Balance the size of nops. */
3194     if (nbytes == 0)
3195     return;
3196    
3197     int i;
3198     int nnops = (nbytes + 3) / 4;
3199     int len = nbytes / nnops;
3200     int remains = nbytes - nnops * len;
3201    
3202     for (i = 0; i < remains; i++) {
3203     emit_block(prefixes, len);
3204     raw_nop();
3205     }
3206     for (; i < nnops; i++) {
3207     emit_block(prefixes, len - 1);
3208     raw_nop();
3209     }
3210     #else
3211 gbeauche 1.8 int nloops = nbytes / 16;
3212     while (nloops-- > 0)
3213     emit_block(f32_16, sizeof(f32_16));
3214    
3215     nbytes %= 16;
3216     if (nbytes)
3217     emit_block(f32_patt[nbytes - 1], nbytes);
3218 gbeauche 1.21 #endif
3219 gbeauche 1.8 }
3220    
3221 gbeauche 1.1
3222     /*************************************************************************
3223     * Flag handling, to and fro UAE flag register *
3224     *************************************************************************/
3225    
3226     #ifdef SAHF_SETO_PROFITABLE
3227    
3228     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3229     static __inline__ void raw_flags_to_reg(int r)
3230     {
3231     raw_lahf(0); /* Most flags in AH */
3232     //raw_setcc(r,0); /* V flag in AL */
3233 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3234 gbeauche 1.1
3235     #if 1 /* Let's avoid those nasty partial register stalls */
3236 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3237 gbeauche 1.33 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3238 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3239     live.state[FLAGTMP].status=INMEM;
3240     live.state[FLAGTMP].realreg=-1;
3241     /* We just "evicted" FLAGTMP. */
3242     if (live.nat[r].nholds!=1) {
3243     /* Huh? */
3244     abort();
3245     }
3246     live.nat[r].nholds=0;
3247     #endif
3248     }
3249    
3250     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3251     static __inline__ void raw_reg_to_flags(int r)
3252     {
3253     raw_cmp_b_ri(r,-127); /* set V */
3254     raw_sahf(0);
3255     }
3256    
3257 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3258     static __inline__ void raw_flags_set_zero(int s, int tmp)
3259     {
3260     raw_mov_l_rr(tmp,s);
3261     raw_lahf(s); /* flags into ah */
3262     raw_and_l_ri(s,0xffffbfff);
3263     raw_and_l_ri(tmp,0x00004000);
3264     raw_xor_l_ri(tmp,0x00004000);
3265     raw_or_l(s,tmp);
3266     raw_sahf(s);
3267     }
3268    
3269 gbeauche 1.1 #else
3270    
3271     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3272     static __inline__ void raw_flags_to_reg(int r)
3273     {
3274     raw_pushfl();
3275     raw_pop_l_r(r);
3276 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3277 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3278     live.state[FLAGTMP].status=INMEM;
3279     live.state[FLAGTMP].realreg=-1;
3280     /* We just "evicted" FLAGTMP. */
3281     if (live.nat[r].nholds!=1) {
3282     /* Huh? */
3283     abort();
3284     }
3285     live.nat[r].nholds=0;
3286     }
3287    
3288     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3289     static __inline__ void raw_reg_to_flags(int r)
3290     {
3291     raw_push_l_r(r);
3292     raw_popfl();
3293     }
3294    
3295 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3296     static __inline__ void raw_flags_set_zero(int s, int tmp)
3297     {
3298     raw_mov_l_rr(tmp,s);
3299     raw_pushfl();
3300     raw_pop_l_r(s);
3301     raw_and_l_ri(s,0xffffffbf);
3302     raw_and_l_ri(tmp,0x00000040);
3303     raw_xor_l_ri(tmp,0x00000040);
3304     raw_or_l(s,tmp);
3305     raw_push_l_r(s);
3306     raw_popfl();
3307     }
3308 gbeauche 1.1 #endif
3309    
3310     /* Apparently, there are enough instructions between flag store and
3311     flag reload to avoid the partial memory stall */
3312     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3313     {
3314     #if 1
3315 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3316 gbeauche 1.1 #else
3317 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3318     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3319 gbeauche 1.1 #endif
3320     }
3321    
3322     /* FLAGX is byte sized, and we *do* write it at that size */
3323     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3324     {
3325     if (live.nat[target].canbyte)
3326 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3327 gbeauche 1.1 else if (live.nat[target].canword)
3328 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3329 gbeauche 1.1 else
3330 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3331 gbeauche 1.1 }
3332    
3333 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3334     {
3335     if (off) raw_sub_l_ri(ESP_INDEX,off);
3336     }
3337    
3338 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3339     {
3340 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3341 gbeauche 1.1 }
3342    
3343     /*************************************************************************
3344     * Handling mistaken direct memory access *
3345     *************************************************************************/
3346    
3347     // gb-- I don't need that part for JIT Basilisk II
3348     #if defined(NATMEM_OFFSET) && 0
3349     #include <asm/sigcontext.h>
3350     #include <signal.h>
3351    
3352     #define SIG_READ 1
3353     #define SIG_WRITE 2
3354    
3355     static int in_handler=0;
3356     static uae_u8 veccode[256];
3357    
3358     static void vec(int x, struct sigcontext sc)
3359     {
3360     uae_u8* i=(uae_u8*)sc.eip;
3361     uae_u32 addr=sc.cr2;
3362     int r=-1;
3363     int size=4;
3364     int dir=-1;
3365     int len=0;
3366     int j;
3367    
3368     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3369     if (!canbang)
3370     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3371     if (in_handler)
3372     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3373    
3374     if (canbang && i>=compiled_code && i<=current_compile_p) {
3375     if (*i==0x66) {
3376     i++;
3377     size=2;
3378     len++;
3379     }
3380    
3381     switch(i[0]) {
3382     case 0x8a:
3383     if ((i[1]&0xc0)==0x80) {
3384     r=(i[1]>>3)&7;
3385     dir=SIG_READ;
3386     size=1;
3387     len+=6;
3388     break;
3389     }
3390     break;
3391     case 0x88:
3392     if ((i[1]&0xc0)==0x80) {
3393     r=(i[1]>>3)&7;
3394     dir=SIG_WRITE;
3395     size=1;
3396     len+=6;
3397     break;
3398     }
3399     break;
3400     case 0x8b:
3401     if ((i[1]&0xc0)==0x80) {
3402     r=(i[1]>>3)&7;
3403     dir=SIG_READ;
3404     len+=6;
3405     break;
3406     }
3407     if ((i[1]&0xc0)==0x40) {
3408     r=(i[1]>>3)&7;
3409     dir=SIG_READ;
3410     len+=3;
3411     break;
3412     }
3413     break;
3414     case 0x89:
3415     if ((i[1]&0xc0)==0x80) {
3416     r=(i[1]>>3)&7;
3417     dir=SIG_WRITE;
3418     len+=6;
3419     break;
3420     }
3421     if ((i[1]&0xc0)==0x40) {
3422     r=(i[1]>>3)&7;
3423     dir=SIG_WRITE;
3424     len+=3;
3425     break;
3426     }
3427     break;
3428     }
3429     }
3430    
3431     if (r!=-1) {
3432     void* pr=NULL;
3433     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3434    
3435     switch(r) {
3436     case 0: pr=&(sc.eax); break;
3437     case 1: pr=&(sc.ecx); break;
3438     case 2: pr=&(sc.edx); break;
3439     case 3: pr=&(sc.ebx); break;
3440     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3441     case 5: pr=(size>1)?
3442     (void*)(&(sc.ebp)):
3443     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3444     case 6: pr=(size>1)?
3445     (void*)(&(sc.esi)):
3446     (void*)(((uae_u8*)&(sc.edx))+1); break;
3447     case 7: pr=(size>1)?
3448     (void*)(&(sc.edi)):
3449     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3450     default: abort();
3451     }
3452     if (pr) {
3453     blockinfo* bi;
3454    
3455     if (currprefs.comp_oldsegv) {
3456     addr-=NATMEM_OFFSET;
3457    
3458     if ((addr>=0x10000000 && addr<0x40000000) ||
3459     (addr>=0x50000000)) {
3460     write_log("Suspicious address in %x SEGV handler.\n",addr);
3461     }
3462     if (dir==SIG_READ) {
3463     switch(size) {
3464     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3465     case 2: *((uae_u16*)pr)=get_word(addr); break;
3466     case 4: *((uae_u32*)pr)=get_long(addr); break;
3467     default: abort();
3468     }
3469     }
3470     else { /* write */
3471     switch(size) {
3472     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3473     case 2: put_word(addr,*((uae_u16*)pr)); break;
3474     case 4: put_long(addr,*((uae_u32*)pr)); break;
3475     default: abort();
3476     }
3477     }
3478     write_log("Handled one access!\n");
3479     fflush(stdout);
3480     segvcount++;
3481     sc.eip+=len;
3482     }
3483     else {
3484     void* tmp=target;
3485     int i;
3486     uae_u8 vecbuf[5];
3487    
3488     addr-=NATMEM_OFFSET;
3489    
3490     if ((addr>=0x10000000 && addr<0x40000000) ||
3491     (addr>=0x50000000)) {
3492     write_log("Suspicious address in %x SEGV handler.\n",addr);
3493     }
3494    
3495     target=(uae_u8*)sc.eip;
3496     for (i=0;i<5;i++)
3497     vecbuf[i]=target[i];
3498     emit_byte(0xe9);
3499 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3500 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3501    
3502     write_log("Handled one access!\n");
3503     fflush(stdout);
3504     segvcount++;
3505    
3506     target=veccode;
3507    
3508     if (dir==SIG_READ) {
3509     switch(size) {
3510     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3511     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3512     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3513     default: abort();
3514     }
3515     }
3516     else { /* write */
3517     switch(size) {
3518     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3519     case 2: put_word(addr,*((uae_u16*)pr)); break;
3520     case 4: put_long(addr,*((uae_u32*)pr)); break;
3521     default: abort();
3522     }
3523     }
3524     for (i=0;i<5;i++)
3525     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3526 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3527 gbeauche 1.1 emit_byte(0xe9);
3528 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3529 gbeauche 1.1 in_handler=1;
3530     target=tmp;
3531     }
3532     bi=active;
3533     while (bi) {
3534     if (bi->handler &&
3535     (uae_u8*)bi->direct_handler<=i &&
3536     (uae_u8*)bi->nexthandler>i) {
3537     write_log("deleted trigger (%p<%p<%p) %p\n",
3538     bi->handler,
3539     i,
3540     bi->nexthandler,
3541     bi->pc_p);
3542     invalidate_block(bi);
3543     raise_in_cl_list(bi);
3544     set_special(0);
3545     return;
3546     }
3547     bi=bi->next;
3548     }
3549     /* Not found in the active list. Might be a rom routine that
3550     is in the dormant list */
3551     bi=dormant;
3552     while (bi) {
3553     if (bi->handler &&
3554     (uae_u8*)bi->direct_handler<=i &&
3555     (uae_u8*)bi->nexthandler>i) {
3556     write_log("deleted trigger (%p<%p<%p) %p\n",
3557     bi->handler,
3558     i,
3559     bi->nexthandler,
3560     bi->pc_p);
3561     invalidate_block(bi);
3562     raise_in_cl_list(bi);
3563     set_special(0);
3564     return;
3565     }
3566     bi=bi->next;
3567     }
3568     write_log("Huh? Could not find trigger!\n");
3569     return;
3570     }
3571     }
3572     write_log("Can't handle access!\n");
3573     for (j=0;j<10;j++) {
3574     write_log("instruction byte %2d is %02x\n",j,i[j]);
3575     }
3576     write_log("Please send the above info (starting at \"fault address\") to\n"
3577     "bmeyer@csse.monash.edu.au\n"
3578     "This shouldn't happen ;-)\n");
3579     fflush(stdout);
3580     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3581     }
3582     #endif
3583    
3584    
3585     /*************************************************************************
3586     * Checking for CPU features *
3587     *************************************************************************/
3588    
3589 gbeauche 1.3 struct cpuinfo_x86 {
3590     uae_u8 x86; // CPU family
3591     uae_u8 x86_vendor; // CPU vendor
3592     uae_u8 x86_processor; // CPU canonical processor type
3593     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3594     uae_u32 x86_hwcap;
3595     uae_u8 x86_model;
3596     uae_u8 x86_mask;
3597     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3598     char x86_vendor_id[16];
3599     };
3600     struct cpuinfo_x86 cpuinfo;
3601    
/* Values stored in cpuinfo_x86::x86_vendor. */
enum {
	X86_VENDOR_INTEL		= 0,
	X86_VENDOR_CYRIX		= 1,
	X86_VENDOR_AMD			= 2,
	X86_VENDOR_UMC			= 3,
	X86_VENDOR_NEXGEN		= 4,
	X86_VENDOR_CENTAUR		= 5,
	X86_VENDOR_RISE			= 6,
	X86_VENDOR_TRANSMETA	= 7,
	X86_VENDOR_NSC			= 8,

	/* Vendor string not recognised. */
	X86_VENDOR_UNKNOWN		= 0xff
};
3614    
/* Canonical processor types stored in cpuinfo_x86::x86_processor.  The
   values double as indices into the per-processor tables, so the order
   matters; X86_PROCESSOR_max is the number of real entries. */
enum {
	X86_PROCESSOR_I386			= 0,	/* 80386 */
	X86_PROCESSOR_I486			= 1,	/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM		= 2,
	X86_PROCESSOR_PENTIUMPRO	= 3,
	X86_PROCESSOR_K6			= 4,
	X86_PROCESSOR_ATHLON		= 5,
	X86_PROCESSOR_PENTIUM4		= 6,
	X86_PROCESSOR_X86_64		= 7,
	X86_PROCESSOR_max			= 8
};
3626    
3627     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3628     "80386",
3629     "80486",
3630     "Pentium",
3631     "PentiumPro",
3632     "K6",
3633     "Athlon",
3634 gbeauche 1.16 "Pentium4",
3635 gbeauche 1.28 "x86-64"
3636 gbeauche 1.3 };
3637    
3638     static struct ptt {
3639     const int align_loop;
3640     const int align_loop_max_skip;
3641     const int align_jump;
3642     const int align_jump_max_skip;
3643     const int align_func;
3644     }
3645     x86_alignments[X86_PROCESSOR_max] = {
3646     { 4, 3, 4, 3, 4 },
3647     { 16, 15, 16, 15, 16 },
3648     { 16, 7, 16, 7, 16 },
3649     { 16, 15, 16, 7, 16 },
3650     { 32, 7, 32, 7, 32 },
3651 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3652 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3653     { 16, 7, 16, 7, 16 }
3654 gbeauche 1.3 };
3655 gbeauche 1.1
3656 gbeauche 1.3 static void
3657     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3658 gbeauche 1.1 {
3659 gbeauche 1.3 char *v = c->x86_vendor_id;
3660    
3661     if (!strcmp(v, "GenuineIntel"))
3662     c->x86_vendor = X86_VENDOR_INTEL;
3663     else if (!strcmp(v, "AuthenticAMD"))
3664     c->x86_vendor = X86_VENDOR_AMD;
3665     else if (!strcmp(v, "CyrixInstead"))
3666     c->x86_vendor = X86_VENDOR_CYRIX;
3667     else if (!strcmp(v, "Geode by NSC"))
3668     c->x86_vendor = X86_VENDOR_NSC;
3669     else if (!strcmp(v, "UMC UMC UMC "))
3670     c->x86_vendor = X86_VENDOR_UMC;
3671     else if (!strcmp(v, "CentaurHauls"))
3672     c->x86_vendor = X86_VENDOR_CENTAUR;
3673     else if (!strcmp(v, "NexGenDriven"))
3674     c->x86_vendor = X86_VENDOR_NEXGEN;
3675     else if (!strcmp(v, "RiseRiseRise"))
3676     c->x86_vendor = X86_VENDOR_RISE;
3677     else if (!strcmp(v, "GenuineTMx86") ||
3678     !strcmp(v, "TransmetaCPU"))
3679     c->x86_vendor = X86_VENDOR_TRANSMETA;
3680     else
3681     c->x86_vendor = X86_VENDOR_UNKNOWN;
3682     }
3683 gbeauche 1.1
3684 gbeauche 1.3 static void
3685     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3686     {
3687 gbeauche 1.27 const int CPUID_SPACE = 4096;
3688     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3689     if (cpuid_space == VM_MAP_FAILED)
3690     abort();
3691     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3692    
3693 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3694 gbeauche 1.3 uae_u8* tmp=get_target();
3695 gbeauche 1.1
3696 gbeauche 1.20 s_op = op;
3697 gbeauche 1.3 set_target(cpuid_space);
3698     raw_push_l_r(0); /* eax */
3699     raw_push_l_r(1); /* ecx */
3700     raw_push_l_r(2); /* edx */
3701     raw_push_l_r(3); /* ebx */
3702 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3703 gbeauche 1.3 raw_cpuid(0);
3704 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3705     raw_mov_l_mr((uintptr)&s_ebx,3);
3706     raw_mov_l_mr((uintptr)&s_ecx,1);
3707     raw_mov_l_mr((uintptr)&s_edx,2);
3708 gbeauche 1.3 raw_pop_l_r(3);
3709     raw_pop_l_r(2);
3710     raw_pop_l_r(1);
3711     raw_pop_l_r(0);
3712     raw_ret();
3713     set_target(tmp);
3714 gbeauche 1.1
3715 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3716 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3717     if (ebx != NULL) *ebx = s_ebx;
3718     if (ecx != NULL) *ecx = s_ecx;
3719     if (edx != NULL) *edx = s_edx;
3720 gbeauche 1.27
3721     vm_release(cpuid_space, CPUID_SPACE);
3722 gbeauche 1.1 }
3723    
3724 gbeauche 1.3 static void
3725     raw_init_cpu(void)
3726 gbeauche 1.1 {
3727 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3728    
3729     /* Defaults */
3730 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3731 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3732     c->cpuid_level = -1; /* CPUID not detected */
3733     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3734     c->x86_vendor_id[0] = '\0'; /* Unset */
3735     c->x86_hwcap = 0;
3736    
3737     /* Get vendor name */
3738     c->x86_vendor_id[12] = '\0';
3739     cpuid(0x00000000,
3740     (uae_u32 *)&c->cpuid_level,
3741     (uae_u32 *)&c->x86_vendor_id[0],
3742     (uae_u32 *)&c->x86_vendor_id[8],
3743     (uae_u32 *)&c->x86_vendor_id[4]);
3744     x86_get_cpu_vendor(c);
3745    
3746     /* Intel-defined flags: level 0x00000001 */
3747     c->x86_brand_id = 0;
3748     if ( c->cpuid_level >= 0x00000001 ) {
3749     uae_u32 tfms, brand_id;
3750     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3751     c->x86 = (tfms >> 8) & 15;
3752 gbeauche 1.29 if (c->x86 == 0xf)
3753     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3754 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3755 gbeauche 1.29 if (c->x86_model == 0xf)
3756     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3757 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3758     c->x86_mask = tfms & 15;
3759     } else {
3760     /* Have CPUID level 0 only - unheard of */
3761     c->x86 = 4;
3762     }
3763    
3764 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3765     uae_u32 xlvl;
3766     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3767     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3768     if ( xlvl >= 0x80000001 ) {
3769 gbeauche 1.28 uae_u32 features, extra_features;
3770     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3771 gbeauche 1.16 if (features & (1 << 29)) {
3772     /* Assume x86-64 if long mode is supported */
3773 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3774 gbeauche 1.16 }
3775 gbeauche 1.28 if (extra_features & (1 << 0))
3776     have_lahf_lm = true;
3777 gbeauche 1.16 }
3778     }
3779    
3780 gbeauche 1.3 /* Canonicalize processor ID */
3781     switch (c->x86) {
3782     case 3:
3783     c->x86_processor = X86_PROCESSOR_I386;
3784     break;
3785     case 4:
3786     c->x86_processor = X86_PROCESSOR_I486;
3787     break;
3788     case 5:
3789     if (c->x86_vendor == X86_VENDOR_AMD)
3790     c->x86_processor = X86_PROCESSOR_K6;
3791     else
3792     c->x86_processor = X86_PROCESSOR_PENTIUM;
3793     break;
3794     case 6:
3795     if (c->x86_vendor == X86_VENDOR_AMD)
3796     c->x86_processor = X86_PROCESSOR_ATHLON;
3797     else
3798     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3799     break;
3800     case 15:
3801 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3802     switch (c->x86_vendor) {
3803     case X86_VENDOR_INTEL:
3804     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3805     break;
3806     case X86_VENDOR_AMD:
3807     /* Assume a 32-bit Athlon processor if not in long mode */
3808     c->x86_processor = X86_PROCESSOR_ATHLON;
3809     break;
3810     }
3811     }
3812     break;
3813 gbeauche 1.3 }
3814     if (c->x86_processor == X86_PROCESSOR_max) {
3815 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3816     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3817 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3818     fprintf(stderr, " Model : %d\n", c->x86_model);
3819     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3820 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3821 gbeauche 1.3 if (c->x86_brand_id)
3822     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3823     }
3824    
3825     /* Have CMOV support? */
3826 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3827 gbeauche 1.3
3828     /* Can the host CPU suffer from partial register stalls? */
3829     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3830     #if 1
3831     /* It appears that partial register writes are a bad idea even on
3832 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3833     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3834 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3835     have_rat_stall = true;
3836 gbeauche 1.1 #endif
3837 gbeauche 1.3
3838     /* Alignments */
3839     if (tune_alignment) {
3840     align_loops = x86_alignments[c->x86_processor].align_loop;
3841     align_jumps = x86_alignments[c->x86_processor].align_jump;
3842     }
3843    
3844     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3845     c->cpuid_level, c->x86_vendor_id,
3846     x86_processor_string_table[c->x86_processor]);
3847 gbeauche 1.1 }
3848    
3849 gbeauche 1.10 static bool target_check_bsf(void)
3850     {
3851     bool mismatch = false;
3852     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3853     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3854     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3855     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3856     for (int value = -1; value <= 1; value++) {
3857 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3858     unsigned long tmp = value;
3859 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3860 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3861 gbeauche 1.10 int OF = (flags >> 11) & 1;
3862     int SF = (flags >> 7) & 1;
3863     int ZF = (flags >> 6) & 1;
3864     int CF = flags & 1;
3865     tmp = (value == 0);
3866     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3867     mismatch = true;
3868     }
3869     }}}}
3870     if (mismatch)
3871     write_log("Target CPU defines all flags on BSF instruction\n");
3872     return !mismatch;
3873     }
3874    
3875 gbeauche 1.1
3876     /*************************************************************************
3877     * FPU stuff *
3878     *************************************************************************/
3879    
3880    
3881     static __inline__ void raw_fp_init(void)
3882     {
3883     int i;
3884    
3885     for (i=0;i<N_FREGS;i++)
3886     live.spos[i]=-2;
3887     live.tos=-1; /* Stack is empty */
3888     }
3889    
3890     static __inline__ void raw_fp_cleanup_drop(void)
3891     {
3892     #if 0
3893     /* using FINIT instead of popping all the entries.
3894     Seems to have side effects --- there is display corruption in
3895     Quake when this is used */
3896     if (live.tos>1) {
3897     emit_byte(0x9b);
3898     emit_byte(0xdb);
3899     emit_byte(0xe3);
3900     live.tos=-1;
3901     }
3902     #endif
3903     while (live.tos>=1) {
3904     emit_byte(0xde);
3905     emit_byte(0xd9);
3906     live.tos-=2;
3907     }
3908     while (live.tos>=0) {
3909     emit_byte(0xdd);
3910     emit_byte(0xd8);
3911     live.tos--;
3912     }
3913     raw_fp_init();
3914     }
3915    
3916     static __inline__ void make_tos(int r)
3917     {
3918     int p,q;
3919    
3920     if (live.spos[r]<0) { /* Register not yet on stack */
3921     emit_byte(0xd9);
3922     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3923     live.tos++;
3924     live.spos[r]=live.tos;
3925     live.onstack[live.tos]=r;
3926     return;
3927     }
3928     /* Register is on stack */
3929     if (live.tos==live.spos[r])
3930     return;
3931     p=live.spos[r];
3932     q=live.onstack[live.tos];
3933    
3934     emit_byte(0xd9);
3935     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3936     live.onstack[live.tos]=r;
3937     live.spos[r]=live.tos;
3938     live.onstack[p]=q;
3939     live.spos[q]=p;
3940     }
3941    
3942     static __inline__ void make_tos2(int r, int r2)
3943     {
3944     int q;
3945    
3946     make_tos(r2); /* Put the reg that's supposed to end up in position2
3947     on top */
3948    
3949     if (live.spos[r]<0) { /* Register not yet on stack */
3950     make_tos(r); /* This will extend the stack */
3951     return;
3952     }
3953     /* Register is on stack */
3954     emit_byte(0xd9);
3955     emit_byte(0xc9); /* Move r2 into position 2 */
3956    
3957     q=live.onstack[live.tos-1];
3958     live.onstack[live.tos]=q;
3959     live.spos[q]=live.tos;
3960     live.onstack[live.tos-1]=r2;
3961     live.spos[r2]=live.tos-1;
3962    
3963     make_tos(r); /* And r into 1 */
3964     }
3965    
3966     static __inline__ int stackpos(int r)
3967     {
3968     if (live.spos[r]<0)
3969     abort();
3970     if (live.tos<live.spos[r]) {
3971     printf("Looking for spos for fnreg %d\n",r);
3972     abort();
3973     }
3974     return live.tos-live.spos[r];
3975     }
3976    
3977     static __inline__ void usereg(int r)
3978     {
3979     if (live.spos[r]<0)
3980     make_tos(r);
3981     }
3982    
3983     /* This is called with one FP value in a reg *above* tos, which it will
3984     pop off the stack if necessary */
3985     static __inline__ void tos_make(int r)
3986     {
3987     if (live.spos[r]<0) {
3988     live.tos++;
3989     live.spos[r]=live.tos;
3990     live.onstack[live.tos]=r;
3991     return;
3992     }
3993     emit_byte(0xdd);
3994     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3995     and pop it*/
3996     }
3997 gbeauche 1.23
3998     /* FP helper functions */
3999     #if USE_NEW_RTASM
4000     #define DEFINE_OP(NAME, GEN) \
4001     static inline void raw_##NAME(uint32 m) \
4002     { \
4003     GEN(m, X86_NOREG, X86_NOREG, 1); \
4004     }
4005     DEFINE_OP(fstl, FSTLm);
4006     DEFINE_OP(fstpl, FSTPLm);
4007     DEFINE_OP(fldl, FLDLm);
4008     DEFINE_OP(fildl, FILDLm);
4009     DEFINE_OP(fistl, FISTLm);
4010     DEFINE_OP(flds, FLDSm);
4011     DEFINE_OP(fsts, FSTSm);
4012     DEFINE_OP(fstpt, FSTPTm);
4013     DEFINE_OP(fldt, FLDTm);
4014     #else
4015     #define DEFINE_OP(NAME, OP1, OP2) \
4016     static inline void raw_##NAME(uint32 m) \
4017     { \
4018     emit_byte(OP1); \
4019     emit_byte(OP2); \
4020     emit_long(m); \
4021     }
4022     DEFINE_OP(fstl, 0xdd, 0x15);
4023     DEFINE_OP(fstpl, 0xdd, 0x1d);
4024     DEFINE_OP(fldl, 0xdd, 0x05);
4025     DEFINE_OP(fildl, 0xdb, 0x05);
4026     DEFINE_OP(fistl, 0xdb, 0x15);
4027     DEFINE_OP(flds, 0xd9, 0x05);
4028     DEFINE_OP(fsts, 0xd9, 0x15);
4029     DEFINE_OP(fstpt, 0xdb, 0x3d);
4030     DEFINE_OP(fldt, 0xdb, 0x2d);
4031     #endif
4032     #undef DEFINE_OP
4033    
4034 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4035     {
4036     make_tos(r);
4037 gbeauche 1.23 raw_fstl(m);
4038 gbeauche 1.1 }
4039     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4040    
4041     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4042     {
4043     make_tos(r);
4044 gbeauche 1.23 raw_fstpl(m);
4045 gbeauche 1.1 live.onstack[live.tos]=-1;
4046     live.tos--;
4047     live.spos[r]=-2;
4048     }
4049     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4050    
4051     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4052     {
4053 gbeauche 1.23 raw_fldl(m);
4054 gbeauche 1.1 tos_make(r);
4055     }
4056     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4057    
4058     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4059     {
4060 gbeauche 1.23 raw_fildl(m);
4061 gbeauche 1.1 tos_make(r);
4062     }
4063     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4064    
4065     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4066     {
4067     make_tos(r);
4068 gbeauche 1.23 raw_fistl(m);
4069 gbeauche 1.1 }
4070     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4071    
4072     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4073     {
4074 gbeauche 1.23 raw_flds(m);
4075 gbeauche 1.1 tos_make(r);
4076     }
4077     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4078    
4079     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4080     {
4081     make_tos(r);
4082 gbeauche 1.23 raw_fsts(m);
4083 gbeauche 1.1 }
4084     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4085    
4086     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4087     {
4088     int rs;
4089    
4090     /* Stupid x87 can't write a long double to mem without popping the
4091     stack! */
4092     usereg(r);
4093     rs=stackpos(r);
4094     emit_byte(0xd9); /* Get a copy to the top of stack */
4095     emit_byte(0xc0+rs);
4096    
4097 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4098 gbeauche 1.1 }
4099     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4100    
4101     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4102     {
4103     int rs;
4104    
4105     make_tos(r);
4106 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4107 gbeauche 1.1 live.onstack[live.tos]=-1;
4108     live.tos--;
4109     live.spos[r]=-2;
4110     }
4111     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4112    
4113     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4114     {
4115 gbeauche 1.23 raw_fldt(m);
4116 gbeauche 1.1 tos_make(r);
4117     }
4118     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4119    
4120     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4121     {
4122     emit_byte(0xd9);
4123     emit_byte(0xeb);
4124     tos_make(r);
4125     }
4126     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4127    
4128     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4129     {
4130     emit_byte(0xd9);
4131     emit_byte(0xec);
4132     tos_make(r);
4133     }
4134     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4135    
4136     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4137     {
4138     emit_byte(0xd9);
4139     emit_byte(0xea);
4140     tos_make(r);
4141     }
4142     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4143    
4144     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4145     {
4146     emit_byte(0xd9);
4147     emit_byte(0xed);
4148     tos_make(r);
4149     }
4150     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4151    
4152     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4153     {
4154     emit_byte(0xd9);
4155     emit_byte(0xe8);
4156     tos_make(r);
4157     }
4158     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4159    
4160     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4161     {
4162     emit_byte(0xd9);
4163     emit_byte(0xee);
4164     tos_make(r);
4165     }
4166     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4167    
4168     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4169     {
4170     int ds;
4171    
4172     usereg(s);
4173     ds=stackpos(s);
4174     if (ds==0 && live.spos[d]>=0) {
4175     /* source is on top of stack, and we already have the dest */
4176     int dd=stackpos(d);
4177     emit_byte(0xdd);
4178     emit_byte(0xd0+dd);
4179     }
4180     else {
4181     emit_byte(0xd9);
4182     emit_byte(0xc0+ds); /* duplicate source on tos */
4183     tos_make(d); /* store to destination, pop if necessary */
4184     }
4185     }
4186     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4187    
4188     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4189     {
4190     emit_byte(0xd9);
4191     emit_byte(0xa8+index);
4192     emit_long(base);
4193     }
4194     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4195    
4196    
4197     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4198     {
4199     int ds;
4200    
4201     if (d!=s) {
4202     usereg(s);
4203     ds=stackpos(s);
4204     emit_byte(0xd9);
4205     emit_byte(0xc0+ds); /* duplicate source */
4206     emit_byte(0xd9);
4207     emit_byte(0xfa); /* take square root */
4208     tos_make(d); /* store to destination */
4209     }
4210     else {
4211     make_tos(d);
4212     emit_byte(0xd9);
4213     emit_byte(0xfa); /* take square root */
4214     }
4215     }
4216     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4217    
4218     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4219     {
4220     int ds;
4221    
4222     if (d!=s) {
4223     usereg(s);
4224     ds=stackpos(s);
4225     emit_byte(0xd9);
4226     emit_byte(0xc0+ds); /* duplicate source */
4227     emit_byte(0xd9);
4228     emit_byte(0xe1); /* take fabs */
4229     tos_make(d); /* store to destination */
4230     }
4231     else {
4232     make_tos(d);
4233     emit_byte(0xd9);
4234     emit_byte(0xe1); /* take fabs */
4235     }
4236     }
4237     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4238    
4239     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4240     {
4241     int ds;
4242    
4243     if (d!=s) {
4244     usereg(s);
4245     ds=stackpos(s);
4246     emit_byte(0xd9);
4247     emit_byte(0xc0+ds); /* duplicate source */
4248     emit_byte(0xd9);
4249     emit_byte(0xfc); /* take frndint */
4250     tos_make(d); /* store to destination */
4251     }
4252     else {
4253     make_tos(d);
4254     emit_byte(0xd9);
4255     emit_byte(0xfc); /* take frndint */
4256     }
4257     }
4258     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4259    
4260     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4261     {
4262     int ds;
4263    
4264     if (d!=s) {
4265     usereg(s);
4266     ds=stackpos(s);
4267     emit_byte(0xd9);
4268     emit_byte(0xc0+ds); /* duplicate source */
4269     emit_byte(0xd9);
4270     emit_byte(0xff); /* take cos */
4271     tos_make(d); /* store to destination */
4272     }
4273     else {
4274     make_tos(d);
4275     emit_byte(0xd9);
4276     emit_byte(0xff); /* take cos */
4277     }
4278     }
4279     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4280    
4281     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4282     {
4283     int ds;
4284    
4285     if (d!=s) {
4286     usereg(s);
4287     ds=stackpos(s);
4288     emit_byte(0xd9);
4289     emit_byte(0xc0+ds); /* duplicate source */
4290     emit_byte(0xd9);
4291     emit_byte(0xfe); /* take sin */
4292     tos_make(d); /* store to destination */
4293     }
4294     else {
4295     make_tos(d);
4296     emit_byte(0xd9);
4297     emit_byte(0xfe); /* take sin */
4298     }
4299     }
4300     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4301    
4302     double one=1;
4303     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4304     {
4305     int ds;
4306    
4307     usereg(s);
4308     ds=stackpos(s);
4309     emit_byte(0xd9);
4310     emit_byte(0xc0+ds); /* duplicate source */
4311    
4312     emit_byte(0xd9);
4313     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4314     emit_byte(0xd9);
4315     emit_byte(0xfc); /* rndint */
4316     emit_byte(0xd9);
4317     emit_byte(0xc9); /* swap top two elements */
4318     emit_byte(0xd8);
4319     emit_byte(0xe1); /* subtract rounded from original */
4320     emit_byte(0xd9);
4321     emit_byte(0xf0); /* f2xm1 */
4322     emit_byte(0xdc);
4323     emit_byte(0x05);
4324 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4325 gbeauche 1.1 emit_byte(0xd9);
4326     emit_byte(0xfd); /* and scale it */
4327     emit_byte(0xdd);
4328     emit_byte(0xd9); /* take he rounded value off */
4329     tos_make(d); /* store to destination */
4330     }
4331     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4332    
4333     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4334     {
4335     int ds;
4336    
4337     usereg(s);
4338     ds=stackpos(s);
4339     emit_byte(0xd9);
4340     emit_byte(0xc0+ds); /* duplicate source */
4341     emit_byte(0xd9);
4342     emit_byte(0xea); /* fldl2e */
4343     emit_byte(0xde);
4344     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4345    
4346     emit_byte(0xd9);
4347     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4348     emit_byte(0xd9);
4349     emit_byte(0xfc); /* rndint */
4350     emit_byte(0xd9);
4351     emit_byte(0xc9); /* swap top two elements */
4352     emit_byte(0xd8);
4353     emit_byte(0xe1); /* subtract rounded from original */
4354     emit_byte(0xd9);
4355     emit_byte(0xf0); /* f2xm1 */
4356     emit_byte(0xdc);
4357     emit_byte(0x05);
4358 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4359 gbeauche 1.1 emit_byte(0xd9);
4360     emit_byte(0xfd); /* and scale it */
4361     emit_byte(0xdd);
4362     emit_byte(0xd9); /* take he rounded value off */
4363     tos_make(d); /* store to destination */
4364     }
4365     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4366    
4367     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4368     {
4369     int ds;
4370    
4371     usereg(s);
4372     ds=stackpos(s);
4373     emit_byte(0xd9);
4374     emit_byte(0xc0+ds); /* duplicate source */
4375     emit_byte(0xd9);
4376     emit_byte(0xe8); /* push '1' */
4377     emit_byte(0xd9);
4378     emit_byte(0xc9); /* swap top two */
4379     emit_byte(0xd9);
4380     emit_byte(0xf1); /* take 1*log2(x) */
4381     tos_make(d); /* store to destination */
4382     }
4383     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4384    
4385    
4386     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4387     {
4388     int ds;
4389    
4390     if (d!=s) {
4391     usereg(s);
4392     ds=stackpos(s);
4393     emit_byte(0xd9);
4394     emit_byte(0xc0+ds); /* duplicate source */
4395     emit_byte(0xd9);
4396     emit_byte(0xe0); /* take fchs */
4397     tos_make(d); /* store to destination */
4398     }
4399     else {
4400     make_tos(d);
4401     emit_byte(0xd9);
4402     emit_byte(0xe0); /* take fchs */
4403     }
4404     }
4405     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4406    
4407     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4408     {
4409     int ds;
4410    
4411     usereg(s);
4412     usereg(d);
4413    
4414     if (live.spos[s]==live.tos) {
4415     /* Source is on top of stack */
4416     ds=stackpos(d);
4417     emit_byte(0xdc);
4418     emit_byte(0xc0+ds); /* add source to dest*/
4419     }
4420     else {
4421     make_tos(d);
4422     ds=stackpos(s);
4423    
4424     emit_byte(0xd8);
4425     emit_byte(0xc0+ds); /* add source to dest*/
4426     }
4427     }
4428     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4429    
4430     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4431     {
4432     int ds;
4433    
4434     usereg(s);
4435     usereg(d);
4436    
4437     if (live.spos[s]==live.tos) {
4438     /* Source is on top of stack */
4439     ds=stackpos(d);
4440     emit_byte(0xdc);
4441     emit_byte(0xe8+ds); /* sub source from dest*/
4442     }
4443     else {
4444     make_tos(d);
4445     ds=stackpos(s);
4446    
4447     emit_byte(0xd8);
4448     emit_byte(0xe0+ds); /* sub src from dest */
4449     }
4450     }
4451     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4452    
4453     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4454     {
4455     int ds;
4456    
4457     usereg(s);
4458     usereg(d);
4459    
4460     make_tos(d);
4461     ds=stackpos(s);
4462    
4463     emit_byte(0xdd);
4464     emit_byte(0xe0+ds); /* cmp dest with source*/
4465     }
4466     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4467    
4468     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4469     {
4470     int ds;
4471    
4472     usereg(s);
4473     usereg(d);
4474    
4475     if (live.spos[s]==live.tos) {
4476     /* Source is on top of stack */
4477     ds=stackpos(d);
4478     emit_byte(0xdc);
4479     emit_byte(0xc8+ds); /* mul dest by source*/
4480     }
4481     else {
4482     make_tos(d);
4483     ds=stackpos(s);
4484    
4485     emit_byte(0xd8);
4486     emit_byte(0xc8+ds); /* mul dest by source*/
4487     }
4488     }
4489     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4490    
4491     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4492     {
4493     int ds;
4494    
4495     usereg(s);
4496     usereg(d);
4497    
4498     if (live.spos[s]==live.tos) {
4499     /* Source is on top of stack */
4500     ds=stackpos(d);
4501     emit_byte(0xdc);
4502     emit_byte(0xf8+ds); /* div dest by source */
4503     }
4504     else {
4505     make_tos(d);
4506     ds=stackpos(s);
4507    
4508     emit_byte(0xd8);
4509     emit_byte(0xf0+ds); /* div dest by source*/
4510     }
4511     }
4512     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4513    
4514     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4515     {
4516     int ds;
4517    
4518     usereg(s);
4519     usereg(d);
4520    
4521     make_tos2(d,s);
4522     ds=stackpos(s);
4523    
4524     if (ds!=1) {
4525     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4526     abort();
4527     }
4528     emit_byte(0xd9);
4529     emit_byte(0xf8); /* take rem from dest by source */
4530     }
4531     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4532    
4533     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4534     {
4535     int ds;
4536    
4537     usereg(s);
4538     usereg(d);
4539    
4540     make_tos2(d,s);
4541     ds=stackpos(s);
4542    
4543     if (ds!=1) {
4544     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4545     abort();
4546     }
4547     emit_byte(0xd9);
4548     emit_byte(0xf5); /* take rem1 from dest by source */
4549     }
4550     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4551    
4552    
4553     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4554     {
4555     make_tos(r);
4556     emit_byte(0xd9); /* ftst */
4557     emit_byte(0xe4);
4558     }
4559     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4560    
4561     /* %eax register is clobbered if target processor doesn't support fucomi */
4562     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4563     #define FFLAG_NREG EAX_INDEX
4564    
4565     static __inline__ void raw_fflags_into_flags(int r)
4566     {
4567     int p;
4568    
4569     usereg(r);
4570     p=stackpos(r);
4571    
4572     emit_byte(0xd9);
4573     emit_byte(0xee); /* Push 0 */
4574     emit_byte(0xd9);
4575     emit_byte(0xc9+p); /* swap top two around */
4576     if (have_cmov) {
4577     // gb-- fucomi is for P6 cores only, not K6-2 then...
4578     emit_byte(0xdb);
4579     emit_byte(0xe9+p); /* fucomi them */
4580     }
4581     else {
4582     emit_byte(0xdd);
4583     emit_byte(0xe1+p); /* fucom them */
4584     emit_byte(0x9b);
4585     emit_byte(0xdf);
4586     emit_byte(0xe0); /* fstsw ax */
4587     raw_sahf(0); /* sahf */
4588     }
4589     emit_byte(0xdd);
4590     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4591     }