ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.32
Committed: 2006-01-16T21:31:41Z (18 years, 5 months ago) by gbeauche
Branch: MAIN
Changes since 1.31: +3 -2 lines
Log Message:
more precise callee-saved register set

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.26 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 gbeauche 1.26 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numeric indices 0..7 for the eight classic x86 general-purpose registers.
   The tables and REG_* macros later in this file refer to registers by
   these numbers. NOTE(review): the values look like the hardware register
   encodings - confirm before relying on that. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
/* Indices 8..15 for the additional registers R8-R15; only defined when
   building for x86-64 (__x86_64__). */
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
56     /* The register in which subroutines return an integer return value
   (EAX_INDEX in every configuration handled by this file) */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
/* Three configurations are distinguished:
   - MSVC without USE_NORMAL_CALLING_CONVENTION: ECX_INDEX / EDX_INDEX
   - x86-64:                                     EDI_INDEX / ESI_INDEX
   - everything else:                            EAX_INDEX / EDX_INDEX
   NOTE(review): the last case presumably matches a register-parameter
   calling convention selected elsewhere in the build - confirm. */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__)
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
/* Registers used around regs.pc_p handling: REG_PC_PRE for preloading,
   REG_PC_TMP as a second scratch register.
   NOTE(review): in the MSVC fastcall branch REG_PC_TMP is EAX_INDEX, i.e.
   the same register as REG_PC_PRE, which contradicts the "not the above"
   remark on the other branch - confirm this is intended. */
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
/* Fixed native registers required by particular instruction classes:
   the shift count register and the two halves of a 32x32 multiply result.
   raw_imul_64_32 and raw_mul_64_32 in this file check their operands
   against MUL_NREG1 / MUL_NREG2. */
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
/* Stack layout constants: STACK_ALIGN is 16 and STACK_OFFSET is the size of
   one pointer. NOTE(review): presumably the required stack alignment in
   bytes and the space taken by a pushed return address - the users of these
   macros are outside this view, confirm there. */
84 gbeauche 1.31 #define STACK_ALIGN 16
85     #define STACK_OFFSET sizeof(void *)
86    
/* Register-index lists, each terminated by -1.
   - always_used: only index 4 (ESP_INDEX).
   - can_byte / can_word: on x86-64 every index 0..15 except 4; on IA-32
     can_byte is limited to 0..3 (EAX, ECX, EDX, EBX) and can_word is
     0..3 plus 5..7.
   NOTE(review): presumably the registers usable for 8-bit and 16-bit
   operands by the register allocator - the consumers are outside this view. */
87 gbeauche 1.1 uae_s8 always_used[]={4,-1};
88 gbeauche 1.20 #if defined(__x86_64__)
89     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
90     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
91     #else
92 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
93     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
94 gbeauche 1.20 #endif
95 gbeauche 1.1
/* call_saved[] is indexed by register index; a 1 marks a register treated as
   saved across calls. The USE_OPTIMIZED_CALLS variant (8 entries) is guarded
   by an #error and therefore cannot currently be built. The default variant
   has 16 entries and marks only index 4 (ESP_INDEX). */
96 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
97     /* Make sure interpretive core does not use cpuopti */
98     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
99 gbeauche 1.20 #error FIXME: code not ready
100 gbeauche 1.17 #else
101 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
102     by the caller */
103 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
104 gbeauche 1.17 #endif
105 gbeauche 1.1
106     /* This *should* be the same as call_saved. But:
107     - We might not really know which registers are saved, and which aren't,
108     so we need to preserve some, but don't want to rely on everyone else
109     also saving those registers
110     - Special registers (such as the stack pointer) should not be "preserved"
111     by pushing, even though they are "saved" across function calls
112     */
/* need_to_preserve[] is indexed by register index; 1 = must be preserved.
   x86-64 table: indices 3 (EBX_INDEX), 5 (EBP_INDEX) and 11..15 (R11-R15)
   are set; index 4 (ESP_INDEX) is deliberately 0, matching the remark about
   special registers above.
   IA-32 table: indices 3, 5, 6, 7 (EBX, EBP, ESI, EDI) are set. */
113 gbeauche 1.21 #if defined(__x86_64__)
114 gbeauche 1.32 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
115 gbeauche 1.22 /* preserve r11 because it's generally used to hold pointers to functions */
116     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
117 gbeauche 1.21 #else
118 gbeauche 1.32 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
119     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
120 gbeauche 1.21 #endif
121 gbeauche 1.1
122     /* Whether classes of instructions do or don't clobber the native flags */
/* A macro that expands to nothing marks a class that leaves the flags alone
   (MOV, LEA, CMOV, POP, PUSH here); the arithmetic and logic classes expand
   to a clobber_flags() call. clobber_flags() is defined outside this view. */
123     #define CLOBBER_MOV
124     #define CLOBBER_LEA
125     #define CLOBBER_CMOV
126     #define CLOBBER_POP
127     #define CLOBBER_PUSH
128     #define CLOBBER_SUB clobber_flags()
129     #define CLOBBER_SBB clobber_flags()
130     #define CLOBBER_CMP clobber_flags()
131     #define CLOBBER_ADD clobber_flags()
132     #define CLOBBER_ADC clobber_flags()
133     #define CLOBBER_AND clobber_flags()
134     #define CLOBBER_OR clobber_flags()
135     #define CLOBBER_XOR clobber_flags()
136    
/* Flag-clobber markers for rotates, shifts, tests, extensions, byte swaps,
   SETcc, multiplies and bit operations. Empty expansion = flags untouched;
   otherwise the macro expands to clobber_flags().
   CLOBBER_SW16 clobbers while CLOBBER_SW32 does not: in this file
   raw_bswap_16 is implemented with ROLWir (a rotate) whereas raw_bswap_32
   uses BSWAPLr. */
137     #define CLOBBER_ROL clobber_flags()
138     #define CLOBBER_ROR clobber_flags()
139     #define CLOBBER_SHLL clobber_flags()
140     #define CLOBBER_SHRL clobber_flags()
141     #define CLOBBER_SHRA clobber_flags()
142     #define CLOBBER_TEST clobber_flags()
143     #define CLOBBER_CL16
144     #define CLOBBER_CL8
145 gbeauche 1.20 #define CLOBBER_SE32
146 gbeauche 1.1 #define CLOBBER_SE16
147     #define CLOBBER_SE8
148 gbeauche 1.20 #define CLOBBER_ZE32
149 gbeauche 1.1 #define CLOBBER_ZE16
150     #define CLOBBER_ZE8
151     #define CLOBBER_SW16 clobber_flags()
152     #define CLOBBER_SW32
153     #define CLOBBER_SETCC
154     #define CLOBBER_MUL clobber_flags()
155     #define CLOBBER_BT clobber_flags()
156     #define CLOBBER_BSF clobber_flags()
157    
/* Selects the new run-time assembler: USE_NEW_RTASM is defined to 1 here
   only for x86-64 builds; other targets leave it to the build configuration.
   NOTE(review): the FIXME below says "disabled", yet the code force-enables
   the feature on x86-64 - the remark presumably refers to IA-32 only. */
158 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
159 gbeauche 1.20 #if defined(__x86_64__)
160     #define USE_NEW_RTASM 1
161     #endif
162    
163     #if USE_NEW_RTASM
164 gbeauche 1.13
/* Configuration macros set before including codegen_x86.h: 64-bit target
   selection (x86-64 only), X86_FLAT_REGISTERS off, ALU and rotate/shift
   optimizations on. NOTE(review): the exact effect of each switch is
   determined by codegen_x86.h, which is not visible here. */
165     #if defined(__x86_64__)
166     #define X86_TARGET_64BIT 1
167     #endif
168     #define X86_FLAT_REGISTERS 0
169 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
170     #define X86_OPTIMIZE_ROTSHI 1
171 gbeauche 1.13 #include "codegen_x86.h"
172    
/* Glue between the x86_* hook names and this JIT's own primitives:
   byte/word/long/quad emission map to emit_byte/emit_word/emit_long/
   emit_quad, the current output position to get_target(), and failures to
   jit_fail() with the call site's file, line and function. */
173     #define x86_emit_byte(B) emit_byte(B)
174     #define x86_emit_word(W) emit_word(W)
175     #define x86_emit_long(L) emit_long(L)
176 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
177 gbeauche 1.13 #define x86_get_target() get_target()
178     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
179    
/* Report a fatal code-generation error and terminate.
   msg      - description of the failure
   file     - source file of the failing call site
   line     - line number of the failing call site
   function - name of the function containing the call site
   Writes one diagnostic line to stderr and then calls abort(); it never
   returns. Used by the x86_emit_failure() hook defined in this file. */
180     static void jit_fail(const char *msg, const char *file, int line, const char *function)
181     {
182     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
183     function, file, line, msg);
184     abort();
185     }
186    
/* Push register r: uses PUSHQr when __x86_64__ is defined, PUSHLr otherwise. */
187     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
188     {
189 gbeauche 1.20 #if defined(__x86_64__)
190     PUSHQr(r);
191     #else
192 gbeauche 1.13 PUSHLr(r);
193 gbeauche 1.20 #endif
194 gbeauche 1.13 }
195     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
196    
/* Pop into register r: uses POPQr when __x86_64__ is defined, POPLr otherwise. */
197     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
198     {
199 gbeauche 1.20 #if defined(__x86_64__)
200     POPQr(r);
201     #else
202 gbeauche 1.13 POPLr(r);
203 gbeauche 1.20 #endif
204 gbeauche 1.13 }
205     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
206    
/* Pop into the memory operand d: POPQm on __x86_64__ builds, POPLm otherwise.
   Both register slots of the address are X86_NOREG and the final address
   argument is 1, so d alone selects the location. */
207 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
208     {
209     #if defined(__x86_64__)
210     POPQm(d, X86_NOREG, X86_NOREG, 1);
211     #else
212     POPLm(d, X86_NOREG, X86_NOREG, 1);
213     #endif
214     }
215     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
216    
/* Forwards (i, r) to BTLir. NOTE(review): presumably a 32-bit BT of register
   r with immediate bit index i - BTLir is defined outside this view. */
217 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
218     {
219     BTLir(i, r);
220     }
221     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
222    
/* Forwards (b, r) to BTLrr. NOTE(review): presumably a 32-bit BT of register
   r with the bit index taken from register b - confirm against BTLrr. */
223     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
224     {
225     BTLrr(b, r);
226     }
227     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
228    
/* Forwards (i, r) to BTCLir. NOTE(review): presumably a 32-bit BTC
   (test and complement) of bit i in register r - confirm against BTCLir. */
229     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
230     {
231     BTCLir(i, r);
232     }
233     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
234    
/* Forwards (b, r) to BTCLrr. NOTE(review): presumably a 32-bit BTC of
   register r with the bit index in register b - confirm against BTCLrr. */
235     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
236     {
237     BTCLrr(b, r);
238     }
239     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
240    
/* Forwards (i, r) to BTRLir. NOTE(review): presumably a 32-bit BTR
   (test and reset) of bit i in register r - confirm against BTRLir. */
241     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
242     {
243     BTRLir(i, r);
244     }
245     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
246    
/* Forwards (b, r) to BTRLrr. NOTE(review): presumably a 32-bit BTR of
   register r with the bit index in register b - confirm against BTRLrr. */
247     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
248     {
249     BTRLrr(b, r);
250     }
251     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
252    
/* Forwards (i, r) to BTSLir. NOTE(review): presumably a 32-bit BTS
   (test and set) of bit i in register r - confirm against BTSLir. */
253     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
254     {
255     BTSLir(i, r);
256     }
257     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
258    
/* Forwards (b, r) to BTSLrr. NOTE(review): presumably a 32-bit BTS of
   register r with the bit index in register b - confirm against BTSLrr. */
259     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
260     {
261     BTSLrr(b, r);
262     }
263     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
264    
/* Forwards (i, d) to SUBWir. NOTE(review): presumably a 16-bit subtract of
   immediate i from register d - confirm against SUBWir. */
265     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
266     {
267     SUBWir(i, d);
268     }
269     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
270    
/* Calls MOVLmr with s as the first address argument, X86_NOREG in both
   register slots, 1 as the last address argument and d as the final operand.
   NOTE(review): presumably a 32-bit load of register d from absolute
   address s. */
271     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
272     {
273     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
274     }
275     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
276    
/* Calls MOVLim with immediate s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably a 32-bit store of s to absolute address d. */
277     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
278     {
279     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
280     }
281     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
282    
/* Calls MOVWim with immediate s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably a 16-bit store of s to absolute address d. */
283     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
284     {
285     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
286     }
287     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
288    
/* Calls MOVBim with immediate s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably an 8-bit store of s to absolute address d. */
289     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
290     {
291     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
292     }
293     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
294    
/* Calls ROLBim with count i and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably an 8-bit rotate-left of the byte at absolute
   address d by i. */
295     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
296     {
297     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
298     }
299     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
300    
/* Forwards (i, r) to ROLBir. NOTE(review): presumably an 8-bit rotate-left
   of register r by immediate i. */
301     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
302     {
303     ROLBir(i, r);
304     }
305     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
306    
/* Forwards (i, r) to ROLWir. NOTE(review): presumably a 16-bit rotate-left
   of register r by immediate i. */
307     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
308     {
309     ROLWir(i, r);
310     }
311     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
312    
/* Forwards (i, r) to ROLLir. NOTE(review): presumably a 32-bit rotate-left
   of register r by immediate i. */
313     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
314     {
315     ROLLir(i, r);
316     }
317     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
318    
/* Forwards to ROLLrr with the count register r first and the operand d
   second. NOTE(review): presumably a 32-bit rotate-left of d by the count
   held in r. */
319     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320     {
321     ROLLrr(r, d);
322     }
323     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
324    
/* Forwards to ROLWrr with the count register r first and the operand d
   second. NOTE(review): presumably a 16-bit rotate-left of d by the count
   held in r. */
325     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326     {
327     ROLWrr(r, d);
328     }
329     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
330    
/* Forwards to ROLBrr with the count register r first and the operand d
   second. NOTE(review): presumably an 8-bit rotate-left of d by the count
   held in r. */
331     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
332     {
333     ROLBrr(r, d);
334     }
335     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
336    
/* Forwards to SHLLrr with the count register r first and the operand d
   second. NOTE(review): presumably a 32-bit logical shift-left of d by the
   count held in r. */
337     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
338     {
339     SHLLrr(r, d);
340     }
341     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
342    
/* Forwards to SHLWrr with the count register r first and the operand d
   second. NOTE(review): presumably a 16-bit logical shift-left of d by the
   count held in r. */
343     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344     {
345     SHLWrr(r, d);
346     }
347     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
348    
/* Forwards to SHLBrr with the count register r first and the operand d
   second. NOTE(review): presumably an 8-bit logical shift-left of d by the
   count held in r. */
349     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350     {
351     SHLBrr(r, d);
352     }
353     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
354    
/* Forwards (i, r) to RORBir. NOTE(review): presumably an 8-bit rotate-right
   of register r by immediate i. */
355     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
356     {
357     RORBir(i, r);
358     }
359     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
360    
/* Forwards (i, r) to RORWir. NOTE(review): presumably a 16-bit rotate-right
   of register r by immediate i. */
361     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
362     {
363     RORWir(i, r);
364     }
365     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
366    
/* Calls ORLmr with the address (s, X86_NOREG, X86_NOREG, 1) and register d.
   NOTE(review): presumably a 32-bit OR of the value at absolute address s
   into register d. */
367     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
368     {
369     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
370     }
371     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
372    
/* Forwards (i, r) to RORLir. NOTE(review): presumably a 32-bit rotate-right
   of register r by immediate i. */
373     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
374     {
375     RORLir(i, r);
376     }
377     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
378    
/* Forwards to RORLrr with the count register r first and the operand d
   second. NOTE(review): presumably a 32-bit rotate-right of d by the count
   held in r. */
379     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
380     {
381     RORLrr(r, d);
382     }
383     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
384    
/* Forwards to RORWrr with the count register r first and the operand d
   second. NOTE(review): presumably a 16-bit rotate-right of d by the count
   held in r. */
385     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
386     {
387     RORWrr(r, d);
388     }
389     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
390    
/* Forwards to RORBrr with the count register r first and the operand d
   second. NOTE(review): presumably an 8-bit rotate-right of d by the count
   held in r. */
391     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
392     {
393     RORBrr(r, d);
394     }
395     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
396    
/* Forwards to SHRLrr with the count register r first and the operand d
   second. NOTE(review): presumably a 32-bit logical shift-right of d by the
   count held in r. */
397     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
398     {
399     SHRLrr(r, d);
400     }
401     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
402    
/* Forwards to SHRWrr with the count register r first and the operand d
   second. NOTE(review): presumably a 16-bit logical shift-right of d by the
   count held in r. */
403     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
404     {
405     SHRWrr(r, d);
406     }
407     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
408    
/* Forwards to SHRBrr with the count register r first and the operand d
   second. NOTE(review): presumably an 8-bit logical shift-right of d by the
   count held in r. */
409     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
410     {
411     SHRBrr(r, d);
412     }
413     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
414    
/* "shra" is implemented with SARLrr(r, d): count register first, operand
   second. NOTE(review): presumably a 32-bit arithmetic shift-right of d by
   the count held in r. */
415     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
416     {
417 gbeauche 1.14 SARLrr(r, d);
418 gbeauche 1.13 }
419     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
420    
/* "shra" is implemented with SARWrr(r, d): count register first, operand
   second. NOTE(review): presumably a 16-bit arithmetic shift-right of d by
   the count held in r. */
421     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
422     {
423 gbeauche 1.14 SARWrr(r, d);
424 gbeauche 1.13 }
425     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
426    
/* "shra" is implemented with SARBrr(r, d): count register first, operand
   second. NOTE(review): presumably an 8-bit arithmetic shift-right of d by
   the count held in r. */
427     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
428     {
429 gbeauche 1.14 SARBrr(r, d);
430 gbeauche 1.13 }
431     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
432    
/* Forwards (i, r) to SHLLir. NOTE(review): presumably a 32-bit logical
   shift-left of register r by immediate i. */
433     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
434     {
435     SHLLir(i, r);
436     }
437     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
438    
/* Forwards (i, r) to SHLWir. NOTE(review): presumably a 16-bit logical
   shift-left of register r by immediate i. */
439     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
440     {
441     SHLWir(i, r);
442     }
443     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
444    
/* Forwards (i, r) to SHLBir. NOTE(review): presumably an 8-bit logical
   shift-left of register r by immediate i. */
445     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
446     {
447     SHLBir(i, r);
448     }
449     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
450    
/* Forwards (i, r) to SHRLir. NOTE(review): presumably a 32-bit logical
   shift-right of register r by immediate i. */
451     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
452     {
453     SHRLir(i, r);
454     }
455     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
456    
/* Forwards (i, r) to SHRWir. NOTE(review): presumably a 16-bit logical
   shift-right of register r by immediate i. */
457     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
458     {
459     SHRWir(i, r);
460     }
461     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
462    
/* Forwards (i, r) to SHRBir. NOTE(review): presumably an 8-bit logical
   shift-right of register r by immediate i. */
463     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
464     {
465     SHRBir(i, r);
466     }
467     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
468    
/* "shra" is implemented with SARLir(i, r). NOTE(review): presumably a 32-bit
   arithmetic shift-right of register r by immediate i. */
469     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
470     {
471 gbeauche 1.14 SARLir(i, r);
472 gbeauche 1.13 }
473     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
474    
/* "shra" is implemented with SARWir(i, r). NOTE(review): presumably a 16-bit
   arithmetic shift-right of register r by immediate i. */
475     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
476     {
477 gbeauche 1.14 SARWir(i, r);
478 gbeauche 1.13 }
479     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
480    
/* "shra" is implemented with SARBir(i, r). NOTE(review): presumably an 8-bit
   arithmetic shift-right of register r by immediate i. */
481     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
482     {
483 gbeauche 1.14 SARBir(i, r);
484 gbeauche 1.13 }
485     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
486    
/* Calls SAHF() with no operands. The dummy_ah parameter is not used in the
   body. NOTE(review): it presumably exists only so the caller's register
   allocator places the value in AH first - confirm. */
487     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
488     {
489     SAHF();
490     }
491     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
492    
/* Calls CPUID() with no operands. The dummy_eax parameter is not used in the
   body. NOTE(review): it presumably tells the allocator that EAX is
   involved - confirm. */
493     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
494     {
495     CPUID();
496     }
497     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
498    
/* Calls LAHF() with no operands. The dummy_ah parameter is not used in the
   body. NOTE(review): it presumably marks AH as the written register for the
   allocator - confirm. */
499     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
500     {
501     LAHF();
502     }
503     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
504    
/* Forwards (cc, d) to SETCCir. NOTE(review): presumably SETcc on byte
   register d for condition code cc. */
505     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
506     {
507     SETCCir(cc, d);
508     }
509     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
510    
/* Calls SETCCim with condition cc and the address
   (d, X86_NOREG, X86_NOREG, 1). NOTE(review): presumably SETcc on the byte
   at absolute address d. */
511     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
512     {
513     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
514     }
515     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
516    
/* Conditional register-to-register move of s into d under condition cc.
   With have_cmov set it forwards to CMOVLrr(cc, s, d). Without CMOV, x86-64
   builds log a message and abort(); other builds emit JCCSii(cc^1, 2)
   followed by MOVLrr(s, d). */
517     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
518     {
519 gbeauche 1.15 if (have_cmov)
520     CMOVLrr(cc, s, d);
521     else { /* replacement using branch and mov */
522     #if defined(__x86_64__)
523     write_log("x86-64 implementations are bound to have CMOV!\n");
524     abort();
525     #endif
/* NOTE(review): cc^1 presumably selects the opposite condition, and the
   literal 2 presumably is the byte length of the register move that the
   short jump must skip - confirm against the JCCSii/MOVLrr encodings. */
526     JCCSii(cc^1, 2);
527     MOVLrr(s, d);
528     }
529 gbeauche 1.13 }
530     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
531    
/* Forwards (s, d) to BSFLrr. NOTE(review): presumably a 32-bit BSF
   (bit scan forward) of s with the result in d. */
532     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
533     {
534     BSFLrr(s, d);
535     }
536     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
537    
/* Forwards (s, d) to MOVSLQrr. NOTE(review): presumably sign-extends the
   32-bit value in s to 64 bits in d, which would only be meaningful on
   x86-64 - confirm. */
538 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
539     {
540     MOVSLQrr(s, d);
541     }
542     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
543    
/* Forwards (s, d) to MOVSWLrr. NOTE(review): presumably sign-extends the
   16-bit value in s to 32 bits in d. */
544 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
545     {
546     MOVSWLrr(s, d);
547     }
548     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
549    
/* Forwards (s, d) to MOVSBLrr. NOTE(review): presumably sign-extends the
   8-bit value in s to 32 bits in d. */
550     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
551     {
552     MOVSBLrr(s, d);
553     }
554     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
555    
/* Forwards (s, d) to MOVZWLrr. NOTE(review): presumably zero-extends the
   16-bit value in s to 32 bits in d. */
556     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
557     {
558     MOVZWLrr(s, d);
559     }
560     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
561    
/* Forwards (s, d) to MOVZBLrr. NOTE(review): presumably zero-extends the
   8-bit value in s to 32 bits in d. */
562     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
563     {
564     MOVZBLrr(s, d);
565     }
566     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
567    
/* Forwards (s, d) to IMULLrr. NOTE(review): presumably the two-operand
   signed 32-bit multiply d = d * s. */
568     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
569     {
570 gbeauche 1.14 IMULLrr(s, d);
571 gbeauche 1.13 }
572     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
573    
/* Signed widening multiply restricted to fixed registers: logs and abort()s
   unless d is MUL_NREG1 and s is MUL_NREG2 (EAX_INDEX and EDX_INDEX per the
   definitions earlier in this file), then calls the one-operand IMULLr(s). */
574     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
575     {
576 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
577     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
578 gbeauche 1.13 abort();
579 gbeauche 1.14 }
580     IMULLr(s);
581 gbeauche 1.13 }
582     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
583    
/* Unsigned counterpart of raw_imul_64_32: logs and abort()s unless d is
   MUL_NREG1 and s is MUL_NREG2, then calls the one-operand MULLr(s). */
584     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
585     {
586 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
587     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
588 gbeauche 1.13 abort();
589 gbeauche 1.14 }
590     MULLr(s);
591 gbeauche 1.13 }
592     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
593    
/* Not implemented on this target: the body unconditionally calls abort()
   and emits nothing. Neither d nor s is used. */
594     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
595     {
596 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
597 gbeauche 1.13 }
598     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
599    
/* Forwards (s, d) to MOVBrr. NOTE(review): presumably an 8-bit
   register-to-register move from s to d. */
600     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
601     {
602     MOVBrr(s, d);
603     }
604     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
605    
/* Forwards (s, d) to MOVWrr. NOTE(review): presumably a 16-bit
   register-to-register move from s to d. */
606     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
607     {
608     MOVWrr(s, d);
609     }
610     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
611    
/* Calls MOVLmr(0, baser, index, factor, d). NOTE(review): presumably a
   32-bit load of d from [baser + index*factor] with zero displacement. */
612     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
613     {
614     MOVLmr(0, baser, index, factor, d);
615     }
616     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
617    
/* Calls MOVWmr(0, baser, index, factor, d). NOTE(review): presumably a
   16-bit load of d from [baser + index*factor] with zero displacement. */
618     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
619     {
620     MOVWmr(0, baser, index, factor, d);
621     }
622     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
623    
/* Calls MOVBmr(0, baser, index, factor, d). NOTE(review): presumably an
   8-bit load of d from [baser + index*factor] with zero displacement. */
624     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
625     {
626     MOVBmr(0, baser, index, factor, d);
627     }
628     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
629    
/* Calls MOVLrm(s, 0, baser, index, factor). NOTE(review): presumably a
   32-bit store of s to [baser + index*factor] with zero displacement. */
630     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
631     {
632     MOVLrm(s, 0, baser, index, factor);
633     }
634     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
635    
/* Calls MOVWrm(s, 0, baser, index, factor). NOTE(review): presumably a
   16-bit store of s to [baser + index*factor] with zero displacement. */
636     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
637     {
638     MOVWrm(s, 0, baser, index, factor);
639     }
640     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
641    
/* Calls MOVBrm(s, 0, baser, index, factor). NOTE(review): presumably an
   8-bit store of s to [baser + index*factor] with zero displacement. */
642     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
643     {
644     MOVBrm(s, 0, baser, index, factor);
645     }
646     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
647    
/* Calls MOVLrm(s, base, baser, index, factor). NOTE(review): presumably a
   32-bit store of s to [base + baser + index*factor], with the immediate
   base acting as displacement. */
648     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
649     {
650     MOVLrm(s, base, baser, index, factor);
651     }
652     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
653    
/* Calls MOVWrm(s, base, baser, index, factor). NOTE(review): presumably a
   16-bit store of s to [base + baser + index*factor]. */
654     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
655     {
656     MOVWrm(s, base, baser, index, factor);
657     }
658     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
659    
/* Calls MOVBrm(s, base, baser, index, factor). NOTE(review): presumably an
   8-bit store of s to [base + baser + index*factor]. */
660     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
661     {
662     MOVBrm(s, base, baser, index, factor);
663     }
664     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
665    
/* Calls MOVLmr(base, baser, index, factor, d). NOTE(review): presumably a
   32-bit load of d from [base + baser + index*factor]. */
666     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
667     {
668     MOVLmr(base, baser, index, factor, d);
669     }
670     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
671    
/* Calls MOVWmr(base, baser, index, factor, d). NOTE(review): presumably a
   16-bit load of d from [base + baser + index*factor]. */
672     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
673     {
674     MOVWmr(base, baser, index, factor, d);
675     }
676     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
677    
/* Calls MOVBmr(base, baser, index, factor, d). NOTE(review): presumably an
   8-bit load of d from [base + baser + index*factor]. */
678     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
679     {
680     MOVBmr(base, baser, index, factor, d);
681     }
682     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
683    
/* Calls MOVLmr(base, X86_NOREG, index, factor, d): no base register, only
   the immediate base plus a scaled index. NOTE(review): presumably a 32-bit
   load of d from [base + index*factor]. */
684     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
685     {
686     MOVLmr(base, X86_NOREG, index, factor, d);
687     }
688     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
689    
/* Conditional load of d from the address (base, X86_NOREG, index, factor)
   under condition cond. With have_cmov set it forwards to CMOVLmr. Without
   CMOV, x86-64 builds log a message and abort(); other builds emit
   JCCSii(cond^1, 7) followed by the plain MOVLmr load. */
690     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
691     {
692 gbeauche 1.15 if (have_cmov)
693     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
694     else { /* replacement using branch and mov */
695     #if defined(__x86_64__)
696     write_log("x86-64 implementations are bound to have CMOV!\n");
697     abort();
698     #endif
/* NOTE(review): cond^1 presumably selects the opposite condition, and the
   literal 7 presumably is the encoded length of the following indexed load,
   which the short jump must skip - confirm the load always encodes to
   exactly that many bytes. */
699     JCCSii(cond^1, 7);
700     MOVLmr(base, X86_NOREG, index, factor, d);
701     }
702 gbeauche 1.13 }
703     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
704    
/* Conditional load of d from the address (mem, X86_NOREG, X86_NOREG, 1)
   under condition cond. With have_cmov set it forwards to CMOVLmr. Without
   CMOV, x86-64 builds log a message and abort(); other builds emit
   JCCSii(cond^1, 6) followed by the plain MOVLmr load. */
705     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
706     {
707 gbeauche 1.15 if (have_cmov)
708     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
709     else { /* replacement using branch and mov */
710     #if defined(__x86_64__)
711     write_log("x86-64 implementations are bound to have CMOV!\n");
712     abort();
713     #endif
/* NOTE(review): cond^1 presumably selects the opposite condition, and the
   literal 6 presumably is the encoded length of the following absolute load,
   which the short jump must skip - confirm the load always encodes to
   exactly that many bytes (e.g. that no shorter EAX-only form is chosen). */
714     JCCSii(cond^1, 6);
715     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
716     }
717 gbeauche 1.13 }
718     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
719    
/* Calls MOVLmr(offset, s, X86_NOREG, 1, d). NOTE(review): presumably a
   32-bit load of d from [s + offset]. */
720     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
721     {
722     MOVLmr(offset, s, X86_NOREG, 1, d);
723     }
724     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
725    
/* Calls MOVWmr(offset, s, X86_NOREG, 1, d). NOTE(review): presumably a
   16-bit load of d from [s + offset]. */
726     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
727     {
728     MOVWmr(offset, s, X86_NOREG, 1, d);
729     }
730     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
731    
/* Calls MOVBmr(offset, s, X86_NOREG, 1, d). NOTE(review): presumably an
   8-bit load of d from [s + offset]. */
732     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
733     {
734     MOVBmr(offset, s, X86_NOREG, 1, d);
735     }
736     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
737    
/* Calls MOVLmr(offset, s, X86_NOREG, 1, d); the body is identical to
   raw_mov_l_rR in this revision. NOTE(review): the "b" variant presumably
   exists for offsets that are full base addresses - confirm with callers. */
738     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
739     {
740     MOVLmr(offset, s, X86_NOREG, 1, d);
741     }
742     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
743    
/* Calls MOVWmr(offset, s, X86_NOREG, 1, d); the body is identical to
   raw_mov_w_rR in this revision. */
744     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
745     {
746     MOVWmr(offset, s, X86_NOREG, 1, d);
747     }
748     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
749    
/* Calls MOVBmr(offset, s, X86_NOREG, 1, d); the body is identical to
   raw_mov_b_rR in this revision. */
750     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
751     {
752     MOVBmr(offset, s, X86_NOREG, 1, d);
753     }
754     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
755    
/* Calls MOVLim(i, offset, d, X86_NOREG, 1). NOTE(review): presumably a
   32-bit store of immediate i to [d + offset]. */
756     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
757     {
758     MOVLim(i, offset, d, X86_NOREG, 1);
759     }
760     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
761    
/* Calls MOVWim(i, offset, d, X86_NOREG, 1). NOTE(review): presumably a
   16-bit store of immediate i to [d + offset]. */
762     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
763     {
764     MOVWim(i, offset, d, X86_NOREG, 1);
765     }
766     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
767    
/* Calls MOVBim(i, offset, d, X86_NOREG, 1). NOTE(review): presumably an
   8-bit store of immediate i to [d + offset]. */
768     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
769     {
770     MOVBim(i, offset, d, X86_NOREG, 1);
771     }
772     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
773    
/* Calls MOVLrm(s, offset, d, X86_NOREG, 1). NOTE(review): presumably a
   32-bit store of register s to [d + offset]. */
774     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
775     {
776     MOVLrm(s, offset, d, X86_NOREG, 1);
777     }
778     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
779    
/* Calls MOVWrm(s, offset, d, X86_NOREG, 1). NOTE(review): presumably a
   16-bit store of register s to [d + offset]. */
780     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
781     {
782     MOVWrm(s, offset, d, X86_NOREG, 1);
783     }
784     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
785    
/* Calls MOVBrm(s, offset, d, X86_NOREG, 1). NOTE(review): presumably an
   8-bit store of register s to [d + offset]. */
786     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
787     {
788     MOVBrm(s, offset, d, X86_NOREG, 1);
789     }
790     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
791    
/* Calls LEALmr(offset, s, X86_NOREG, 1, d). NOTE(review): presumably
   computes d = s + offset with LEA, without accessing memory. */
792     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
793     {
794     LEALmr(offset, s, X86_NOREG, 1, d);
795     }
796     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
797    
/* Calls LEALmr(offset, s, index, factor, d). NOTE(review): presumably
   computes d = s + index*factor + offset with LEA. */
798     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
799     {
800     LEALmr(offset, s, index, factor, d);
801     }
802     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
803    
/* Calls LEALmr(0, s, index, factor, d). NOTE(review): presumably computes
   d = s + index*factor with LEA. */
804     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
805     {
806     LEALmr(0, s, index, factor, d);
807     }
808     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
809    
/* Calls MOVLrm(s, offset, d, X86_NOREG, 1); the body is identical to
   raw_mov_l_Rr in this revision. */
810     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
811     {
812     MOVLrm(s, offset, d, X86_NOREG, 1);
813     }
814     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
815    
/* Calls MOVWrm(s, offset, d, X86_NOREG, 1); the body is identical to
   raw_mov_w_Rr in this revision. */
816     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
817     {
818     MOVWrm(s, offset, d, X86_NOREG, 1);
819     }
820     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
821    
/* Calls MOVBrm(s, offset, d, X86_NOREG, 1); the body is identical to
   raw_mov_b_Rr in this revision. */
822     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
823     {
824     MOVBrm(s, offset, d, X86_NOREG, 1);
825     }
826     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
827    
/* Forwards r to BSWAPLr. NOTE(review): presumably a 32-bit BSWAP
   (byte-order reversal) of register r. */
828     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
829     {
830     BSWAPLr(r);
831     }
832     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
833    
/* 16-bit byte swap implemented as ROLWir(8, r), i.e. the same call
   raw_rol_w_ri makes with a fixed count of 8. */
834     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
835     {
836     ROLWir(8, r);
837     }
838     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
839    
/* Forwards (s, d) to MOVLrr. NOTE(review): presumably a 32-bit
   register-to-register move from s to d. */
840     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
841     {
842     MOVLrr(s, d);
843     }
844     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
845    
/* Calls MOVLrm with register s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably a 32-bit store of s to absolute address d. */
846     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
847     {
848     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
849     }
850     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
851    
/* Calls MOVWrm with register s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably a 16-bit store of s to absolute address d. */
852     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
853     {
854     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
855     }
856     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
857    
/* Calls MOVWmr with the address (s, X86_NOREG, X86_NOREG, 1) and register d.
   NOTE(review): presumably a 16-bit load of d from absolute address s. */
858     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
859     {
860     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
861     }
862     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
863    
/* Calls MOVBrm with register s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably an 8-bit store of s to absolute address d. */
864     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
865     {
866     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
867     }
868     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
869    
/* Calls MOVBmr with the address (s, X86_NOREG, X86_NOREG, 1) and register d.
   NOTE(review): presumably an 8-bit load of d from absolute address s. */
870     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
871     {
872     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
873     }
874     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
875    
/* Forwards (s, d) to MOVLir. NOTE(review): presumably loads the 32-bit
   immediate s into register d. */
876     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
877     {
878     MOVLir(s, d);
879     }
880     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
881    
/* Forwards (s, d) to MOVWir. NOTE(review): presumably loads the 16-bit
   immediate s into register d. */
882     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
883     {
884     MOVWir(s, d);
885     }
886     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
887    
/* Forwards (s, d) to MOVBir. NOTE(review): presumably loads the 8-bit
   immediate s into register d. */
888     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
889     {
890     MOVBir(s, d);
891     }
892     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
893    
/* Calls ADCLim with immediate s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably a 32-bit add-with-carry of s into the value at
   absolute address d. */
894     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
895     {
896     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
897     }
898     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
899    
/* Calls ADDLim with immediate s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably a 32-bit add of s to the value at absolute
   address d. */
900     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
901     {
902     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
903     }
904     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
905    
/* Calls ADDWim with immediate s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably a 16-bit add of s to the value at absolute
   address d. */
906     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
907     {
908     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
909     }
910     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
911    
/* Calls ADDBim with immediate s and the address (d, X86_NOREG, X86_NOREG, 1).
   NOTE(review): presumably an 8-bit add of s to the value at absolute
   address d. */
912     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
913     {
914     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
915     }
916     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
917    
/* Forwards (i, d) to TESTLir. NOTE(review): presumably a 32-bit TEST of
   register d against immediate i (flags only, d unchanged). */
918     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
919     {
920     TESTLir(i, d);
921     }
922     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
923    
/* Forwards (s, d) to TESTLrr. NOTE(review): presumably a 32-bit TEST of
   registers d and s. */
924     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
925     {
926     TESTLrr(s, d);
927     }
928     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
929    
930     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
931     {
932     TESTWrr(s, d);
933     }
934     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
935    
936     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
937     {
938     TESTBrr(s, d);
939     }
940     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
941    
942 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
943     {
944     XORLir(i, d);
945     }
946     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
947    
948 gbeauche 1.13 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
949     {
950     ANDLir(i, d);
951     }
952     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
953    
954     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
955     {
956     ANDWir(i, d);
957     }
958     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
959    
960     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
961     {
962     ANDLrr(s, d);
963     }
964     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
965    
966     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
967     {
968     ANDWrr(s, d);
969     }
970     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
971    
972     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
973     {
974     ANDBrr(s, d);
975     }
976     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
977    
978     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
979     {
980     ORLir(i, d);
981     }
982     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
983    
984     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
985     {
986     ORLrr(s, d);
987     }
988     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
989    
990     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
991     {
992     ORWrr(s, d);
993     }
994     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
995    
996     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
997     {
998     ORBrr(s, d);
999     }
1000     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1001    
1002     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1003     {
1004     ADCLrr(s, d);
1005     }
1006     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1007    
1008     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1009     {
1010     ADCWrr(s, d);
1011     }
1012     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1013    
1014     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1015     {
1016     ADCBrr(s, d);
1017     }
1018     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1019    
1020     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1021     {
1022     ADDLrr(s, d);
1023     }
1024     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1025    
1026     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1027     {
1028     ADDWrr(s, d);
1029     }
1030     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1031    
1032     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1033     {
1034     ADDBrr(s, d);
1035     }
1036     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1037    
1038     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1039     {
1040     SUBLir(i, d);
1041     }
1042     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1043    
1044     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1045     {
1046     SUBBir(i, d);
1047     }
1048     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1049    
1050     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1051     {
1052     ADDLir(i, d);
1053     }
1054     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1055    
1056     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1057     {
1058     ADDWir(i, d);
1059     }
1060     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1061    
1062     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1063     {
1064     ADDBir(i, d);
1065     }
1066     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1067    
1068     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1069     {
1070     SBBLrr(s, d);
1071     }
1072     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1073    
1074     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1075     {
1076     SBBWrr(s, d);
1077     }
1078     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1079    
1080     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1081     {
1082     SBBBrr(s, d);
1083     }
1084     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1085    
1086     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1087     {
1088     SUBLrr(s, d);
1089     }
1090     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1091    
1092     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1093     {
1094     SUBWrr(s, d);
1095     }
1096     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1097    
1098     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1099     {
1100     SUBBrr(s, d);
1101     }
1102     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1103    
1104     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1105     {
1106     CMPLrr(s, d);
1107     }
1108     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1109    
1110     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1111     {
1112     CMPLir(i, r);
1113     }
1114     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1115    
1116     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1117     {
1118     CMPWrr(s, d);
1119     }
1120     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1121    
1122     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1123     {
1124     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1125     }
1126     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1127    
1128     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1129     {
1130     CMPBir(i, d);
1131     }
1132     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1133    
1134     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1135     {
1136     CMPBrr(s, d);
1137     }
1138     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1139    
1140     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1141     {
1142     CMPLmr(offset, X86_NOREG, index, factor, d);
1143     }
1144     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1145    
1146     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1147     {
1148     XORLrr(s, d);
1149     }
1150     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1151    
1152     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1153     {
1154     XORWrr(s, d);
1155     }
1156     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1157    
1158     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1159     {
1160     XORBrr(s, d);
1161     }
1162     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1163    
1164     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1165     {
1166     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1167     }
1168     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1169    
1170     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1171     {
1172     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1173     }
1174     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1175    
1176     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1177     {
1178     XCHGLrr(r2, r1);
1179     }
1180     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1181    
1182     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1183     {
1184 gbeauche 1.18 PUSHF();
1185 gbeauche 1.13 }
1186     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1187    
1188     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1189     {
1190 gbeauche 1.18 POPF();
1191 gbeauche 1.13 }
1192     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1193    
1194     #else
1195    
/* Encoder tuning switches (compile-time constants).
 *   optimize_accum      - allow the shorter accumulator-only opcode forms
 *                         (see raw_sub_w_ri).
 *   optimize_imm8       - presumably enables sign-extended imm8 forms;
 *                         not referenced in this part of the file.
 *   optimize_shift_once - allow the shorter shift/rotate-by-one opcodes.
 */
const bool optimize_accum      = true;
const bool optimize_imm8       = true;
const bool optimize_shift_once = true;
1199    
1200     /*************************************************************************
1201     * Actual encoding of the instructions on the target CPU *
1202     *************************************************************************/
1203    
1204 gbeauche 1.2 static __inline__ int isaccum(int r)
1205     {
1206     return (r == EAX_INDEX);
1207     }
1208    
1209 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1210     {
1211     return (x>=-128 && x<=127);
1212     }
1213    
1214     static __inline__ int isword(uae_s32 x)
1215     {
1216     return (x>=-32768 && x<=32767);
1217     }
1218    
1219     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1220     {
1221     emit_byte(0x50+r);
1222     }
1223     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1224    
1225     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1226     {
1227     emit_byte(0x58+r);
1228     }
1229     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1230    
1231 gbeauche 1.24 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1232     {
1233     emit_byte(0x8f);
1234     emit_byte(0x05);
1235     emit_long(d);
1236     }
1237     LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1238    
1239 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1240     {
1241     emit_byte(0x0f);
1242     emit_byte(0xba);
1243     emit_byte(0xe0+r);
1244     emit_byte(i);
1245     }
1246     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1247    
1248     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1249     {
1250     emit_byte(0x0f);
1251     emit_byte(0xa3);
1252     emit_byte(0xc0+8*b+r);
1253     }
1254     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1255    
1256     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1257     {
1258     emit_byte(0x0f);
1259     emit_byte(0xba);
1260     emit_byte(0xf8+r);
1261     emit_byte(i);
1262     }
1263     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1264    
1265     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1266     {
1267     emit_byte(0x0f);
1268     emit_byte(0xbb);
1269     emit_byte(0xc0+8*b+r);
1270     }
1271     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1272    
1273    
1274     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1275     {
1276     emit_byte(0x0f);
1277     emit_byte(0xba);
1278     emit_byte(0xf0+r);
1279     emit_byte(i);
1280     }
1281     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1282    
1283     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1284     {
1285     emit_byte(0x0f);
1286     emit_byte(0xb3);
1287     emit_byte(0xc0+8*b+r);
1288     }
1289     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1290    
1291     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1292     {
1293     emit_byte(0x0f);
1294     emit_byte(0xba);
1295     emit_byte(0xe8+r);
1296     emit_byte(i);
1297     }
1298     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1299    
1300     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1301     {
1302     emit_byte(0x0f);
1303     emit_byte(0xab);
1304     emit_byte(0xc0+8*b+r);
1305     }
1306     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1307    
1308     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1309     {
1310     emit_byte(0x66);
1311     if (isbyte(i)) {
1312     emit_byte(0x83);
1313     emit_byte(0xe8+d);
1314     emit_byte(i);
1315     }
1316     else {
1317 gbeauche 1.2 if (optimize_accum && isaccum(d))
1318     emit_byte(0x2d);
1319     else {
1320 gbeauche 1.1 emit_byte(0x81);
1321     emit_byte(0xe8+d);
1322 gbeauche 1.2 }
1323 gbeauche 1.1 emit_word(i);
1324     }
1325     }
1326     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1327    
1328    
1329     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1330     {
1331     emit_byte(0x8b);
1332     emit_byte(0x05+8*d);
1333     emit_long(s);
1334     }
1335     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1336    
1337     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1338     {
1339     emit_byte(0xc7);
1340     emit_byte(0x05);
1341     emit_long(d);
1342     emit_long(s);
1343     }
1344     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1345    
1346     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1347     {
1348     emit_byte(0x66);
1349     emit_byte(0xc7);
1350     emit_byte(0x05);
1351     emit_long(d);
1352     emit_word(s);
1353     }
1354     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1355    
1356     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1357     {
1358     emit_byte(0xc6);
1359     emit_byte(0x05);
1360     emit_long(d);
1361     emit_byte(s);
1362     }
1363     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1364    
1365     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1366     {
1367     if (optimize_shift_once && (i == 1)) {
1368     emit_byte(0xd0);
1369     emit_byte(0x05);
1370     emit_long(d);
1371     }
1372     else {
1373     emit_byte(0xc0);
1374     emit_byte(0x05);
1375     emit_long(d);
1376     emit_byte(i);
1377     }
1378     }
1379     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1380    
1381     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1382     {
1383     if (optimize_shift_once && (i == 1)) {
1384     emit_byte(0xd0);
1385     emit_byte(0xc0+r);
1386     }
1387     else {
1388     emit_byte(0xc0);
1389     emit_byte(0xc0+r);
1390     emit_byte(i);
1391     }
1392     }
1393     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1394    
1395     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1396     {
1397     emit_byte(0x66);
1398     emit_byte(0xc1);
1399     emit_byte(0xc0+r);
1400     emit_byte(i);
1401     }
1402     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1403    
1404     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1405     {
1406     if (optimize_shift_once && (i == 1)) {
1407     emit_byte(0xd1);
1408     emit_byte(0xc0+r);
1409     }
1410     else {
1411     emit_byte(0xc1);
1412     emit_byte(0xc0+r);
1413     emit_byte(i);
1414     }
1415     }
1416     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1417    
1418     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1419     {
1420     emit_byte(0xd3);
1421     emit_byte(0xc0+d);
1422     }
1423     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1424    
1425     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1426     {
1427     emit_byte(0x66);
1428     emit_byte(0xd3);
1429     emit_byte(0xc0+d);
1430     }
1431     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1432    
1433     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1434     {
1435     emit_byte(0xd2);
1436     emit_byte(0xc0+d);
1437     }
1438     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1439    
1440     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1441     {
1442     emit_byte(0xd3);
1443     emit_byte(0xe0+d);
1444     }
1445     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1446    
1447     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1448     {
1449     emit_byte(0x66);
1450     emit_byte(0xd3);
1451     emit_byte(0xe0+d);
1452     }
1453     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1454    
1455     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1456     {
1457     emit_byte(0xd2);
1458     emit_byte(0xe0+d);
1459     }
1460     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1461    
1462     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1463     {
1464     if (optimize_shift_once && (i == 1)) {
1465     emit_byte(0xd0);
1466     emit_byte(0xc8+r);
1467     }
1468     else {
1469     emit_byte(0xc0);
1470     emit_byte(0xc8+r);
1471     emit_byte(i);
1472     }
1473     }
1474     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1475    
1476     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1477     {
1478     emit_byte(0x66);
1479     emit_byte(0xc1);
1480     emit_byte(0xc8+r);
1481     emit_byte(i);
1482     }
1483     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1484    
1485     // gb-- used for making an fpcr value in compemu_fpp.cpp
1486     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1487     {
1488     emit_byte(0x0b);
1489     emit_byte(0x05+8*d);
1490     emit_long(s);
1491     }
1492     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1493    
1494     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1495     {
1496     if (optimize_shift_once && (i == 1)) {
1497     emit_byte(0xd1);
1498     emit_byte(0xc8+r);
1499     }
1500     else {
1501     emit_byte(0xc1);
1502     emit_byte(0xc8+r);
1503     emit_byte(i);
1504     }
1505     }
1506     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1507    
1508     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1509     {
1510     emit_byte(0xd3);
1511     emit_byte(0xc8+d);
1512     }
1513     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1514    
1515     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1516     {
1517     emit_byte(0x66);
1518     emit_byte(0xd3);
1519     emit_byte(0xc8+d);
1520     }
1521     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1522    
1523     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1524     {
1525     emit_byte(0xd2);
1526     emit_byte(0xc8+d);
1527     }
1528     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1529    
1530     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1531     {
1532     emit_byte(0xd3);
1533     emit_byte(0xe8+d);
1534     }
1535     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1536    
1537     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1538     {
1539     emit_byte(0x66);
1540     emit_byte(0xd3);
1541     emit_byte(0xe8+d);
1542     }
1543     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1544    
1545     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1546     {
1547     emit_byte(0xd2);
1548     emit_byte(0xe8+d);
1549     }
1550     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1551    
1552     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1553     {
1554     emit_byte(0xd3);
1555     emit_byte(0xf8+d);
1556     }
1557     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1558    
1559     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1560     {
1561     emit_byte(0x66);
1562     emit_byte(0xd3);
1563     emit_byte(0xf8+d);
1564     }
1565     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1566    
1567     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1568     {
1569     emit_byte(0xd2);
1570     emit_byte(0xf8+d);
1571     }
1572     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1573    
1574     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1575     {
1576     if (optimize_shift_once && (i == 1)) {
1577     emit_byte(0xd1);
1578     emit_byte(0xe0+r);
1579     }
1580     else {
1581     emit_byte(0xc1);
1582     emit_byte(0xe0+r);
1583     emit_byte(i);
1584     }
1585     }
1586     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1587    
1588     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1589     {
1590     emit_byte(0x66);
1591     emit_byte(0xc1);
1592     emit_byte(0xe0+r);
1593     emit_byte(i);
1594     }
1595     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1596    
1597     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1598     {
1599     if (optimize_shift_once && (i == 1)) {
1600     emit_byte(0xd0);
1601     emit_byte(0xe0+r);
1602     }
1603     else {
1604     emit_byte(0xc0);
1605     emit_byte(0xe0+r);
1606     emit_byte(i);
1607     }
1608     }
1609     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1610    
1611     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1612     {
1613     if (optimize_shift_once && (i == 1)) {
1614     emit_byte(0xd1);
1615     emit_byte(0xe8+r);
1616     }
1617     else {
1618     emit_byte(0xc1);
1619     emit_byte(0xe8+r);
1620     emit_byte(i);
1621     }
1622     }
1623     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1624    
1625     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1626     {
1627     emit_byte(0x66);
1628     emit_byte(0xc1);
1629     emit_byte(0xe8+r);
1630     emit_byte(i);
1631     }
1632     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1633    
1634     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1635     {
1636     if (optimize_shift_once && (i == 1)) {
1637     emit_byte(0xd0);
1638     emit_byte(0xe8+r);
1639     }
1640     else {
1641     emit_byte(0xc0);
1642     emit_byte(0xe8+r);
1643     emit_byte(i);
1644     }
1645     }
1646     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1647    
1648     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1649     {
1650     if (optimize_shift_once && (i == 1)) {
1651     emit_byte(0xd1);
1652     emit_byte(0xf8+r);
1653     }
1654     else {
1655     emit_byte(0xc1);
1656     emit_byte(0xf8+r);
1657     emit_byte(i);
1658     }
1659     }
1660     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1661    
1662     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1663     {
1664     emit_byte(0x66);
1665     emit_byte(0xc1);
1666     emit_byte(0xf8+r);
1667     emit_byte(i);
1668     }
1669     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1670    
1671     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1672     {
1673     if (optimize_shift_once && (i == 1)) {
1674     emit_byte(0xd0);
1675     emit_byte(0xf8+r);
1676     }
1677     else {
1678     emit_byte(0xc0);
1679     emit_byte(0xf8+r);
1680     emit_byte(i);
1681     }
1682     }
1683     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1684    
1685     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1686     {
1687     emit_byte(0x9e);
1688     }
1689     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1690    
1691     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1692     {
1693     emit_byte(0x0f);
1694     emit_byte(0xa2);
1695     }
1696     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1697    
1698     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1699     {
1700     emit_byte(0x9f);
1701     }
1702     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1703    
1704     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1705     {
1706     emit_byte(0x0f);
1707     emit_byte(0x90+cc);
1708     emit_byte(0xc0+d);
1709     }
1710     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1711    
1712     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1713     {
1714     emit_byte(0x0f);
1715     emit_byte(0x90+cc);
1716     emit_byte(0x05);
1717     emit_long(d);
1718     }
1719     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1720    
1721     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1722     {
1723     if (have_cmov) {
1724     emit_byte(0x0f);
1725     emit_byte(0x40+cc);
1726     emit_byte(0xc0+8*d+s);
1727     }
1728     else { /* replacement using branch and mov */
1729     int uncc=(cc^1);
1730     emit_byte(0x70+uncc);
1731     emit_byte(2); /* skip next 2 bytes if not cc=true */
1732     emit_byte(0x89);
1733     emit_byte(0xc0+8*s+d);
1734     }
1735     }
1736     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1737    
1738     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1739     {
1740     emit_byte(0x0f);
1741     emit_byte(0xbc);
1742     emit_byte(0xc0+8*d+s);
1743     }
1744     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1745    
1746     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1747     {
1748     emit_byte(0x0f);
1749     emit_byte(0xbf);
1750     emit_byte(0xc0+8*d+s);
1751     }
1752     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1753    
1754     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1755     {
1756     emit_byte(0x0f);
1757     emit_byte(0xbe);
1758     emit_byte(0xc0+8*d+s);
1759     }
1760     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1761    
1762     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1763     {
1764     emit_byte(0x0f);
1765     emit_byte(0xb7);
1766     emit_byte(0xc0+8*d+s);
1767     }
1768     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1769    
1770     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1771     {
1772     emit_byte(0x0f);
1773     emit_byte(0xb6);
1774     emit_byte(0xc0+8*d+s);
1775     }
1776     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1777    
1778     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1779     {
1780     emit_byte(0x0f);
1781     emit_byte(0xaf);
1782     emit_byte(0xc0+8*d+s);
1783     }
1784     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1785    
1786     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1787     {
1788     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1789     abort();
1790     emit_byte(0xf7);
1791     emit_byte(0xea);
1792     }
1793     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1794    
1795     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1796     {
1797     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1798     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1799     abort();
1800     }
1801     emit_byte(0xf7);
1802     emit_byte(0xe2);
1803     }
1804     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1805    
1806     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1807     {
1808     abort(); /* %^$&%^$%#^ x86! */
1809     emit_byte(0x0f);
1810     emit_byte(0xaf);
1811     emit_byte(0xc0+8*d+s);
1812     }
1813     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1814    
1815     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1816     {
1817     emit_byte(0x88);
1818     emit_byte(0xc0+8*s+d);
1819     }
1820     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1821    
1822     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1823     {
1824     emit_byte(0x66);
1825     emit_byte(0x89);
1826     emit_byte(0xc0+8*s+d);
1827     }
1828     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1829    
1830     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1831     {
1832     int isebp=(baser==5)?0x40:0;
1833     int fi;
1834    
1835     switch(factor) {
1836     case 1: fi=0; break;
1837     case 2: fi=1; break;
1838     case 4: fi=2; break;
1839     case 8: fi=3; break;
1840     default: abort();
1841     }
1842    
1843    
1844     emit_byte(0x8b);
1845     emit_byte(0x04+8*d+isebp);
1846     emit_byte(baser+8*index+0x40*fi);
1847     if (isebp)
1848     emit_byte(0x00);
1849     }
1850     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1851    
1852     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1853     {
1854     int fi;
1855     int isebp;
1856    
1857     switch(factor) {
1858     case 1: fi=0; break;
1859     case 2: fi=1; break;
1860     case 4: fi=2; break;
1861     case 8: fi=3; break;
1862     default: abort();
1863     }
1864     isebp=(baser==5)?0x40:0;
1865    
1866     emit_byte(0x66);
1867     emit_byte(0x8b);
1868     emit_byte(0x04+8*d+isebp);
1869     emit_byte(baser+8*index+0x40*fi);
1870     if (isebp)
1871     emit_byte(0x00);
1872     }
1873     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1874    
1875     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1876     {
1877     int fi;
1878     int isebp;
1879    
1880     switch(factor) {
1881     case 1: fi=0; break;
1882     case 2: fi=1; break;
1883     case 4: fi=2; break;
1884     case 8: fi=3; break;
1885     default: abort();
1886     }
1887     isebp=(baser==5)?0x40:0;
1888    
1889     emit_byte(0x8a);
1890     emit_byte(0x04+8*d+isebp);
1891     emit_byte(baser+8*index+0x40*fi);
1892     if (isebp)
1893     emit_byte(0x00);
1894     }
1895     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1896    
1897     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1898     {
1899     int fi;
1900     int isebp;
1901    
1902     switch(factor) {
1903     case 1: fi=0; break;
1904     case 2: fi=1; break;
1905     case 4: fi=2; break;
1906     case 8: fi=3; break;
1907     default: abort();
1908     }
1909    
1910    
1911     isebp=(baser==5)?0x40:0;
1912    
1913     emit_byte(0x89);
1914     emit_byte(0x04+8*s+isebp);
1915     emit_byte(baser+8*index+0x40*fi);
1916     if (isebp)
1917     emit_byte(0x00);
1918     }
1919     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1920    
1921     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1922     {
1923     int fi;
1924     int isebp;
1925    
1926     switch(factor) {
1927     case 1: fi=0; break;
1928     case 2: fi=1; break;
1929     case 4: fi=2; break;
1930     case 8: fi=3; break;
1931     default: abort();
1932     }
1933     isebp=(baser==5)?0x40:0;
1934    
1935     emit_byte(0x66);
1936     emit_byte(0x89);
1937     emit_byte(0x04+8*s+isebp);
1938     emit_byte(baser+8*index+0x40*fi);
1939     if (isebp)
1940     emit_byte(0x00);
1941     }
1942     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1943    
1944     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1945     {
1946     int fi;
1947     int isebp;
1948    
1949     switch(factor) {
1950     case 1: fi=0; break;
1951     case 2: fi=1; break;
1952     case 4: fi=2; break;
1953     case 8: fi=3; break;
1954     default: abort();
1955     }
1956     isebp=(baser==5)?0x40:0;
1957    
1958     emit_byte(0x88);
1959     emit_byte(0x04+8*s+isebp);
1960     emit_byte(baser+8*index+0x40*fi);
1961     if (isebp)
1962     emit_byte(0x00);
1963     }
1964     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1965    
1966     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1967     {
1968     int fi;
1969    
1970     switch(factor) {
1971     case 1: fi=0; break;
1972     case 2: fi=1; break;
1973     case 4: fi=2; break;
1974     case 8: fi=3; break;
1975     default: abort();
1976     }
1977    
1978     emit_byte(0x89);
1979     emit_byte(0x84+8*s);
1980     emit_byte(baser+8*index+0x40*fi);
1981     emit_long(base);
1982     }
1983     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1984    
1985     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1986     {
1987     int fi;
1988    
1989     switch(factor) {
1990     case 1: fi=0; break;
1991     case 2: fi=1; break;
1992     case 4: fi=2; break;
1993     case 8: fi=3; break;
1994     default: abort();
1995     }
1996    
1997     emit_byte(0x66);
1998     emit_byte(0x89);
1999     emit_byte(0x84+8*s);
2000     emit_byte(baser+8*index+0x40*fi);
2001     emit_long(base);
2002     }
2003     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2004    
2005     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2006     {
2007     int fi;
2008    
2009     switch(factor) {
2010     case 1: fi=0; break;
2011     case 2: fi=1; break;
2012     case 4: fi=2; break;
2013     case 8: fi=3; break;
2014     default: abort();
2015     }
2016    
2017     emit_byte(0x88);
2018     emit_byte(0x84+8*s);
2019     emit_byte(baser+8*index+0x40*fi);
2020     emit_long(base);
2021     }
2022     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2023    
2024     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2025     {
2026     int fi;
2027    
2028     switch(factor) {
2029     case 1: fi=0; break;
2030     case 2: fi=1; break;
2031     case 4: fi=2; break;
2032     case 8: fi=3; break;
2033     default: abort();
2034     }
2035    
2036     emit_byte(0x8b);
2037     emit_byte(0x84+8*d);
2038     emit_byte(baser+8*index+0x40*fi);
2039     emit_long(base);
2040     }
2041     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2042    
2043     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2044     {
2045     int fi;
2046    
2047     switch(factor) {
2048     case 1: fi=0; break;
2049     case 2: fi=1; break;
2050     case 4: fi=2; break;
2051     case 8: fi=3; break;
2052     default: abort();
2053     }
2054    
2055     emit_byte(0x66);
2056     emit_byte(0x8b);
2057     emit_byte(0x84+8*d);
2058     emit_byte(baser+8*index+0x40*fi);
2059     emit_long(base);
2060     }
2061     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2062    
2063     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2064     {
2065     int fi;
2066    
2067     switch(factor) {
2068     case 1: fi=0; break;
2069     case 2: fi=1; break;
2070     case 4: fi=2; break;
2071     case 8: fi=3; break;
2072     default: abort();
2073     }
2074    
2075     emit_byte(0x8a);
2076     emit_byte(0x84+8*d);
2077     emit_byte(baser+8*index+0x40*fi);
2078     emit_long(base);
2079     }
2080     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2081    
2082     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2083     {
2084     int fi;
2085     switch(factor) {
2086     case 1: fi=0; break;
2087     case 2: fi=1; break;
2088     case 4: fi=2; break;
2089     case 8: fi=3; break;
2090     default:
2091     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2092     abort();
2093     }
2094     emit_byte(0x8b);
2095     emit_byte(0x04+8*d);
2096     emit_byte(0x05+8*index+64*fi);
2097     emit_long(base);
2098     }
2099     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2100    
2101     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2102     {
2103     int fi;
2104     switch(factor) {
2105     case 1: fi=0; break;
2106     case 2: fi=1; break;
2107     case 4: fi=2; break;
2108     case 8: fi=3; break;
2109     default:
2110     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2111     abort();
2112     }
2113     if (have_cmov) {
2114     emit_byte(0x0f);
2115     emit_byte(0x40+cond);
2116     emit_byte(0x04+8*d);
2117     emit_byte(0x05+8*index+64*fi);
2118     emit_long(base);
2119     }
2120     else { /* replacement using branch and mov */
2121     int uncc=(cond^1);
2122     emit_byte(0x70+uncc);
2123     emit_byte(7); /* skip next 7 bytes if not cc=true */
2124     emit_byte(0x8b);
2125     emit_byte(0x04+8*d);
2126     emit_byte(0x05+8*index+64*fi);
2127     emit_long(base);
2128     }
2129     }
2130     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2131    
2132     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2133     {
2134     if (have_cmov) {
2135     emit_byte(0x0f);
2136     emit_byte(0x40+cond);
2137     emit_byte(0x05+8*d);
2138     emit_long(mem);
2139     }
2140     else { /* replacement using branch and mov */
2141     int uncc=(cond^1);
2142     emit_byte(0x70+uncc);
2143     emit_byte(6); /* skip next 6 bytes if not cc=true */
2144     emit_byte(0x8b);
2145     emit_byte(0x05+8*d);
2146     emit_long(mem);
2147     }
2148     }
2149     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2150    
2151     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2152     {
2153 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2154 gbeauche 1.1 emit_byte(0x8b);
2155     emit_byte(0x40+8*d+s);
2156     emit_byte(offset);
2157     }
2158     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2159    
2160     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2161     {
2162 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2163 gbeauche 1.1 emit_byte(0x66);
2164     emit_byte(0x8b);
2165     emit_byte(0x40+8*d+s);
2166     emit_byte(offset);
2167     }
2168     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2169    
2170     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2171     {
2172 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2173 gbeauche 1.1 emit_byte(0x8a);
2174     emit_byte(0x40+8*d+s);
2175     emit_byte(offset);
2176     }
2177     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2178    
2179     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2180     {
2181     emit_byte(0x8b);
2182     emit_byte(0x80+8*d+s);
2183     emit_long(offset);
2184     }
2185     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2186    
2187     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2188     {
2189     emit_byte(0x66);
2190     emit_byte(0x8b);
2191     emit_byte(0x80+8*d+s);
2192     emit_long(offset);
2193     }
2194     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2195    
2196     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2197     {
2198     emit_byte(0x8a);
2199     emit_byte(0x80+8*d+s);
2200     emit_long(offset);
2201     }
2202     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2203    
2204     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2205     {
2206 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2207 gbeauche 1.1 emit_byte(0xc7);
2208     emit_byte(0x40+d);
2209     emit_byte(offset);
2210     emit_long(i);
2211     }
2212     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2213    
2214     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2215     {
2216 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2217 gbeauche 1.1 emit_byte(0x66);
2218     emit_byte(0xc7);
2219     emit_byte(0x40+d);
2220     emit_byte(offset);
2221     emit_word(i);
2222     }
2223     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2224    
2225     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2226     {
2227 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2228 gbeauche 1.1 emit_byte(0xc6);
2229     emit_byte(0x40+d);
2230     emit_byte(offset);
2231     emit_byte(i);
2232     }
2233     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2234    
2235     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2236     {
2237 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2238 gbeauche 1.1 emit_byte(0x89);
2239     emit_byte(0x40+8*s+d);
2240     emit_byte(offset);
2241     }
2242     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2243    
2244     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2245     {
2246 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2247 gbeauche 1.1 emit_byte(0x66);
2248     emit_byte(0x89);
2249     emit_byte(0x40+8*s+d);
2250     emit_byte(offset);
2251     }
2252     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2253    
2254     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2255     {
2256 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2257 gbeauche 1.1 emit_byte(0x88);
2258     emit_byte(0x40+8*s+d);
2259     emit_byte(offset);
2260     }
2261     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2262    
2263     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2264     {
2265     if (optimize_imm8 && isbyte(offset)) {
2266     emit_byte(0x8d);
2267     emit_byte(0x40+8*d+s);
2268     emit_byte(offset);
2269     }
2270     else {
2271     emit_byte(0x8d);
2272     emit_byte(0x80+8*d+s);
2273     emit_long(offset);
2274     }
2275     }
2276     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2277    
2278     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2279     {
2280     int fi;
2281    
2282     switch(factor) {
2283     case 1: fi=0; break;
2284     case 2: fi=1; break;
2285     case 4: fi=2; break;
2286     case 8: fi=3; break;
2287     default: abort();
2288     }
2289    
2290     if (optimize_imm8 && isbyte(offset)) {
2291     emit_byte(0x8d);
2292     emit_byte(0x44+8*d);
2293     emit_byte(0x40*fi+8*index+s);
2294     emit_byte(offset);
2295     }
2296     else {
2297     emit_byte(0x8d);
2298     emit_byte(0x84+8*d);
2299     emit_byte(0x40*fi+8*index+s);
2300     emit_long(offset);
2301     }
2302     }
2303     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2304    
2305     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2306     {
2307     int isebp=(s==5)?0x40:0;
2308     int fi;
2309    
2310     switch(factor) {
2311     case 1: fi=0; break;
2312     case 2: fi=1; break;
2313     case 4: fi=2; break;
2314     case 8: fi=3; break;
2315     default: abort();
2316     }
2317    
2318     emit_byte(0x8d);
2319     emit_byte(0x04+8*d+isebp);
2320     emit_byte(0x40*fi+8*index+s);
2321     if (isebp)
2322     emit_byte(0);
2323     }
2324     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2325    
2326     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2327     {
2328     if (optimize_imm8 && isbyte(offset)) {
2329     emit_byte(0x89);
2330     emit_byte(0x40+8*s+d);
2331     emit_byte(offset);
2332     }
2333     else {
2334     emit_byte(0x89);
2335     emit_byte(0x80+8*s+d);
2336     emit_long(offset);
2337     }
2338     }
2339     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2340    
2341     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2342     {
2343     emit_byte(0x66);
2344     emit_byte(0x89);
2345     emit_byte(0x80+8*s+d);
2346     emit_long(offset);
2347     }
2348     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2349    
2350     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2351     {
2352     if (optimize_imm8 && isbyte(offset)) {
2353     emit_byte(0x88);
2354     emit_byte(0x40+8*s+d);
2355     emit_byte(offset);
2356     }
2357     else {
2358     emit_byte(0x88);
2359     emit_byte(0x80+8*s+d);
2360     emit_long(offset);
2361     }
2362     }
2363     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2364    
2365     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2366     {
2367     emit_byte(0x0f);
2368     emit_byte(0xc8+r);
2369     }
2370     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2371    
2372     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2373     {
2374     emit_byte(0x66);
2375     emit_byte(0xc1);
2376     emit_byte(0xc0+r);
2377     emit_byte(0x08);
2378     }
2379     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2380    
2381     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2382     {
2383     emit_byte(0x89);
2384     emit_byte(0xc0+8*s+d);
2385     }
2386     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2387    
2388     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2389     {
2390     emit_byte(0x89);
2391     emit_byte(0x05+8*s);
2392     emit_long(d);
2393     }
2394     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2395    
2396     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2397     {
2398     emit_byte(0x66);
2399     emit_byte(0x89);
2400     emit_byte(0x05+8*s);
2401     emit_long(d);
2402     }
2403     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2404    
2405     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2406     {
2407     emit_byte(0x66);
2408     emit_byte(0x8b);
2409     emit_byte(0x05+8*d);
2410     emit_long(s);
2411     }
2412     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2413    
2414     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2415     {
2416     emit_byte(0x88);
2417     emit_byte(0x05+8*s);
2418     emit_long(d);
2419     }
2420     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2421    
2422     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2423     {
2424     emit_byte(0x8a);
2425     emit_byte(0x05+8*d);
2426     emit_long(s);
2427     }
2428     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2429    
2430     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2431     {
2432     emit_byte(0xb8+d);
2433     emit_long(s);
2434     }
2435     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2436    
2437     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2438     {
2439     emit_byte(0x66);
2440     emit_byte(0xb8+d);
2441     emit_word(s);
2442     }
2443     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2444    
2445     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2446     {
2447     emit_byte(0xb0+d);
2448     emit_byte(s);
2449     }
2450     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2451    
2452     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2453     {
2454     emit_byte(0x81);
2455     emit_byte(0x15);
2456     emit_long(d);
2457     emit_long(s);
2458     }
2459     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2460    
2461     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2462     {
2463     if (optimize_imm8 && isbyte(s)) {
2464     emit_byte(0x83);
2465     emit_byte(0x05);
2466     emit_long(d);
2467     emit_byte(s);
2468     }
2469     else {
2470     emit_byte(0x81);
2471     emit_byte(0x05);
2472     emit_long(d);
2473     emit_long(s);
2474     }
2475     }
2476     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2477    
2478     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2479     {
2480     emit_byte(0x66);
2481     emit_byte(0x81);
2482     emit_byte(0x05);
2483     emit_long(d);
2484     emit_word(s);
2485     }
2486     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2487    
2488     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2489     {
2490     emit_byte(0x80);
2491     emit_byte(0x05);
2492     emit_long(d);
2493     emit_byte(s);
2494     }
2495     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2496    
2497     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2498     {
2499 gbeauche 1.2 if (optimize_accum && isaccum(d))
2500     emit_byte(0xa9);
2501     else {
2502 gbeauche 1.1 emit_byte(0xf7);
2503     emit_byte(0xc0+d);
2504 gbeauche 1.2 }
2505 gbeauche 1.1 emit_long(i);
2506     }
2507     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2508    
2509     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2510     {
2511     emit_byte(0x85);
2512     emit_byte(0xc0+8*s+d);
2513     }
2514     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2515    
2516     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2517     {
2518     emit_byte(0x66);
2519     emit_byte(0x85);
2520     emit_byte(0xc0+8*s+d);
2521     }
2522     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2523    
2524     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2525     {
2526     emit_byte(0x84);
2527     emit_byte(0xc0+8*s+d);
2528     }
2529     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2530    
2531 gbeauche 1.24 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2532     {
2533     emit_byte(0x81);
2534     emit_byte(0xf0+d);
2535     emit_long(i);
2536     }
2537     LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2538    
2539 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2540     {
2541     if (optimize_imm8 && isbyte(i)) {
2542 gbeauche 1.2 emit_byte(0x83);
2543     emit_byte(0xe0+d);
2544     emit_byte(i);
2545 gbeauche 1.1 }
2546     else {
2547 gbeauche 1.2 if (optimize_accum && isaccum(d))
2548     emit_byte(0x25);
2549     else {
2550     emit_byte(0x81);
2551     emit_byte(0xe0+d);
2552     }
2553     emit_long(i);
2554 gbeauche 1.1 }
2555     }
2556     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2557    
2558     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2559     {
2560 gbeauche 1.2 emit_byte(0x66);
2561     if (optimize_imm8 && isbyte(i)) {
2562     emit_byte(0x83);
2563     emit_byte(0xe0+d);
2564     emit_byte(i);
2565     }
2566     else {
2567     if (optimize_accum && isaccum(d))
2568     emit_byte(0x25);
2569     else {
2570     emit_byte(0x81);
2571     emit_byte(0xe0+d);
2572     }
2573     emit_word(i);
2574     }
2575 gbeauche 1.1 }
2576     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2577    
2578     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2579     {
2580     emit_byte(0x21);
2581     emit_byte(0xc0+8*s+d);
2582     }
2583     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2584    
2585     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2586     {
2587     emit_byte(0x66);
2588     emit_byte(0x21);
2589     emit_byte(0xc0+8*s+d);
2590     }
2591     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2592    
2593     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2594     {
2595     emit_byte(0x20);
2596     emit_byte(0xc0+8*s+d);
2597     }
2598     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2599    
2600     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2601     {
2602     if (optimize_imm8 && isbyte(i)) {
2603     emit_byte(0x83);
2604     emit_byte(0xc8+d);
2605     emit_byte(i);
2606     }
2607     else {
2608 gbeauche 1.2 if (optimize_accum && isaccum(d))
2609     emit_byte(0x0d);
2610     else {
2611 gbeauche 1.1 emit_byte(0x81);
2612     emit_byte(0xc8+d);
2613 gbeauche 1.2 }
2614 gbeauche 1.1 emit_long(i);
2615     }
2616     }
2617     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2618    
2619     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2620     {
2621     emit_byte(0x09);
2622     emit_byte(0xc0+8*s+d);
2623     }
2624     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2625    
2626     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2627     {
2628     emit_byte(0x66);
2629     emit_byte(0x09);
2630     emit_byte(0xc0+8*s+d);
2631     }
2632     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2633    
2634     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2635     {
2636     emit_byte(0x08);
2637     emit_byte(0xc0+8*s+d);
2638     }
2639     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2640    
2641     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2642     {
2643     emit_byte(0x11);
2644     emit_byte(0xc0+8*s+d);
2645     }
2646     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2647    
2648     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2649     {
2650     emit_byte(0x66);
2651     emit_byte(0x11);
2652     emit_byte(0xc0+8*s+d);
2653     }
2654     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2655    
2656     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2657     {
2658     emit_byte(0x10);
2659     emit_byte(0xc0+8*s+d);
2660     }
2661     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2662    
2663     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2664     {
2665     emit_byte(0x01);
2666     emit_byte(0xc0+8*s+d);
2667     }
2668     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2669    
2670     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2671     {
2672     emit_byte(0x66);
2673     emit_byte(0x01);
2674     emit_byte(0xc0+8*s+d);
2675     }
2676     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2677    
2678     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2679     {
2680     emit_byte(0x00);
2681     emit_byte(0xc0+8*s+d);
2682     }
2683     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2684    
2685     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2686     {
2687     if (isbyte(i)) {
2688     emit_byte(0x83);
2689     emit_byte(0xe8+d);
2690     emit_byte(i);
2691     }
2692     else {
2693 gbeauche 1.2 if (optimize_accum && isaccum(d))
2694     emit_byte(0x2d);
2695     else {
2696 gbeauche 1.1 emit_byte(0x81);
2697     emit_byte(0xe8+d);
2698 gbeauche 1.2 }
2699 gbeauche 1.1 emit_long(i);
2700     }
2701     }
2702     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2703    
2704     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2705     {
2706 gbeauche 1.2 if (optimize_accum && isaccum(d))
2707     emit_byte(0x2c);
2708     else {
2709 gbeauche 1.1 emit_byte(0x80);
2710     emit_byte(0xe8+d);
2711 gbeauche 1.2 }
2712 gbeauche 1.1 emit_byte(i);
2713     }
2714     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2715    
2716     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2717     {
2718     if (isbyte(i)) {
2719     emit_byte(0x83);
2720     emit_byte(0xc0+d);
2721     emit_byte(i);
2722     }
2723     else {
2724 gbeauche 1.2 if (optimize_accum && isaccum(d))
2725     emit_byte(0x05);
2726     else {
2727 gbeauche 1.1 emit_byte(0x81);
2728     emit_byte(0xc0+d);
2729 gbeauche 1.2 }
2730 gbeauche 1.1 emit_long(i);
2731     }
2732     }
2733     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2734    
2735     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2736     {
2737 gbeauche 1.2 emit_byte(0x66);
2738 gbeauche 1.1 if (isbyte(i)) {
2739     emit_byte(0x83);
2740     emit_byte(0xc0+d);
2741     emit_byte(i);
2742     }
2743     else {
2744 gbeauche 1.2 if (optimize_accum && isaccum(d))
2745     emit_byte(0x05);
2746     else {
2747 gbeauche 1.1 emit_byte(0x81);
2748     emit_byte(0xc0+d);
2749 gbeauche 1.2 }
2750 gbeauche 1.1 emit_word(i);
2751     }
2752     }
2753     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2754    
2755     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2756     {
2757 gbeauche 1.2 if (optimize_accum && isaccum(d))
2758     emit_byte(0x04);
2759     else {
2760     emit_byte(0x80);
2761     emit_byte(0xc0+d);
2762     }
2763 gbeauche 1.1 emit_byte(i);
2764     }
2765     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2766    
2767     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2768     {
2769     emit_byte(0x19);
2770     emit_byte(0xc0+8*s+d);
2771     }
2772     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2773    
2774     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2775     {
2776     emit_byte(0x66);
2777     emit_byte(0x19);
2778     emit_byte(0xc0+8*s+d);
2779     }
2780     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2781    
2782     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2783     {
2784     emit_byte(0x18);
2785     emit_byte(0xc0+8*s+d);
2786     }
2787     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2788    
2789     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2790     {
2791     emit_byte(0x29);
2792     emit_byte(0xc0+8*s+d);
2793     }
2794     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2795    
2796     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2797     {
2798     emit_byte(0x66);
2799     emit_byte(0x29);
2800     emit_byte(0xc0+8*s+d);
2801     }
2802     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2803    
2804     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2805     {
2806     emit_byte(0x28);
2807     emit_byte(0xc0+8*s+d);
2808     }
2809     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2810    
2811     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2812     {
2813     emit_byte(0x39);
2814     emit_byte(0xc0+8*s+d);
2815     }
2816     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2817    
2818     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2819     {
2820     if (optimize_imm8 && isbyte(i)) {
2821     emit_byte(0x83);
2822     emit_byte(0xf8+r);
2823     emit_byte(i);
2824     }
2825     else {
2826 gbeauche 1.2 if (optimize_accum && isaccum(r))
2827     emit_byte(0x3d);
2828     else {
2829 gbeauche 1.1 emit_byte(0x81);
2830     emit_byte(0xf8+r);
2831 gbeauche 1.2 }
2832 gbeauche 1.1 emit_long(i);
2833     }
2834     }
2835     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2836    
2837     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2838     {
2839     emit_byte(0x66);
2840     emit_byte(0x39);
2841     emit_byte(0xc0+8*s+d);
2842     }
2843     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2844    
2845 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2846     {
2847     emit_byte(0x80);
2848     emit_byte(0x3d);
2849     emit_long(d);
2850     emit_byte(s);
2851     }
2852     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2853    
2854 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2855     {
2856 gbeauche 1.2 if (optimize_accum && isaccum(d))
2857     emit_byte(0x3c);
2858     else {
2859 gbeauche 1.1 emit_byte(0x80);
2860     emit_byte(0xf8+d);
2861 gbeauche 1.2 }
2862 gbeauche 1.1 emit_byte(i);
2863     }
2864     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2865    
2866     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2867     {
2868     emit_byte(0x38);
2869     emit_byte(0xc0+8*s+d);
2870     }
2871     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2872    
2873     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2874     {
2875     int fi;
2876    
2877     switch(factor) {
2878     case 1: fi=0; break;
2879     case 2: fi=1; break;
2880     case 4: fi=2; break;
2881     case 8: fi=3; break;
2882     default: abort();
2883     }
2884     emit_byte(0x39);
2885     emit_byte(0x04+8*d);
2886     emit_byte(5+8*index+0x40*fi);
2887     emit_long(offset);
2888     }
2889     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2890    
2891     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2892     {
2893     emit_byte(0x31);
2894     emit_byte(0xc0+8*s+d);
2895     }
2896     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2897    
2898     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2899     {
2900     emit_byte(0x66);
2901     emit_byte(0x31);
2902     emit_byte(0xc0+8*s+d);
2903     }
2904     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2905    
2906     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2907     {
2908     emit_byte(0x30);
2909     emit_byte(0xc0+8*s+d);
2910     }
2911     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2912    
2913     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2914     {
2915     if (optimize_imm8 && isbyte(s)) {
2916     emit_byte(0x83);
2917     emit_byte(0x2d);
2918     emit_long(d);
2919     emit_byte(s);
2920     }
2921     else {
2922     emit_byte(0x81);
2923     emit_byte(0x2d);
2924     emit_long(d);
2925     emit_long(s);
2926     }
2927     }
2928     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2929    
2930     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2931     {
2932     if (optimize_imm8 && isbyte(s)) {
2933     emit_byte(0x83);
2934     emit_byte(0x3d);
2935     emit_long(d);
2936     emit_byte(s);
2937     }
2938     else {
2939     emit_byte(0x81);
2940     emit_byte(0x3d);
2941     emit_long(d);
2942     emit_long(s);
2943     }
2944     }
2945     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2946    
2947     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2948     {
2949     emit_byte(0x87);
2950     emit_byte(0xc0+8*r1+r2);
2951     }
2952     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2953    
2954     /*************************************************************************
2955     * FIXME: mem access modes probably wrong *
2956     *************************************************************************/
2957    
2958     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2959     {
2960     emit_byte(0x9c);
2961     }
2962     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2963    
2964     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2965     {
2966     emit_byte(0x9d);
2967     }
2968     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2969 gbeauche 1.13
2970     #endif
2971 gbeauche 1.1
2972     /*************************************************************************
2973     * Unoptimizable stuff --- jump *
2974     *************************************************************************/
2975    
2976     static __inline__ void raw_call_r(R4 r)
2977     {
2978 gbeauche 1.20 #if USE_NEW_RTASM
2979     CALLsr(r);
2980     #else
2981 gbeauche 1.1 emit_byte(0xff);
2982     emit_byte(0xd0+r);
2983 gbeauche 1.20 #endif
2984 gbeauche 1.5 }
2985    
2986     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2987     {
2988 gbeauche 1.20 #if USE_NEW_RTASM
2989     CALLsm(base, X86_NOREG, r, m);
2990     #else
2991 gbeauche 1.5 int mu;
2992     switch(m) {
2993     case 1: mu=0; break;
2994     case 2: mu=1; break;
2995     case 4: mu=2; break;
2996     case 8: mu=3; break;
2997     default: abort();
2998     }
2999     emit_byte(0xff);
3000     emit_byte(0x14);
3001     emit_byte(0x05+8*r+0x40*mu);
3002     emit_long(base);
3003 gbeauche 1.20 #endif
3004 gbeauche 1.1 }
3005    
3006     static __inline__ void raw_jmp_r(R4 r)
3007     {
3008 gbeauche 1.20 #if USE_NEW_RTASM
3009     JMPsr(r);
3010     #else
3011 gbeauche 1.1 emit_byte(0xff);
3012     emit_byte(0xe0+r);
3013 gbeauche 1.20 #endif
3014 gbeauche 1.1 }
3015    
3016     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3017     {
3018 gbeauche 1.20 #if USE_NEW_RTASM
3019     JMPsm(base, X86_NOREG, r, m);
3020     #else
3021 gbeauche 1.1 int mu;
3022     switch(m) {
3023     case 1: mu=0; break;
3024     case 2: mu=1; break;
3025     case 4: mu=2; break;
3026     case 8: mu=3; break;
3027     default: abort();
3028     }
3029     emit_byte(0xff);
3030     emit_byte(0x24);
3031     emit_byte(0x05+8*r+0x40*mu);
3032     emit_long(base);
3033 gbeauche 1.20 #endif
3034 gbeauche 1.1 }
3035    
3036     static __inline__ void raw_jmp_m(uae_u32 base)
3037     {
3038     emit_byte(0xff);
3039     emit_byte(0x25);
3040     emit_long(base);
3041     }
3042    
3043    
3044     static __inline__ void raw_call(uae_u32 t)
3045     {
3046 gbeauche 1.20 #if USE_NEW_RTASM
3047     CALLm(t);
3048     #else
3049 gbeauche 1.1 emit_byte(0xe8);
3050     emit_long(t-(uae_u32)target-4);
3051 gbeauche 1.20 #endif
3052 gbeauche 1.1 }
3053    
3054     static __inline__ void raw_jmp(uae_u32 t)
3055     {
3056 gbeauche 1.20 #if USE_NEW_RTASM
3057     JMPm(t);
3058     #else
3059 gbeauche 1.1 emit_byte(0xe9);
3060     emit_long(t-(uae_u32)target-4);
3061 gbeauche 1.20 #endif
3062 gbeauche 1.1 }
3063    
3064     static __inline__ void raw_jl(uae_u32 t)
3065     {
3066     emit_byte(0x0f);
3067     emit_byte(0x8c);
3068 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3069 gbeauche 1.1 }
3070    
3071     static __inline__ void raw_jz(uae_u32 t)
3072     {
3073     emit_byte(0x0f);
3074     emit_byte(0x84);
3075 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3076 gbeauche 1.1 }
3077    
3078     static __inline__ void raw_jnz(uae_u32 t)
3079     {
3080     emit_byte(0x0f);
3081     emit_byte(0x85);
3082 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3083 gbeauche 1.1 }
3084    
3085     static __inline__ void raw_jnz_l_oponly(void)
3086     {
3087     emit_byte(0x0f);
3088     emit_byte(0x85);
3089     }
3090    
3091     static __inline__ void raw_jcc_l_oponly(int cc)
3092     {
3093     emit_byte(0x0f);
3094     emit_byte(0x80+cc);
3095     }
3096    
3097     static __inline__ void raw_jnz_b_oponly(void)
3098     {
3099     emit_byte(0x75);
3100     }
3101    
3102     static __inline__ void raw_jz_b_oponly(void)
3103     {
3104     emit_byte(0x74);
3105     }
3106    
3107     static __inline__ void raw_jcc_b_oponly(int cc)
3108     {
3109     emit_byte(0x70+cc);
3110     }
3111    
3112     static __inline__ void raw_jmp_l_oponly(void)
3113     {
3114     emit_byte(0xe9);
3115     }
3116    
3117     static __inline__ void raw_jmp_b_oponly(void)
3118     {
3119     emit_byte(0xeb);
3120     }
3121    
3122     static __inline__ void raw_ret(void)
3123     {
3124     emit_byte(0xc3);
3125     }
3126    
3127     static __inline__ void raw_nop(void)
3128     {
3129     emit_byte(0x90);
3130     }
3131    
3132 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3133     {
3134     /* Source: GNU Binutils 2.12.90.0.15 */
3135     /* Various efficient no-op patterns for aligning code labels.
3136     Note: Don't try to assemble the instructions in the comments.
3137     0L and 0w are not legal. */
3138     static const uae_u8 f32_1[] =
3139     {0x90}; /* nop */
3140     static const uae_u8 f32_2[] =
3141     {0x89,0xf6}; /* movl %esi,%esi */
3142     static const uae_u8 f32_3[] =
3143     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3144     static const uae_u8 f32_4[] =
3145     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3146     static const uae_u8 f32_5[] =
3147     {0x90, /* nop */
3148     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3149     static const uae_u8 f32_6[] =
3150     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3151     static const uae_u8 f32_7[] =
3152     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3153     static const uae_u8 f32_8[] =
3154     {0x90, /* nop */
3155     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3156     static const uae_u8 f32_9[] =
3157     {0x89,0xf6, /* movl %esi,%esi */
3158     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3159     static const uae_u8 f32_10[] =
3160     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3161     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3162     static const uae_u8 f32_11[] =
3163     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3164     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3165     static const uae_u8 f32_12[] =
3166     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3167     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3168     static const uae_u8 f32_13[] =
3169     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3170     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3171     static const uae_u8 f32_14[] =
3172     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3173     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3174     static const uae_u8 f32_15[] =
3175     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3176     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3177     static const uae_u8 f32_16[] =
3178     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3179     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3180     static const uae_u8 *const f32_patt[] = {
3181     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3182     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3183     };
3184 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3185 gbeauche 1.8
3186 gbeauche 1.21 #if defined(__x86_64__)
3187     /* The recommended way to pad 64bit code is to use NOPs preceded by
3188     maximally four 0x66 prefixes. Balance the size of nops. */
3189     if (nbytes == 0)
3190     return;
3191    
3192     int i;
3193     int nnops = (nbytes + 3) / 4;
3194     int len = nbytes / nnops;
3195     int remains = nbytes - nnops * len;
3196    
3197     for (i = 0; i < remains; i++) {
3198     emit_block(prefixes, len);
3199     raw_nop();
3200     }
3201     for (; i < nnops; i++) {
3202     emit_block(prefixes, len - 1);
3203     raw_nop();
3204     }
3205     #else
3206 gbeauche 1.8 int nloops = nbytes / 16;
3207     while (nloops-- > 0)
3208     emit_block(f32_16, sizeof(f32_16));
3209    
3210     nbytes %= 16;
3211     if (nbytes)
3212     emit_block(f32_patt[nbytes - 1], nbytes);
3213 gbeauche 1.21 #endif
3214 gbeauche 1.8 }
3215    
3216 gbeauche 1.1
3217     /*************************************************************************
3218     * Flag handling, to and fro UAE flag register *
3219     *************************************************************************/
3220    
3221     #ifdef SAHF_SETO_PROFITABLE
3222    
3223     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3224    
3225     static __inline__ void raw_flags_to_reg(int r)
3226     {
3227     raw_lahf(0); /* Most flags in AH */
3228     //raw_setcc(r,0); /* V flag in AL */
3229 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3230 gbeauche 1.1
3231     #if 1 /* Let's avoid those nasty partial register stalls */
3232 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3233     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3234 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3235     live.state[FLAGTMP].status=INMEM;
3236     live.state[FLAGTMP].realreg=-1;
3237     /* We just "evicted" FLAGTMP. */
3238     if (live.nat[r].nholds!=1) {
3239     /* Huh? */
3240     abort();
3241     }
3242     live.nat[r].nholds=0;
3243     #endif
3244     }
3245    
3246     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3247     static __inline__ void raw_reg_to_flags(int r)
3248     {
3249     raw_cmp_b_ri(r,-127); /* set V */
3250     raw_sahf(0);
3251     }
3252    
3253 gbeauche 1.24 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3254     static __inline__ void raw_flags_set_zero(int s, int tmp)
3255     {
3256     raw_mov_l_rr(tmp,s);
3257     raw_lahf(s); /* flags into ah */
3258     raw_and_l_ri(s,0xffffbfff);
3259     raw_and_l_ri(tmp,0x00004000);
3260     raw_xor_l_ri(tmp,0x00004000);
3261     raw_or_l(s,tmp);
3262     raw_sahf(s);
3263     }
3264    
3265 gbeauche 1.1 #else
3266    
3267     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3268     static __inline__ void raw_flags_to_reg(int r)
3269     {
3270     raw_pushfl();
3271     raw_pop_l_r(r);
3272 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3273 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3274     live.state[FLAGTMP].status=INMEM;
3275     live.state[FLAGTMP].realreg=-1;
3276     /* We just "evicted" FLAGTMP. */
3277     if (live.nat[r].nholds!=1) {
3278     /* Huh? */
3279     abort();
3280     }
3281     live.nat[r].nholds=0;
3282     }
3283    
3284     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3285     static __inline__ void raw_reg_to_flags(int r)
3286     {
3287     raw_push_l_r(r);
3288     raw_popfl();
3289     }
3290    
3291 gbeauche 1.24 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3292     static __inline__ void raw_flags_set_zero(int s, int tmp)
3293     {
3294     raw_mov_l_rr(tmp,s);
3295     raw_pushfl();
3296     raw_pop_l_r(s);
3297     raw_and_l_ri(s,0xffffffbf);
3298     raw_and_l_ri(tmp,0x00000040);
3299     raw_xor_l_ri(tmp,0x00000040);
3300     raw_or_l(s,tmp);
3301     raw_push_l_r(s);
3302     raw_popfl();
3303     }
3304 gbeauche 1.1 #endif
3305    
3306     /* Apparently, there are enough instructions between flag store and
3307     flag reload to avoid the partial memory stall */
3308     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3309     {
3310     #if 1
3311 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3312 gbeauche 1.1 #else
3313 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3314     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3315 gbeauche 1.1 #endif
3316     }
3317    
3318     /* FLAGX is byte sized, and we *do* write it at that size */
3319     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3320     {
3321     if (live.nat[target].canbyte)
3322 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3323 gbeauche 1.1 else if (live.nat[target].canword)
3324 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3325 gbeauche 1.1 else
3326 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3327 gbeauche 1.1 }
3328    
3329 gbeauche 1.31 static __inline__ void raw_dec_sp(int off)
3330     {
3331     if (off) raw_sub_l_ri(ESP_INDEX,off);
3332     }
3333    
3334 gbeauche 1.1 static __inline__ void raw_inc_sp(int off)
3335     {
3336 gbeauche 1.31 if (off) raw_add_l_ri(ESP_INDEX,off);
3337 gbeauche 1.1 }
3338    
3339     /*************************************************************************
3340     * Handling mistaken direct memory access *
3341     *************************************************************************/
3342    
3343     // gb-- I don't need that part for JIT Basilisk II
3344     #if defined(NATMEM_OFFSET) && 0
3345     #include <asm/sigcontext.h>
3346     #include <signal.h>
3347    
3348     #define SIG_READ 1
3349     #define SIG_WRITE 2
3350    
3351     static int in_handler=0;
3352     static uae_u8 veccode[256];
3353    
3354     static void vec(int x, struct sigcontext sc)
3355     {
3356     uae_u8* i=(uae_u8*)sc.eip;
3357     uae_u32 addr=sc.cr2;
3358     int r=-1;
3359     int size=4;
3360     int dir=-1;
3361     int len=0;
3362     int j;
3363    
3364     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3365     if (!canbang)
3366     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3367     if (in_handler)
3368     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3369    
3370     if (canbang && i>=compiled_code && i<=current_compile_p) {
3371     if (*i==0x66) {
3372     i++;
3373     size=2;
3374     len++;
3375     }
3376    
3377     switch(i[0]) {
3378     case 0x8a:
3379     if ((i[1]&0xc0)==0x80) {
3380     r=(i[1]>>3)&7;
3381     dir=SIG_READ;
3382     size=1;
3383     len+=6;
3384     break;
3385     }
3386     break;
3387     case 0x88:
3388     if ((i[1]&0xc0)==0x80) {
3389     r=(i[1]>>3)&7;
3390     dir=SIG_WRITE;
3391     size=1;
3392     len+=6;
3393     break;
3394     }
3395     break;
3396     case 0x8b:
3397     if ((i[1]&0xc0)==0x80) {
3398     r=(i[1]>>3)&7;
3399     dir=SIG_READ;
3400     len+=6;
3401     break;
3402     }
3403     if ((i[1]&0xc0)==0x40) {
3404     r=(i[1]>>3)&7;
3405     dir=SIG_READ;
3406     len+=3;
3407     break;
3408     }
3409     break;
3410     case 0x89:
3411     if ((i[1]&0xc0)==0x80) {
3412     r=(i[1]>>3)&7;
3413     dir=SIG_WRITE;
3414     len+=6;
3415     break;
3416     }
3417     if ((i[1]&0xc0)==0x40) {
3418     r=(i[1]>>3)&7;
3419     dir=SIG_WRITE;
3420     len+=3;
3421     break;
3422     }
3423     break;
3424     }
3425     }
3426    
3427     if (r!=-1) {
3428     void* pr=NULL;
3429     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3430    
3431     switch(r) {
3432     case 0: pr=&(sc.eax); break;
3433     case 1: pr=&(sc.ecx); break;
3434     case 2: pr=&(sc.edx); break;
3435     case 3: pr=&(sc.ebx); break;
3436     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3437     case 5: pr=(size>1)?
3438     (void*)(&(sc.ebp)):
3439     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3440     case 6: pr=(size>1)?
3441     (void*)(&(sc.esi)):
3442     (void*)(((uae_u8*)&(sc.edx))+1); break;
3443     case 7: pr=(size>1)?
3444     (void*)(&(sc.edi)):
3445     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3446     default: abort();
3447     }
3448     if (pr) {
3449     blockinfo* bi;
3450    
3451     if (currprefs.comp_oldsegv) {
3452     addr-=NATMEM_OFFSET;
3453    
3454     if ((addr>=0x10000000 && addr<0x40000000) ||
3455     (addr>=0x50000000)) {
3456     write_log("Suspicious address in %x SEGV handler.\n",addr);
3457     }
3458     if (dir==SIG_READ) {
3459     switch(size) {
3460     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3461     case 2: *((uae_u16*)pr)=get_word(addr); break;
3462     case 4: *((uae_u32*)pr)=get_long(addr); break;
3463     default: abort();
3464     }
3465     }
3466     else { /* write */
3467     switch(size) {
3468     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3469     case 2: put_word(addr,*((uae_u16*)pr)); break;
3470     case 4: put_long(addr,*((uae_u32*)pr)); break;
3471     default: abort();
3472     }
3473     }
3474     write_log("Handled one access!\n");
3475     fflush(stdout);
3476     segvcount++;
3477     sc.eip+=len;
3478     }
3479     else {
3480     void* tmp=target;
3481     int i;
3482     uae_u8 vecbuf[5];
3483    
3484     addr-=NATMEM_OFFSET;
3485    
3486     if ((addr>=0x10000000 && addr<0x40000000) ||
3487     (addr>=0x50000000)) {
3488     write_log("Suspicious address in %x SEGV handler.\n",addr);
3489     }
3490    
3491     target=(uae_u8*)sc.eip;
3492     for (i=0;i<5;i++)
3493     vecbuf[i]=target[i];
3494     emit_byte(0xe9);
3495 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3496 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3497    
3498     write_log("Handled one access!\n");
3499     fflush(stdout);
3500     segvcount++;
3501    
3502     target=veccode;
3503    
3504     if (dir==SIG_READ) {
3505     switch(size) {
3506     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3507     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3508     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3509     default: abort();
3510     }
3511     }
3512     else { /* write */
3513     switch(size) {
3514     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3515     case 2: put_word(addr,*((uae_u16*)pr)); break;
3516     case 4: put_long(addr,*((uae_u32*)pr)); break;
3517     default: abort();
3518     }
3519     }
3520     for (i=0;i<5;i++)
3521     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3522 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3523 gbeauche 1.1 emit_byte(0xe9);
3524 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3525 gbeauche 1.1 in_handler=1;
3526     target=tmp;
3527     }
3528     bi=active;
3529     while (bi) {
3530     if (bi->handler &&
3531     (uae_u8*)bi->direct_handler<=i &&
3532     (uae_u8*)bi->nexthandler>i) {
3533     write_log("deleted trigger (%p<%p<%p) %p\n",
3534     bi->handler,
3535     i,
3536     bi->nexthandler,
3537     bi->pc_p);
3538     invalidate_block(bi);
3539     raise_in_cl_list(bi);
3540     set_special(0);
3541     return;
3542     }
3543     bi=bi->next;
3544     }
3545     /* Not found in the active list. Might be a rom routine that
3546     is in the dormant list */
3547     bi=dormant;
3548     while (bi) {
3549     if (bi->handler &&
3550     (uae_u8*)bi->direct_handler<=i &&
3551     (uae_u8*)bi->nexthandler>i) {
3552     write_log("deleted trigger (%p<%p<%p) %p\n",
3553     bi->handler,
3554     i,
3555     bi->nexthandler,
3556     bi->pc_p);
3557     invalidate_block(bi);
3558     raise_in_cl_list(bi);
3559     set_special(0);
3560     return;
3561     }
3562     bi=bi->next;
3563     }
3564     write_log("Huh? Could not find trigger!\n");
3565     return;
3566     }
3567     }
3568     write_log("Can't handle access!\n");
3569     for (j=0;j<10;j++) {
3570     write_log("instruction byte %2d is %02x\n",j,i[j]);
3571     }
3572     write_log("Please send the above info (starting at \"fault address\") to\n"
3573     "bmeyer@csse.monash.edu.au\n"
3574     "This shouldn't happen ;-)\n");
3575     fflush(stdout);
3576     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3577     }
3578     #endif
3579    
3580    
3581     /*************************************************************************
3582     * Checking for CPU features *
3583     *************************************************************************/
3584    
3585 gbeauche 1.3 struct cpuinfo_x86 {
3586     uae_u8 x86; // CPU family
3587     uae_u8 x86_vendor; // CPU vendor
3588     uae_u8 x86_processor; // CPU canonical processor type
3589     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3590     uae_u32 x86_hwcap;
3591     uae_u8 x86_model;
3592     uae_u8 x86_mask;
3593     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3594     char x86_vendor_id[16];
3595     };
3596     struct cpuinfo_x86 cpuinfo;
3597    
/* CPU vendor codes stored in cpuinfo_x86.x86_vendor. */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff	/* vendor string not recognised */
};
3610    
/* Canonical processor types stored in cpuinfo_x86.x86_processor.  The
   values index x86_processor_string_table[] and x86_alignments[]. */
enum {
    X86_PROCESSOR_I386 = 0,	/* 80386 */
    X86_PROCESSOR_I486,		/* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_X86_64,
    X86_PROCESSOR_max		/* number of entries; also used as "unknown" */
};
3622    
3623     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3624     "80386",
3625     "80486",
3626     "Pentium",
3627     "PentiumPro",
3628     "K6",
3629     "Athlon",
3630 gbeauche 1.16 "Pentium4",
3631 gbeauche 1.28 "x86-64"
3632 gbeauche 1.3 };
3633    
3634     static struct ptt {
3635     const int align_loop;
3636     const int align_loop_max_skip;
3637     const int align_jump;
3638     const int align_jump_max_skip;
3639     const int align_func;
3640     }
3641     x86_alignments[X86_PROCESSOR_max] = {
3642     { 4, 3, 4, 3, 4 },
3643     { 16, 15, 16, 15, 16 },
3644     { 16, 7, 16, 7, 16 },
3645     { 16, 15, 16, 7, 16 },
3646     { 32, 7, 32, 7, 32 },
3647 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3648 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3649     { 16, 7, 16, 7, 16 }
3650 gbeauche 1.3 };
3651 gbeauche 1.1
3652 gbeauche 1.3 static void
3653     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3654 gbeauche 1.1 {
3655 gbeauche 1.3 char *v = c->x86_vendor_id;
3656    
3657     if (!strcmp(v, "GenuineIntel"))
3658     c->x86_vendor = X86_VENDOR_INTEL;
3659     else if (!strcmp(v, "AuthenticAMD"))
3660     c->x86_vendor = X86_VENDOR_AMD;
3661     else if (!strcmp(v, "CyrixInstead"))
3662     c->x86_vendor = X86_VENDOR_CYRIX;
3663     else if (!strcmp(v, "Geode by NSC"))
3664     c->x86_vendor = X86_VENDOR_NSC;
3665     else if (!strcmp(v, "UMC UMC UMC "))
3666     c->x86_vendor = X86_VENDOR_UMC;
3667     else if (!strcmp(v, "CentaurHauls"))
3668     c->x86_vendor = X86_VENDOR_CENTAUR;
3669     else if (!strcmp(v, "NexGenDriven"))
3670     c->x86_vendor = X86_VENDOR_NEXGEN;
3671     else if (!strcmp(v, "RiseRiseRise"))
3672     c->x86_vendor = X86_VENDOR_RISE;
3673     else if (!strcmp(v, "GenuineTMx86") ||
3674     !strcmp(v, "TransmetaCPU"))
3675     c->x86_vendor = X86_VENDOR_TRANSMETA;
3676     else
3677     c->x86_vendor = X86_VENDOR_UNKNOWN;
3678     }
3679 gbeauche 1.1
3680 gbeauche 1.3 static void
3681     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3682     {
3683 gbeauche 1.27 const int CPUID_SPACE = 4096;
3684     uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3685     if (cpuid_space == VM_MAP_FAILED)
3686     abort();
3687     vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3688    
3689 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3690 gbeauche 1.3 uae_u8* tmp=get_target();
3691 gbeauche 1.1
3692 gbeauche 1.20 s_op = op;
3693 gbeauche 1.3 set_target(cpuid_space);
3694     raw_push_l_r(0); /* eax */
3695     raw_push_l_r(1); /* ecx */
3696     raw_push_l_r(2); /* edx */
3697     raw_push_l_r(3); /* ebx */
3698 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3699 gbeauche 1.3 raw_cpuid(0);
3700 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3701     raw_mov_l_mr((uintptr)&s_ebx,3);
3702     raw_mov_l_mr((uintptr)&s_ecx,1);
3703     raw_mov_l_mr((uintptr)&s_edx,2);
3704 gbeauche 1.3 raw_pop_l_r(3);
3705     raw_pop_l_r(2);
3706     raw_pop_l_r(1);
3707     raw_pop_l_r(0);
3708     raw_ret();
3709     set_target(tmp);
3710 gbeauche 1.1
3711 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3712 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3713     if (ebx != NULL) *ebx = s_ebx;
3714     if (ecx != NULL) *ecx = s_ecx;
3715     if (edx != NULL) *edx = s_edx;
3716 gbeauche 1.27
3717     vm_release(cpuid_space, CPUID_SPACE);
3718 gbeauche 1.1 }
3719    
3720 gbeauche 1.3 static void
3721     raw_init_cpu(void)
3722 gbeauche 1.1 {
3723 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3724    
3725     /* Defaults */
3726 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3727 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3728     c->cpuid_level = -1; /* CPUID not detected */
3729     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3730     c->x86_vendor_id[0] = '\0'; /* Unset */
3731     c->x86_hwcap = 0;
3732    
3733     /* Get vendor name */
3734     c->x86_vendor_id[12] = '\0';
3735     cpuid(0x00000000,
3736     (uae_u32 *)&c->cpuid_level,
3737     (uae_u32 *)&c->x86_vendor_id[0],
3738     (uae_u32 *)&c->x86_vendor_id[8],
3739     (uae_u32 *)&c->x86_vendor_id[4]);
3740     x86_get_cpu_vendor(c);
3741    
3742     /* Intel-defined flags: level 0x00000001 */
3743     c->x86_brand_id = 0;
3744     if ( c->cpuid_level >= 0x00000001 ) {
3745     uae_u32 tfms, brand_id;
3746     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3747     c->x86 = (tfms >> 8) & 15;
3748 gbeauche 1.29 if (c->x86 == 0xf)
3749     c->x86 += (tfms >> 20) & 0xff; /* extended family */
3750 gbeauche 1.3 c->x86_model = (tfms >> 4) & 15;
3751 gbeauche 1.29 if (c->x86_model == 0xf)
3752     c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3753 gbeauche 1.3 c->x86_brand_id = brand_id & 0xff;
3754     c->x86_mask = tfms & 15;
3755     } else {
3756     /* Have CPUID level 0 only - unheard of */
3757     c->x86 = 4;
3758     }
3759    
3760 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3761     uae_u32 xlvl;
3762     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3763     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3764     if ( xlvl >= 0x80000001 ) {
3765 gbeauche 1.28 uae_u32 features, extra_features;
3766     cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3767 gbeauche 1.16 if (features & (1 << 29)) {
3768     /* Assume x86-64 if long mode is supported */
3769 gbeauche 1.28 c->x86_processor = X86_PROCESSOR_X86_64;
3770 gbeauche 1.16 }
3771 gbeauche 1.28 if (extra_features & (1 << 0))
3772     have_lahf_lm = true;
3773 gbeauche 1.16 }
3774     }
3775    
3776 gbeauche 1.3 /* Canonicalize processor ID */
3777     switch (c->x86) {
3778     case 3:
3779     c->x86_processor = X86_PROCESSOR_I386;
3780     break;
3781     case 4:
3782     c->x86_processor = X86_PROCESSOR_I486;
3783     break;
3784     case 5:
3785     if (c->x86_vendor == X86_VENDOR_AMD)
3786     c->x86_processor = X86_PROCESSOR_K6;
3787     else
3788     c->x86_processor = X86_PROCESSOR_PENTIUM;
3789     break;
3790     case 6:
3791     if (c->x86_vendor == X86_VENDOR_AMD)
3792     c->x86_processor = X86_PROCESSOR_ATHLON;
3793     else
3794     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3795     break;
3796     case 15:
3797 gbeauche 1.29 if (c->x86_processor == X86_PROCESSOR_max) {
3798     switch (c->x86_vendor) {
3799     case X86_VENDOR_INTEL:
3800     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3801     break;
3802     case X86_VENDOR_AMD:
3803     /* Assume a 32-bit Athlon processor if not in long mode */
3804     c->x86_processor = X86_PROCESSOR_ATHLON;
3805     break;
3806     }
3807     }
3808     break;
3809 gbeauche 1.3 }
3810     if (c->x86_processor == X86_PROCESSOR_max) {
3811 gbeauche 1.30 c->x86_processor = X86_PROCESSOR_I386;
3812     fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3813 gbeauche 1.3 fprintf(stderr, " Family : %d\n", c->x86);
3814     fprintf(stderr, " Model : %d\n", c->x86_model);
3815     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3816 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3817 gbeauche 1.3 if (c->x86_brand_id)
3818     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3819     }
3820    
3821     /* Have CMOV support? */
3822 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3823 gbeauche 1.3
3824     /* Can the host CPU suffer from partial register stalls? */
3825     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3826     #if 1
3827     /* It appears that partial register writes are a bad idea even on
3828 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3829     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3830 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3831     have_rat_stall = true;
3832 gbeauche 1.1 #endif
3833 gbeauche 1.3
3834     /* Alignments */
3835     if (tune_alignment) {
3836     align_loops = x86_alignments[c->x86_processor].align_loop;
3837     align_jumps = x86_alignments[c->x86_processor].align_jump;
3838     }
3839    
3840     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3841     c->cpuid_level, c->x86_vendor_id,
3842     x86_processor_string_table[c->x86_processor]);
3843 gbeauche 1.1 }
3844    
3845 gbeauche 1.10 static bool target_check_bsf(void)
3846     {
3847     bool mismatch = false;
3848     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3849     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3850     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3851     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3852     for (int value = -1; value <= 1; value++) {
3853 gbeauche 1.25 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3854     unsigned long tmp = value;
3855 gbeauche 1.10 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3856 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3857 gbeauche 1.10 int OF = (flags >> 11) & 1;
3858     int SF = (flags >> 7) & 1;
3859     int ZF = (flags >> 6) & 1;
3860     int CF = flags & 1;
3861     tmp = (value == 0);
3862     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3863     mismatch = true;
3864     }
3865     }}}}
3866     if (mismatch)
3867     write_log("Target CPU defines all flags on BSF instruction\n");
3868     return !mismatch;
3869     }
3870    
3871 gbeauche 1.1
3872     /*************************************************************************
3873     * FPU stuff *
3874     *************************************************************************/
3875    
3876    
3877     static __inline__ void raw_fp_init(void)
3878     {
3879     int i;
3880    
3881     for (i=0;i<N_FREGS;i++)
3882     live.spos[i]=-2;
3883     live.tos=-1; /* Stack is empty */
3884     }
3885    
3886     static __inline__ void raw_fp_cleanup_drop(void)
3887     {
3888     #if 0
3889     /* using FINIT instead of popping all the entries.
3890     Seems to have side effects --- there is display corruption in
3891     Quake when this is used */
3892     if (live.tos>1) {
3893     emit_byte(0x9b);
3894     emit_byte(0xdb);
3895     emit_byte(0xe3);
3896     live.tos=-1;
3897     }
3898     #endif
3899     while (live.tos>=1) {
3900     emit_byte(0xde);
3901     emit_byte(0xd9);
3902     live.tos-=2;
3903     }
3904     while (live.tos>=0) {
3905     emit_byte(0xdd);
3906     emit_byte(0xd8);
3907     live.tos--;
3908     }
3909     raw_fp_init();
3910     }
3911    
3912     static __inline__ void make_tos(int r)
3913     {
3914     int p,q;
3915    
3916     if (live.spos[r]<0) { /* Register not yet on stack */
3917     emit_byte(0xd9);
3918     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3919     live.tos++;
3920     live.spos[r]=live.tos;
3921     live.onstack[live.tos]=r;
3922     return;
3923     }
3924     /* Register is on stack */
3925     if (live.tos==live.spos[r])
3926     return;
3927     p=live.spos[r];
3928     q=live.onstack[live.tos];
3929    
3930     emit_byte(0xd9);
3931     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3932     live.onstack[live.tos]=r;
3933     live.spos[r]=live.tos;
3934     live.onstack[p]=q;
3935     live.spos[q]=p;
3936     }
3937    
3938     static __inline__ void make_tos2(int r, int r2)
3939     {
3940     int q;
3941    
3942     make_tos(r2); /* Put the reg that's supposed to end up in position2
3943     on top */
3944    
3945     if (live.spos[r]<0) { /* Register not yet on stack */
3946     make_tos(r); /* This will extend the stack */
3947     return;
3948     }
3949     /* Register is on stack */
3950     emit_byte(0xd9);
3951     emit_byte(0xc9); /* Move r2 into position 2 */
3952    
3953     q=live.onstack[live.tos-1];
3954     live.onstack[live.tos]=q;
3955     live.spos[q]=live.tos;
3956     live.onstack[live.tos-1]=r2;
3957     live.spos[r2]=live.tos-1;
3958    
3959     make_tos(r); /* And r into 1 */
3960     }
3961    
3962     static __inline__ int stackpos(int r)
3963     {
3964     if (live.spos[r]<0)
3965     abort();
3966     if (live.tos<live.spos[r]) {
3967     printf("Looking for spos for fnreg %d\n",r);
3968     abort();
3969     }
3970     return live.tos-live.spos[r];
3971     }
3972    
3973     static __inline__ void usereg(int r)
3974     {
3975     if (live.spos[r]<0)
3976     make_tos(r);
3977     }
3978    
3979     /* This is called with one FP value in a reg *above* tos, which it will
3980     pop off the stack if necessary */
3981     static __inline__ void tos_make(int r)
3982     {
3983     if (live.spos[r]<0) {
3984     live.tos++;
3985     live.spos[r]=live.tos;
3986     live.onstack[live.tos]=r;
3987     return;
3988     }
3989     emit_byte(0xdd);
3990     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3991     and pop it*/
3992     }
3993 gbeauche 1.23
3994     /* FP helper functions */
3995     #if USE_NEW_RTASM
3996     #define DEFINE_OP(NAME, GEN) \
3997     static inline void raw_##NAME(uint32 m) \
3998     { \
3999     GEN(m, X86_NOREG, X86_NOREG, 1); \
4000     }
4001     DEFINE_OP(fstl, FSTLm);
4002     DEFINE_OP(fstpl, FSTPLm);
4003     DEFINE_OP(fldl, FLDLm);
4004     DEFINE_OP(fildl, FILDLm);
4005     DEFINE_OP(fistl, FISTLm);
4006     DEFINE_OP(flds, FLDSm);
4007     DEFINE_OP(fsts, FSTSm);
4008     DEFINE_OP(fstpt, FSTPTm);
4009     DEFINE_OP(fldt, FLDTm);
4010     #else
4011     #define DEFINE_OP(NAME, OP1, OP2) \
4012     static inline void raw_##NAME(uint32 m) \
4013     { \
4014     emit_byte(OP1); \
4015     emit_byte(OP2); \
4016     emit_long(m); \
4017     }
4018     DEFINE_OP(fstl, 0xdd, 0x15);
4019     DEFINE_OP(fstpl, 0xdd, 0x1d);
4020     DEFINE_OP(fldl, 0xdd, 0x05);
4021     DEFINE_OP(fildl, 0xdb, 0x05);
4022     DEFINE_OP(fistl, 0xdb, 0x15);
4023     DEFINE_OP(flds, 0xd9, 0x05);
4024     DEFINE_OP(fsts, 0xd9, 0x15);
4025     DEFINE_OP(fstpt, 0xdb, 0x3d);
4026     DEFINE_OP(fldt, 0xdb, 0x2d);
4027     #endif
4028     #undef DEFINE_OP
4029    
4030 gbeauche 1.1 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4031     {
4032     make_tos(r);
4033 gbeauche 1.23 raw_fstl(m);
4034 gbeauche 1.1 }
4035     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4036    
4037     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4038     {
4039     make_tos(r);
4040 gbeauche 1.23 raw_fstpl(m);
4041 gbeauche 1.1 live.onstack[live.tos]=-1;
4042     live.tos--;
4043     live.spos[r]=-2;
4044     }
4045     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4046    
4047     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4048     {
4049 gbeauche 1.23 raw_fldl(m);
4050 gbeauche 1.1 tos_make(r);
4051     }
4052     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4053    
4054     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4055     {
4056 gbeauche 1.23 raw_fildl(m);
4057 gbeauche 1.1 tos_make(r);
4058     }
4059     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4060    
4061     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4062     {
4063     make_tos(r);
4064 gbeauche 1.23 raw_fistl(m);
4065 gbeauche 1.1 }
4066     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4067    
4068     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4069     {
4070 gbeauche 1.23 raw_flds(m);
4071 gbeauche 1.1 tos_make(r);
4072     }
4073     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4074    
4075     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4076     {
4077     make_tos(r);
4078 gbeauche 1.23 raw_fsts(m);
4079 gbeauche 1.1 }
4080     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4081    
4082     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4083     {
4084     int rs;
4085    
4086     /* Stupid x87 can't write a long double to mem without popping the
4087     stack! */
4088     usereg(r);
4089     rs=stackpos(r);
4090     emit_byte(0xd9); /* Get a copy to the top of stack */
4091     emit_byte(0xc0+rs);
4092    
4093 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4094 gbeauche 1.1 }
4095     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4096    
4097     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4098     {
4099     int rs;
4100    
4101     make_tos(r);
4102 gbeauche 1.23 raw_fstpt(m); /* store and pop it */
4103 gbeauche 1.1 live.onstack[live.tos]=-1;
4104     live.tos--;
4105     live.spos[r]=-2;
4106     }
4107     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4108    
4109     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4110     {
4111 gbeauche 1.23 raw_fldt(m);
4112 gbeauche 1.1 tos_make(r);
4113     }
4114     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4115    
4116     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4117     {
4118     emit_byte(0xd9);
4119     emit_byte(0xeb);
4120     tos_make(r);
4121     }
4122     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4123    
4124     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4125     {
4126     emit_byte(0xd9);
4127     emit_byte(0xec);
4128     tos_make(r);
4129     }
4130     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4131    
4132     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4133     {
4134     emit_byte(0xd9);
4135     emit_byte(0xea);
4136     tos_make(r);
4137     }
4138     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4139    
4140     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4141     {
4142     emit_byte(0xd9);
4143     emit_byte(0xed);
4144     tos_make(r);
4145     }
4146     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4147    
4148     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4149     {
4150     emit_byte(0xd9);
4151     emit_byte(0xe8);
4152     tos_make(r);
4153     }
4154     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4155    
4156     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4157     {
4158     emit_byte(0xd9);
4159     emit_byte(0xee);
4160     tos_make(r);
4161     }
4162     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4163    
4164     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4165     {
4166     int ds;
4167    
4168     usereg(s);
4169     ds=stackpos(s);
4170     if (ds==0 && live.spos[d]>=0) {
4171     /* source is on top of stack, and we already have the dest */
4172     int dd=stackpos(d);
4173     emit_byte(0xdd);
4174     emit_byte(0xd0+dd);
4175     }
4176     else {
4177     emit_byte(0xd9);
4178     emit_byte(0xc0+ds); /* duplicate source on tos */
4179     tos_make(d); /* store to destination, pop if necessary */
4180     }
4181     }
4182     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4183    
4184     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4185     {
4186     emit_byte(0xd9);
4187     emit_byte(0xa8+index);
4188     emit_long(base);
4189     }
4190     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4191    
4192    
4193     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4194     {
4195     int ds;
4196    
4197     if (d!=s) {
4198     usereg(s);
4199     ds=stackpos(s);
4200     emit_byte(0xd9);
4201     emit_byte(0xc0+ds); /* duplicate source */
4202     emit_byte(0xd9);
4203     emit_byte(0xfa); /* take square root */
4204     tos_make(d); /* store to destination */
4205     }
4206     else {
4207     make_tos(d);
4208     emit_byte(0xd9);
4209     emit_byte(0xfa); /* take square root */
4210     }
4211     }
4212     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4213    
4214     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4215     {
4216     int ds;
4217    
4218     if (d!=s) {
4219     usereg(s);
4220     ds=stackpos(s);
4221     emit_byte(0xd9);
4222     emit_byte(0xc0+ds); /* duplicate source */
4223     emit_byte(0xd9);
4224     emit_byte(0xe1); /* take fabs */
4225     tos_make(d); /* store to destination */
4226     }
4227     else {
4228     make_tos(d);
4229     emit_byte(0xd9);
4230     emit_byte(0xe1); /* take fabs */
4231     }
4232     }
4233     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4234    
4235     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4236     {
4237     int ds;
4238    
4239     if (d!=s) {
4240     usereg(s);
4241     ds=stackpos(s);
4242     emit_byte(0xd9);
4243     emit_byte(0xc0+ds); /* duplicate source */
4244     emit_byte(0xd9);
4245     emit_byte(0xfc); /* take frndint */
4246     tos_make(d); /* store to destination */
4247     }
4248     else {
4249     make_tos(d);
4250     emit_byte(0xd9);
4251     emit_byte(0xfc); /* take frndint */
4252     }
4253     }
4254     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4255    
4256     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4257     {
4258     int ds;
4259    
4260     if (d!=s) {
4261     usereg(s);
4262     ds=stackpos(s);
4263     emit_byte(0xd9);
4264     emit_byte(0xc0+ds); /* duplicate source */
4265     emit_byte(0xd9);
4266     emit_byte(0xff); /* take cos */
4267     tos_make(d); /* store to destination */
4268     }
4269     else {
4270     make_tos(d);
4271     emit_byte(0xd9);
4272     emit_byte(0xff); /* take cos */
4273     }
4274     }
4275     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4276    
4277     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4278     {
4279     int ds;
4280    
4281     if (d!=s) {
4282     usereg(s);
4283     ds=stackpos(s);
4284     emit_byte(0xd9);
4285     emit_byte(0xc0+ds); /* duplicate source */
4286     emit_byte(0xd9);
4287     emit_byte(0xfe); /* take sin */
4288     tos_make(d); /* store to destination */
4289     }
4290     else {
4291     make_tos(d);
4292     emit_byte(0xd9);
4293     emit_byte(0xfe); /* take sin */
4294     }
4295     }
4296     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4297    
4298     double one=1;
4299     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4300     {
4301     int ds;
4302    
4303     usereg(s);
4304     ds=stackpos(s);
4305     emit_byte(0xd9);
4306     emit_byte(0xc0+ds); /* duplicate source */
4307    
4308     emit_byte(0xd9);
4309     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4310     emit_byte(0xd9);
4311     emit_byte(0xfc); /* rndint */
4312     emit_byte(0xd9);
4313     emit_byte(0xc9); /* swap top two elements */
4314     emit_byte(0xd8);
4315     emit_byte(0xe1); /* subtract rounded from original */
4316     emit_byte(0xd9);
4317     emit_byte(0xf0); /* f2xm1 */
4318     emit_byte(0xdc);
4319     emit_byte(0x05);
4320 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4321 gbeauche 1.1 emit_byte(0xd9);
4322     emit_byte(0xfd); /* and scale it */
4323     emit_byte(0xdd);
4324     emit_byte(0xd9); /* take he rounded value off */
4325     tos_make(d); /* store to destination */
4326     }
4327     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4328    
4329     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4330     {
4331     int ds;
4332    
4333     usereg(s);
4334     ds=stackpos(s);
4335     emit_byte(0xd9);
4336     emit_byte(0xc0+ds); /* duplicate source */
4337     emit_byte(0xd9);
4338     emit_byte(0xea); /* fldl2e */
4339     emit_byte(0xde);
4340     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4341    
4342     emit_byte(0xd9);
4343     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4344     emit_byte(0xd9);
4345     emit_byte(0xfc); /* rndint */
4346     emit_byte(0xd9);
4347     emit_byte(0xc9); /* swap top two elements */
4348     emit_byte(0xd8);
4349     emit_byte(0xe1); /* subtract rounded from original */
4350     emit_byte(0xd9);
4351     emit_byte(0xf0); /* f2xm1 */
4352     emit_byte(0xdc);
4353     emit_byte(0x05);
4354 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4355 gbeauche 1.1 emit_byte(0xd9);
4356     emit_byte(0xfd); /* and scale it */
4357     emit_byte(0xdd);
4358     emit_byte(0xd9); /* take he rounded value off */
4359     tos_make(d); /* store to destination */
4360     }
4361     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4362    
4363     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4364     {
4365     int ds;
4366    
4367     usereg(s);
4368     ds=stackpos(s);
4369     emit_byte(0xd9);
4370     emit_byte(0xc0+ds); /* duplicate source */
4371     emit_byte(0xd9);
4372     emit_byte(0xe8); /* push '1' */
4373     emit_byte(0xd9);
4374     emit_byte(0xc9); /* swap top two */
4375     emit_byte(0xd9);
4376     emit_byte(0xf1); /* take 1*log2(x) */
4377     tos_make(d); /* store to destination */
4378     }
4379     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4380    
4381    
4382     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4383     {
4384     int ds;
4385    
4386     if (d!=s) {
4387     usereg(s);
4388     ds=stackpos(s);
4389     emit_byte(0xd9);
4390     emit_byte(0xc0+ds); /* duplicate source */
4391     emit_byte(0xd9);
4392     emit_byte(0xe0); /* take fchs */
4393     tos_make(d); /* store to destination */
4394     }
4395     else {
4396     make_tos(d);
4397     emit_byte(0xd9);
4398     emit_byte(0xe0); /* take fchs */
4399     }
4400     }
4401     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4402    
4403     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4404     {
4405     int ds;
4406    
4407     usereg(s);
4408     usereg(d);
4409    
4410     if (live.spos[s]==live.tos) {
4411     /* Source is on top of stack */
4412     ds=stackpos(d);
4413     emit_byte(0xdc);
4414     emit_byte(0xc0+ds); /* add source to dest*/
4415     }
4416     else {
4417     make_tos(d);
4418     ds=stackpos(s);
4419    
4420     emit_byte(0xd8);
4421     emit_byte(0xc0+ds); /* add source to dest*/
4422     }
4423     }
4424     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4425    
4426     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4427     {
4428     int ds;
4429    
4430     usereg(s);
4431     usereg(d);
4432    
4433     if (live.spos[s]==live.tos) {
4434     /* Source is on top of stack */
4435     ds=stackpos(d);
4436     emit_byte(0xdc);
4437     emit_byte(0xe8+ds); /* sub source from dest*/
4438     }
4439     else {
4440     make_tos(d);
4441     ds=stackpos(s);
4442    
4443     emit_byte(0xd8);
4444     emit_byte(0xe0+ds); /* sub src from dest */
4445     }
4446     }
4447     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4448    
4449     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4450     {
4451     int ds;
4452    
4453     usereg(s);
4454     usereg(d);
4455    
4456     make_tos(d);
4457     ds=stackpos(s);
4458    
4459     emit_byte(0xdd);
4460     emit_byte(0xe0+ds); /* cmp dest with source*/
4461     }
4462     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4463    
4464     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4465     {
4466     int ds;
4467    
4468     usereg(s);
4469     usereg(d);
4470    
4471     if (live.spos[s]==live.tos) {
4472     /* Source is on top of stack */
4473     ds=stackpos(d);
4474     emit_byte(0xdc);
4475     emit_byte(0xc8+ds); /* mul dest by source*/
4476     }
4477     else {
4478     make_tos(d);
4479     ds=stackpos(s);
4480    
4481     emit_byte(0xd8);
4482     emit_byte(0xc8+ds); /* mul dest by source*/
4483     }
4484     }
4485     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4486    
4487     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4488     {
4489     int ds;
4490    
4491     usereg(s);
4492     usereg(d);
4493    
4494     if (live.spos[s]==live.tos) {
4495     /* Source is on top of stack */
4496     ds=stackpos(d);
4497     emit_byte(0xdc);
4498     emit_byte(0xf8+ds); /* div dest by source */
4499     }
4500     else {
4501     make_tos(d);
4502     ds=stackpos(s);
4503    
4504     emit_byte(0xd8);
4505     emit_byte(0xf0+ds); /* div dest by source*/
4506     }
4507     }
4508     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4509    
4510     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4511     {
4512     int ds;
4513    
4514     usereg(s);
4515     usereg(d);
4516    
4517     make_tos2(d,s);
4518     ds=stackpos(s);
4519    
4520     if (ds!=1) {
4521     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4522     abort();
4523     }
4524     emit_byte(0xd9);
4525     emit_byte(0xf8); /* take rem from dest by source */
4526     }
4527     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4528    
4529     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4530     {
4531     int ds;
4532    
4533     usereg(s);
4534     usereg(d);
4535    
4536     make_tos2(d,s);
4537     ds=stackpos(s);
4538    
4539     if (ds!=1) {
4540     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4541     abort();
4542     }
4543     emit_byte(0xd9);
4544     emit_byte(0xf5); /* take rem1 from dest by source */
4545     }
4546     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4547    
4548    
4549     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4550     {
4551     make_tos(r);
4552     emit_byte(0xd9); /* ftst */
4553     emit_byte(0xe4);
4554     }
4555     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4556    
4557     /* %eax register is clobbered if target processor doesn't support fucomi */
4558     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4559     #define FFLAG_NREG EAX_INDEX
4560    
4561     static __inline__ void raw_fflags_into_flags(int r)
4562     {
4563     int p;
4564    
4565     usereg(r);
4566     p=stackpos(r);
4567    
4568     emit_byte(0xd9);
4569     emit_byte(0xee); /* Push 0 */
4570     emit_byte(0xd9);
4571     emit_byte(0xc9+p); /* swap top two around */
4572     if (have_cmov) {
4573     // gb-- fucomi is for P6 cores only, not K6-2 then...
4574     emit_byte(0xdb);
4575     emit_byte(0xe9+p); /* fucomi them */
4576     }
4577     else {
4578     emit_byte(0xdd);
4579     emit_byte(0xe1+p); /* fucom them */
4580     emit_byte(0x9b);
4581     emit_byte(0xdf);
4582     emit_byte(0xe0); /* fstsw ax */
4583     raw_sahf(0); /* sahf */
4584     }
4585     emit_byte(0xdd);
4586     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4587     }