ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.21
Committed: 2004-11-01T17:12:55Z (19 years, 10 months ago) by gbeauche
Branch: MAIN
Changes since 1.20: +28 -1 lines
Log Message:
- refine need_to_preserve[] to get closer to the linux/x86_64 ABI
- optimize NOP fillers on x86-64 (based on GNU as implementation)

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 cebix 1.19 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 cebix 1.19 * Basilisk II (C) 1997-2004 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numeric indices used for the native integer registers. Index 4 (ESP_INDEX)
   is the value listed in always_used[] further down. R8_INDEX..R15_INDEX are
   only defined when building for x86-64. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
56     /* The register in which subroutines return an integer return value */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__)
/* x86-64 builds: first argument in EDI_INDEX, second in ESI_INDEX */
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else
/* every other build: EAX_INDEX and EDX_INDEX (presumably a register-parameter
   calling convention on the called helpers -- TODO confirm) */
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): in this branch REG_PC_TMP is EAX_INDEX, the same value as
   REG_PC_PRE, although the comment on the #else branch asks for a register
   that differs from it -- confirm this is intended. */
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
/* Register index lists; every list ends with -1.
   always_used: only index 4 (ESP_INDEX).
   can_byte / can_word: on x86-64 builds both hold every index 0-15 except 4.
   On other builds can_byte holds only 0-3 (EAX, ECX, EDX, EBX) and can_word
   holds 0-7 except 4.
   Presumably these name the registers usable for byte- and word-sized
   operands -- TODO confirm against the register allocator. */
84     uae_s8 always_used[]={4,-1};
85 gbeauche 1.20 #if defined(__x86_64__)
86     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88     #else
89 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
90     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 gbeauche 1.20 #endif
92 gbeauche 1.1
/* call_saved[] is indexed by register index; judging by the name, a non-zero
   entry marks a register that survives a call -- TODO confirm with the users
   of this table.
   NOTE(review): the USE_OPTIMIZED_CALLS table has 8 entries while the default
   one has 16, and that branch is fenced off by #error. In the default table
   only index 4 (ESP_INDEX) is set. */
93 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
94     /* Make sure interpretive core does not use cpuopti */
95     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 gbeauche 1.20 #error FIXME: code not ready
97 gbeauche 1.17 #else
98 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
99     by the caller */
100 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 gbeauche 1.17 #endif
102 gbeauche 1.1
103     /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
105     so we need to preserve some, but don't want to rely on everyone else
106     also saving those registers
107     - Special registers (such as the stack pointer) should not be "preserved"
108     by pushing, even though they are "saved" across function calls
109     */
/* need_to_preserve[] is indexed by register index; a non-zero entry marks a
   register to preserve.
   x86-64 table: indices 3 (EBX_INDEX), 5 (EBP_INDEX) and 12-15 (R12-R15).
   Index 4 (ESP_INDEX) is 0 in both tables even though the ABI comment below
   lists rsp; that agrees with the remark above that the stack pointer should
   not be "preserved" by pushing.
   Other builds: every index 0-7 except 4. */
110 gbeauche 1.21 #if defined(__x86_64__)
111     /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112     static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1};
113     #else
114     static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
115     #endif
116 gbeauche 1.1
117     /* Whether classes of instructions do or don't clobber the native flags */
/* A CLOBBER_x macro with an empty expansion marks a class that leaves the
   native flags alone; one that expands to clobber_flags() marks a class that
   destroys them. Where these macros are expanded is not visible in this part
   of the file. */
118     #define CLOBBER_MOV
119     #define CLOBBER_LEA
120     #define CLOBBER_CMOV
121     #define CLOBBER_POP
122     #define CLOBBER_PUSH
123     #define CLOBBER_SUB clobber_flags()
124     #define CLOBBER_SBB clobber_flags()
125     #define CLOBBER_CMP clobber_flags()
126     #define CLOBBER_ADD clobber_flags()
127     #define CLOBBER_ADC clobber_flags()
128     #define CLOBBER_AND clobber_flags()
129     #define CLOBBER_OR clobber_flags()
130     #define CLOBBER_XOR clobber_flags()
131    
132     #define CLOBBER_ROL clobber_flags()
133     #define CLOBBER_ROR clobber_flags()
134     #define CLOBBER_SHLL clobber_flags()
135     #define CLOBBER_SHRL clobber_flags()
136     #define CLOBBER_SHRA clobber_flags()
137     #define CLOBBER_TEST clobber_flags()
138     #define CLOBBER_CL16
139     #define CLOBBER_CL8
140 gbeauche 1.20 #define CLOBBER_SE32
141 gbeauche 1.1 #define CLOBBER_SE16
142     #define CLOBBER_SE8
143 gbeauche 1.20 #define CLOBBER_ZE32
144 gbeauche 1.1 #define CLOBBER_ZE16
145     #define CLOBBER_ZE8
146     #define CLOBBER_SW16 clobber_flags()
147     #define CLOBBER_SW32
148     #define CLOBBER_SETCC
149     #define CLOBBER_MUL clobber_flags()
150     #define CLOBBER_BT clobber_flags()
151     #define CLOBBER_BSF clobber_flags()
152    
153 gbeauche 1.13 /* FIXME: the new runtime assembler stays disabled until it has been proofread, except on x86-64 builds, where it is always enabled. */
154 gbeauche 1.20 #if defined(__x86_64__)
155     #define USE_NEW_RTASM 1
156     #endif
157    
158     #if USE_NEW_RTASM
159 gbeauche 1.13
/* Settings defined ahead of the codegen_x86.h include (presumably read by
   that header -- TODO confirm): X86_TARGET_64BIT only on x86-64 builds,
   X86_FLAT_REGISTERS off, X86_OPTIMIZE_ALU and X86_OPTIMIZE_ROTSHI on. */
160     #if defined(__x86_64__)
161     #define X86_TARGET_64BIT 1
162     #endif
163     #define X86_FLAT_REGISTERS 0
164 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
165     #define X86_OPTIMIZE_ROTSHI 1
166 gbeauche 1.13 #include "codegen_x86.h"
167    
/* Map the x86_emit_* and x86_get_target hooks onto emit_byte/word/long/quad
   and get_target. x86_emit_failure forwards its message to jit_fail together
   with the file, line and function of the place where the macro expands. */
168     #define x86_emit_byte(B) emit_byte(B)
169     #define x86_emit_word(W) emit_word(W)
170     #define x86_emit_long(L) emit_long(L)
171 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
172 gbeauche 1.13 #define x86_get_target() get_target()
173     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
174    
/* Report a fatal JIT error and terminate.
   Prints function, file, line and msg to stderr, then calls abort(), so this
   function does not return. It is the target of the x86_emit_failure()
   macro, which supplies __FILE__, __LINE__ and __FUNCTION__. */
175     static void jit_fail(const char *msg, const char *file, int line, const char *function)
176     {
177     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
178     function, file, line, msg);
179     abort();
180     }
181    
/* Push register r. Uses PUSHQr on x86-64 builds and PUSHLr otherwise, so
   despite the _l_ in the name the x86-64 variant is the Q form. */
182     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
183     {
184 gbeauche 1.20 #if defined(__x86_64__)
185     PUSHQr(r);
186     #else
187 gbeauche 1.13 PUSHLr(r);
188 gbeauche 1.20 #endif
189 gbeauche 1.13 }
190     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
191    
/* Pop into register r. Counterpart of raw_push_l_r: POPQr on x86-64 builds,
   POPLr otherwise. */
192     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
193     {
194 gbeauche 1.20 #if defined(__x86_64__)
195     POPQr(r);
196     #else
197 gbeauche 1.13 POPLr(r);
198 gbeauche 1.20 #endif
199 gbeauche 1.13 }
200     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
201    
/* Bit-operation wrappers: bt, btc, btr and bts on register r, with the bit
   number given either as immediate i (_ri, forwards to the ...Lir macro) or
   in register b (_rr, forwards to the ...Lrr macro). Each wrapper passes the
   bit number first and r second. raw_bt_* declares r as R4; the btc/btr/bts
   variants declare it as RW4. */
202     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
203     {
204     BTLir(i, r);
205     }
206     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
207    
208     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
209     {
210     BTLrr(b, r);
211     }
212     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
213    
214     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
215     {
216     BTCLir(i, r);
217     }
218     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
219    
220     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
221     {
222     BTCLrr(b, r);
223     }
224     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
225    
226     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
227     {
228     BTRLir(i, r);
229     }
230     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
231    
232     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
233     {
234     BTRLrr(b, r);
235     }
236     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
237    
238     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
239     {
240     BTSLir(i, r);
241     }
242     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
243    
244     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
245     {
246     BTSLrr(b, r);
247     }
248     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
249    
/* Word-sized subtract of immediate i from register d; forwards to
   SUBWir(i, d). */
250     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
251     {
252     SUBWir(i, d);
253     }
254     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
255    
/* Moves whose memory operand has neither base nor index register (both
   X86_NOREG, scale argument 1).
   raw_mov_l_rm loads register d from memory operand s.
   raw_mov_l_mi / raw_mov_w_mi / raw_mov_b_mi store immediate s to memory
   operand d in long, word and byte width respectively. */
256     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
257     {
258     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
259     }
260     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
261    
262     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
263     {
264     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
265     }
266     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
267    
268     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
269     {
270     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
271     }
272     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
273    
274     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
275     {
276     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
277     }
278     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
279    
/* Rotate-left wrappers.
   raw_rol_b_mi: rotate the byte at memory operand d (no base or index
   register) by immediate i.
   raw_rol_{b,w,l}_ri: rotate register r by immediate i.
   raw_rol_{l,w,b}_rr: rotate register d by the count held in register r.
   Each forwards to the matching ROL macro with the count as first argument. */
280     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
281     {
282     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
283     }
284     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
285    
286     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
287     {
288     ROLBir(i, r);
289     }
290     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
291    
292     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
293     {
294     ROLWir(i, r);
295     }
296     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
297    
298     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
299     {
300     ROLLir(i, r);
301     }
302     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
303    
304     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
305     {
306     ROLLrr(r, d);
307     }
308     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
309    
310     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
311     {
312     ROLWrr(r, d);
313     }
314     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
315    
316     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
317     {
318     ROLBrr(r, d);
319     }
320     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
321    
/* Shift-left of register d by the count held in register r, in long, word
   and byte width; forwards to SHLLrr / SHLWrr / SHLBrr with the count
   register first. */
322     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
323     {
324     SHLLrr(r, d);
325     }
326     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
327    
328     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
329     {
330     SHLWrr(r, d);
331     }
332     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
333    
334     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
335     {
336     SHLBrr(r, d);
337     }
338     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
339    
/* Rotate-right of register r by immediate i, byte and word width; forwards
   to RORBir / RORWir with the count first. */
340     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
341     {
342     RORBir(i, r);
343     }
344     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
345    
346     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
347     {
348     RORWir(i, r);
349     }
350     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
351    
/* OR the long at memory operand s (no base or index register) into register
   d; forwards to ORLmr. */
352     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
353     {
354     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
355     }
356     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
357    
/* Remaining rotate-right wrappers: raw_ror_l_ri rotates register r by
   immediate i; raw_ror_{l,w,b}_rr rotate register d by the count held in
   register r. Each forwards to the matching ROR macro, count first. */
358     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
359     {
360     RORLir(i, r);
361     }
362     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
363    
364     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
365     {
366     RORLrr(r, d);
367     }
368     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
369    
370     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
371     {
372     RORWrr(r, d);
373     }
374     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
375    
376     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
377     {
378     RORBrr(r, d);
379     }
380     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
381    
/* Right shifts of register d by the count held in register r.
   raw_shrl_{l,w,b}_rr forward to the SHR macros and raw_shra_{l,w,b}_rr to
   the SAR macros, count register first. */
382     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
383     {
384     SHRLrr(r, d);
385     }
386     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
387    
388     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
389     {
390     SHRWrr(r, d);
391     }
392     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
393    
394     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
395     {
396     SHRBrr(r, d);
397     }
398     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
399    
400     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
401     {
402 gbeauche 1.14 SARLrr(r, d);
403 gbeauche 1.13 }
404     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
405    
406     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
407     {
408 gbeauche 1.14 SARWrr(r, d);
409 gbeauche 1.13 }
410     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
411    
412     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
413     {
414 gbeauche 1.14 SARBrr(r, d);
415 gbeauche 1.13 }
416     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
417    
/* Shifts of register r by immediate i, in long, word and byte width.
   raw_shll_* forward to the SHL macros, raw_shrl_* to the SHR macros and
   raw_shra_* to the SAR macros, count first. */
418     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
419     {
420     SHLLir(i, r);
421     }
422     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
423    
424     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
425     {
426     SHLWir(i, r);
427     }
428     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
429    
430     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
431     {
432     SHLBir(i, r);
433     }
434     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
435    
436     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
437     {
438     SHRLir(i, r);
439     }
440     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
441    
442     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
443     {
444     SHRWir(i, r);
445     }
446     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
447    
448     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
449     {
450     SHRBir(i, r);
451     }
452     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
453    
454     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
455     {
456 gbeauche 1.14 SARLir(i, r);
457 gbeauche 1.13 }
458     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
459    
460     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
461     {
462 gbeauche 1.14 SARWir(i, r);
463 gbeauche 1.13 }
464     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
465    
466     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
467     {
468 gbeauche 1.14 SARBir(i, r);
469 gbeauche 1.13 }
470     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
471    
/* Operand-less instructions: SAHF, CPUID and LAHF.
   The dummy_ah / dummy_eax parameter is never used in the body; presumably
   it only exists so that the caller's register bookkeeping accounts for the
   implicitly used register -- TODO confirm with the LOWFUNC machinery. */
472     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
473     {
474     SAHF();
475     }
476     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
477    
478     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
479     {
480     CPUID();
481     }
482     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
483    
484     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
485     {
486     LAHF();
487     }
488     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
489    
/* Set-on-condition wrappers for condition code cc.
   raw_setcc writes byte register d via SETCCir; raw_setcc_m writes memory
   operand d (no base or index register) via SETCCim. */
490     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
491     {
492     SETCCir(cc, d);
493     }
494     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
495    
496     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
497     {
498     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
499     }
500     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
501    
/* Conditional register-to-register move of s into d under condition cc.
   With have_cmov set this is a single CMOVLrr. Otherwise a short conditional
   jump on cc^1 is placed in front of a plain MOVLrr. The jump displacement 2
   is hard-coded and has to equal the encoded size of that MOVLrr -- TODO
   confirm against codegen_x86.h.
   On x86-64 builds the fallback branch logs a message and calls abort()
   first, so the jump/mov pair after it is not reached there. */
502     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
503     {
504 gbeauche 1.15 if (have_cmov)
505     CMOVLrr(cc, s, d);
506     else { /* replacement using branch and mov */
507     #if defined(__x86_64__)
508     write_log("x86-64 implementations are bound to have CMOV!\n");
509     abort();
510     #endif
511     JCCSii(cc^1, 2);
512     MOVLrr(s, d);
513     }
514 gbeauche 1.13 }
515     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
516    
/* Bit-scan-forward of register s into register d; forwards to
   BSFLrr(s, d). */
517     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
518     {
519     BSFLrr(s, d);
520     }
521     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
522    
/* Sign- and zero-extension of register s into register d.
   sign_extend_16 / sign_extend_8 use MOVSWLrr / MOVSBLrr and
   zero_extend_16 / zero_extend_8 use MOVZWLrr / MOVZBLrr.
   raw_sign_extend_32_rr uses MOVSLQrr.
   NOTE(review): that is a long-to-quad form and this function is not guarded
   by an x86-64 #if here; presumably it is only called on x86-64 builds --
   confirm. */
523 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
524     {
525     MOVSLQrr(s, d);
526     }
527     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
528    
529 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
530     {
531     MOVSWLrr(s, d);
532     }
533     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
534    
535     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
536     {
537     MOVSBLrr(s, d);
538     }
539     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
540    
541     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
542     {
543     MOVZWLrr(s, d);
544     }
545     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
546    
547     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
548     {
549     MOVZBLrr(s, d);
550     }
551     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
552    
/* Two-operand signed multiply of register d by register s; forwards to
   IMULLrr(s, d). Unlike the 64-bit-result variants it places no restriction
   on which registers are used. */
553     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
554     {
555 gbeauche 1.14 IMULLrr(s, d);
556 gbeauche 1.13 }
557     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
558    
/* Signed multiply with fixed registers.
   Requires d == MUL_NREG1 and s == MUL_NREG2 (EAX_INDEX and EDX_INDEX per
   the defines near the top of the file); any other pair is logged with
   write_log and the process aborts. It then issues the one-operand
   IMULLr(s). */
559     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
560     {
561 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
562     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
563 gbeauche 1.13 abort();
564 gbeauche 1.14 }
565     IMULLr(s);
566 gbeauche 1.13 }
567     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
568    
/* Unsigned counterpart of raw_imul_64_32.
   Requires d == MUL_NREG1 and s == MUL_NREG2; any other pair is logged with
   write_log and the process aborts. It then issues the one-operand
   MULLr(s). */
569     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
570     {
571 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
572     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
573 gbeauche 1.13 abort();
574 gbeauche 1.14 }
575     MULLr(s);
576 gbeauche 1.13 }
577     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
578    
/* Not implemented: the body calls abort() unconditionally, without logging
   anything, and neither d nor s is used. */
579     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
580     {
581 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
582 gbeauche 1.13 }
583     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
584    
/* Register-to-register move of s into d, byte and word width; forwards to
   MOVBrr / MOVWrr with the source first. */
585     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
586     {
587     MOVBrr(s, d);
588     }
589     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
590    
591     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
592     {
593     MOVWrr(s, d);
594     }
595     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
596    
/* Indexed moves with a zero offset. The memory operand is described by
   baser, index and factor; the offset argument of the macro is the constant
   0.
   raw_mov_{l,w,b}_rrm_indexed load register d from that operand.
   raw_mov_{l,w,b}_mrr_indexed store register s to that operand. */
597     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
598     {
599     MOVLmr(0, baser, index, factor, d);
600     }
601     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
602    
603     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
604     {
605     MOVWmr(0, baser, index, factor, d);
606     }
607     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
608    
609     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
610     {
611     MOVBmr(0, baser, index, factor, d);
612     }
613     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
614    
615     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
616     {
617     MOVLrm(s, 0, baser, index, factor);
618     }
619     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
620    
621     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
622     {
623     MOVWrm(s, 0, baser, index, factor);
624     }
625     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
626    
627     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
628     {
629     MOVBrm(s, 0, baser, index, factor);
630     }
631     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
632    
/* Indexed moves with an additional immediate offset. Same shape as the
   offset-less indexed moves, but the immediate `base` is passed where those
   pass 0.
   raw_mov_{l,w,b}_bmrr_indexed store register s to the operand described by
   base, baser, index and factor.
   raw_mov_{l,w,b}_brrm_indexed load register d from it. */
633     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
634     {
635     MOVLrm(s, base, baser, index, factor);
636     }
637     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
638    
639     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
640     {
641     MOVWrm(s, base, baser, index, factor);
642     }
643     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
644    
645     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
646     {
647     MOVBrm(s, base, baser, index, factor);
648     }
649     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
650    
651     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
652     {
653     MOVLmr(base, baser, index, factor, d);
654     }
655     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
656    
657     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
658     {
659     MOVWmr(base, baser, index, factor, d);
660     }
661     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
662    
663     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
664     {
665     MOVBmr(base, baser, index, factor, d);
666     }
667     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
668    
/* Load register d from the operand described by immediate base, index and
   factor, with no base register (X86_NOREG). */
669     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
670     {
671     MOVLmr(base, X86_NOREG, index, factor, d);
672     }
673     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
674    
/* Conditional load of d from the operand described by immediate base, index
   and factor (no base register) under condition cond.
   With have_cmov set this is a single CMOVLmr. Otherwise a short conditional
   jump on cond^1 is placed in front of the same load done with MOVLmr. The
   jump displacement 7 is hard-coded and has to equal the encoded size of
   that MOVLmr -- TODO confirm against codegen_x86.h.
   On x86-64 builds the fallback branch logs a message and calls abort()
   first, so the jump/mov pair after it is not reached there. */
675     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
676     {
677 gbeauche 1.15 if (have_cmov)
678     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
679     else { /* replacement using branch and mov */
680     #if defined(__x86_64__)
681     write_log("x86-64 implementations are bound to have CMOV!\n");
682     abort();
683     #endif
684     JCCSii(cond^1, 7);
685     MOVLmr(base, X86_NOREG, index, factor, d);
686     }
687 gbeauche 1.13 }
688     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
689    
/* Conditional load of d from memory operand mem (no base or index register)
   under condition cond.
   With have_cmov set this is a single CMOVLmr. Otherwise a short conditional
   jump on cond^1 is placed in front of the same load done with MOVLmr. The
   jump displacement 6 is hard-coded and has to equal the encoded size of
   that MOVLmr for every destination register d -- TODO confirm against
   codegen_x86.h.
   On x86-64 builds the fallback branch logs a message and calls abort()
   first, so the jump/mov pair after it is not reached there. */
690     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
691     {
692 gbeauche 1.15 if (have_cmov)
693     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
694     else { /* replacement using branch and mov */
695     #if defined(__x86_64__)
696     write_log("x86-64 implementations are bound to have CMOV!\n");
697     abort();
698     #endif
699     JCCSii(cond^1, 6);
700     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
701     }
702 gbeauche 1.13 }
703     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
704    
/* Loads of register d from memory addressed by register s plus immediate
   offset (no index register), in long, word and byte width.
   The raw_mov_?_rR and raw_mov_?_brR functions of the same width have
   identical bodies in this file. */
705     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
706     {
707     MOVLmr(offset, s, X86_NOREG, 1, d);
708     }
709     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
710    
711     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
712     {
713     MOVWmr(offset, s, X86_NOREG, 1, d);
714     }
715     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
716    
717     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
718     {
719     MOVBmr(offset, s, X86_NOREG, 1, d);
720     }
721     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
722    
723     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
724     {
725     MOVLmr(offset, s, X86_NOREG, 1, d);
726     }
727     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
728    
729     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
730     {
731     MOVWmr(offset, s, X86_NOREG, 1, d);
732     }
733     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
734    
735     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
736     {
737     MOVBmr(offset, s, X86_NOREG, 1, d);
738     }
739     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
740    
/* Stores to memory addressed by register d plus immediate offset (no index
   register), in long, word and byte width.
   raw_mov_?_Ri store immediate i; raw_mov_?_Rr store register s. */
741     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
742     {
743     MOVLim(i, offset, d, X86_NOREG, 1);
744     }
745     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
746    
747     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
748     {
749     MOVWim(i, offset, d, X86_NOREG, 1);
750     }
751     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
752    
753     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
754     {
755     MOVBim(i, offset, d, X86_NOREG, 1);
756     }
757     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
758    
759     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
760     {
761     MOVLrm(s, offset, d, X86_NOREG, 1);
762     }
763     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
764    
765     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
766     {
767     MOVWrm(s, offset, d, X86_NOREG, 1);
768     }
769     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
770    
771     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
772     {
773     MOVBrm(s, offset, d, X86_NOREG, 1);
774     }
775     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
776    
/* LEA wrappers writing register d; all forward to LEALmr.
   raw_lea_l_brr: operand is register s plus immediate offset, no index.
   raw_lea_l_brr_indexed: operand is offset, s, index and factor.
   raw_lea_l_rr_indexed: same with the offset fixed to 0. */
777     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
778     {
779     LEALmr(offset, s, X86_NOREG, 1, d);
780     }
781     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
782    
783     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
784     {
785     LEALmr(offset, s, index, factor, d);
786     }
787     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
788    
789     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
790     {
791     LEALmr(0, s, index, factor, d);
792     }
793     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
794    
/* Stores of register s to memory addressed by register d plus immediate
   offset (no index register), in long, word and byte width. The bodies are
   identical to those of raw_mov_l_Rr / raw_mov_w_Rr / raw_mov_b_Rr. */
795     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
796     {
797     MOVLrm(s, offset, d, X86_NOREG, 1);
798     }
799     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
800    
801     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
802     {
803     MOVWrm(s, offset, d, X86_NOREG, 1);
804     }
805     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
806    
807     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
808     {
809     MOVBrm(s, offset, d, X86_NOREG, 1);
810     }
811     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
812    
/* Byte-swap wrappers.
   raw_bswap_32 forwards to BSWAPLr(r).
   raw_bswap_16 swaps the two bytes of the word in r by rotating it left by 8
   with ROLWir(8, r) rather than using a bswap macro. */
813     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
814     {
815     BSWAPLr(r);
816     }
817     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
818    
819     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
820     {
821     ROLWir(8, r);
822     }
823     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
824    
/* Plain moves.
   raw_mov_l_rr: register s into register d.
   raw_mov_{l,w,b}_mr: register s to memory operand d (no base or index
   register).
   raw_mov_{w,b}_rm: memory operand s (no base or index register) into
   register d.
   raw_mov_{l,w,b}_ri: immediate s into register d. */
825     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
826     {
827     MOVLrr(s, d);
828     }
829     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
830    
831     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
832     {
833     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
834     }
835     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
836    
837     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
838     {
839     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
840     }
841     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
842    
843     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
844     {
845     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
846     }
847     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
848    
849     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
850     {
851     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
852     }
853     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
854    
855     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
856     {
857     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
858     }
859     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
860    
861     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
862     {
863     MOVLir(s, d);
864     }
865     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
866    
867     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
868     {
869     MOVWir(s, d);
870     }
871     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
872    
873     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
874     {
875     MOVBir(s, d);
876     }
877     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
878    
/* Add immediate s to memory operand d (no base or index register).
   raw_adc_l_mi forwards to ADCLim and is the only one declared with RMW as
   first LOWFUNC argument; raw_add_{l,w,b}_mi forward to ADDLim / ADDWim /
   ADDBim and declare WRITE there.
   Note that d is typed MEMRW for the adc variant but IMM for the add
   variants. */
879     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
880     {
881     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
882     }
883     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
884    
885     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
886     {
887     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
888     }
889     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
890    
891     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
892     {
893     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
894     }
895     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
896    
897     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
898     {
899     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
900     }
901     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
902    
/* TEST wrappers. Both operands are read-only (R types).
   raw_test_l_ri tests register d against immediate i; raw_test_{l,w,b}_rr
   test register d against register s. */
903     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
904     {
905     TESTLir(i, d);
906     }
907     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
908    
909     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
910     {
911     TESTLrr(s, d);
912     }
913     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
914    
915     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
916     {
917     TESTWrr(s, d);
918     }
919     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
920    
921     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
922     {
923     TESTBrr(s, d);
924     }
925     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
926    
/* AND wrappers on register d.
   raw_and_{l,w}_ri take immediate i; raw_and_l / raw_and_w / raw_and_b take
   register s. Each forwards to the matching AND macro, source first. */
927     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
928     {
929     ANDLir(i, d);
930     }
931     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
932    
933     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
934     {
935     ANDWir(i, d);
936     }
937     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
938    
939     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
940     {
941     ANDLrr(s, d);
942     }
943     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
944    
945     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
946     {
947     ANDWrr(s, d);
948     }
949     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
950    
951     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
952     {
953     ANDBrr(s, d);
954     }
955     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
956    
957     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
958     {
959     ORLir(i, d);
960     }
961     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
962    
963     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
964     {
965     ORLrr(s, d);
966     }
967     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
968    
969     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
970     {
971     ORWrr(s, d);
972     }
973     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
974    
975     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
976     {
977     ORBrr(s, d);
978     }
979     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
980    
981     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
982     {
983     ADCLrr(s, d);
984     }
985     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
986    
987     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
988     {
989     ADCWrr(s, d);
990     }
991     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
992    
993     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
994     {
995     ADCBrr(s, d);
996     }
997     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
998    
999     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1000     {
1001     ADDLrr(s, d);
1002     }
1003     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1004    
1005     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1006     {
1007     ADDWrr(s, d);
1008     }
1009     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1010    
1011     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1012     {
1013     ADDBrr(s, d);
1014     }
1015     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1016    
1017     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1018     {
1019     SUBLir(i, d);
1020     }
1021     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1022    
1023     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1024     {
1025     SUBBir(i, d);
1026     }
1027     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1028    
1029     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1030     {
1031     ADDLir(i, d);
1032     }
1033     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1034    
1035     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1036     {
1037     ADDWir(i, d);
1038     }
1039     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1040    
1041     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1042     {
1043     ADDBir(i, d);
1044     }
1045     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1046    
1047     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1048     {
1049     SBBLrr(s, d);
1050     }
1051     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1052    
1053     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1054     {
1055     SBBWrr(s, d);
1056     }
1057     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1058    
1059     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1060     {
1061     SBBBrr(s, d);
1062     }
1063     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1064    
1065     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1066     {
1067     SUBLrr(s, d);
1068     }
1069     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1070    
1071     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1072     {
1073     SUBWrr(s, d);
1074     }
1075     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1076    
1077     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1078     {
1079     SUBBrr(s, d);
1080     }
1081     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1082    
1083     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1084     {
1085     CMPLrr(s, d);
1086     }
1087     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1088    
1089     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1090     {
1091     CMPLir(i, r);
1092     }
1093     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1094    
1095     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1096     {
1097     CMPWrr(s, d);
1098     }
1099     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1100    
1101     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1102     {
1103     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1104     }
1105     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1106    
1107     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1108     {
1109     CMPBir(i, d);
1110     }
1111     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1112    
1113     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1114     {
1115     CMPBrr(s, d);
1116     }
1117     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1118    
1119     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1120     {
1121     CMPLmr(offset, X86_NOREG, index, factor, d);
1122     }
1123     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1124    
1125     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1126     {
1127     XORLrr(s, d);
1128     }
1129     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1130    
1131     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1132     {
1133     XORWrr(s, d);
1134     }
1135     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1136    
1137     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1138     {
1139     XORBrr(s, d);
1140     }
1141     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1142    
1143     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1144     {
1145     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1146     }
1147     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1148    
1149     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1150     {
1151     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1152     }
1153     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1154    
1155     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1156     {
1157     XCHGLrr(r2, r1);
1158     }
1159     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1160    
1161     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1162     {
1163 gbeauche 1.18 PUSHF();
1164 gbeauche 1.13 }
1165     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1166    
1167     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1168     {
1169 gbeauche 1.18 POPF();
1170 gbeauche 1.13 }
1171     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1172    
1173     #else
1174    
/* Compile-time switches for shorter encodings in the hand-written emitters. */
const bool optimize_accum = true;       /* raw_sub_w_ri: use the one-byte 0x2d opcode when isaccum(d) */
const bool optimize_imm8 = true;        /* NOTE(review): no use visible near here; presumably gates imm8 forms -- confirm */
const bool optimize_shift_once = true;  /* shift/rotate emitters: use the 0xd0/0xd1 forms when the count is 1 */
1178    
1179     /*************************************************************************
1180     * Actual encoding of the instructions on the target CPU *
1181     *************************************************************************/
1182    
1183 gbeauche 1.2 static __inline__ int isaccum(int r)
1184     {
1185     return (r == EAX_INDEX);
1186     }
1187    
1188 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1189     {
1190     return (x>=-128 && x<=127);
1191     }
1192    
1193     static __inline__ int isword(uae_s32 x)
1194     {
1195     return (x>=-32768 && x<=32767);
1196     }
1197    
1198     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1199     {
1200     emit_byte(0x50+r);
1201     }
1202     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1203    
1204     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1205     {
1206     emit_byte(0x58+r);
1207     }
1208     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1209    
1210     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1211     {
1212     emit_byte(0x0f);
1213     emit_byte(0xba);
1214     emit_byte(0xe0+r);
1215     emit_byte(i);
1216     }
1217     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1218    
1219     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1220     {
1221     emit_byte(0x0f);
1222     emit_byte(0xa3);
1223     emit_byte(0xc0+8*b+r);
1224     }
1225     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1226    
1227     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1228     {
1229     emit_byte(0x0f);
1230     emit_byte(0xba);
1231     emit_byte(0xf8+r);
1232     emit_byte(i);
1233     }
1234     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1235    
1236     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1237     {
1238     emit_byte(0x0f);
1239     emit_byte(0xbb);
1240     emit_byte(0xc0+8*b+r);
1241     }
1242     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1243    
1244    
1245     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1246     {
1247     emit_byte(0x0f);
1248     emit_byte(0xba);
1249     emit_byte(0xf0+r);
1250     emit_byte(i);
1251     }
1252     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1253    
1254     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1255     {
1256     emit_byte(0x0f);
1257     emit_byte(0xb3);
1258     emit_byte(0xc0+8*b+r);
1259     }
1260     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1261    
1262     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1263     {
1264     emit_byte(0x0f);
1265     emit_byte(0xba);
1266     emit_byte(0xe8+r);
1267     emit_byte(i);
1268     }
1269     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1270    
1271     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1272     {
1273     emit_byte(0x0f);
1274     emit_byte(0xab);
1275     emit_byte(0xc0+8*b+r);
1276     }
1277     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1278    
1279     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1280     {
1281     emit_byte(0x66);
1282     if (isbyte(i)) {
1283     emit_byte(0x83);
1284     emit_byte(0xe8+d);
1285     emit_byte(i);
1286     }
1287     else {
1288 gbeauche 1.2 if (optimize_accum && isaccum(d))
1289     emit_byte(0x2d);
1290     else {
1291 gbeauche 1.1 emit_byte(0x81);
1292     emit_byte(0xe8+d);
1293 gbeauche 1.2 }
1294 gbeauche 1.1 emit_word(i);
1295     }
1296     }
1297     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1298    
1299    
1300     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1301     {
1302     emit_byte(0x8b);
1303     emit_byte(0x05+8*d);
1304     emit_long(s);
1305     }
1306     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1307    
1308     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1309     {
1310     emit_byte(0xc7);
1311     emit_byte(0x05);
1312     emit_long(d);
1313     emit_long(s);
1314     }
1315     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1316    
1317     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1318     {
1319     emit_byte(0x66);
1320     emit_byte(0xc7);
1321     emit_byte(0x05);
1322     emit_long(d);
1323     emit_word(s);
1324     }
1325     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1326    
1327     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1328     {
1329     emit_byte(0xc6);
1330     emit_byte(0x05);
1331     emit_long(d);
1332     emit_byte(s);
1333     }
1334     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1335    
1336     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1337     {
1338     if (optimize_shift_once && (i == 1)) {
1339     emit_byte(0xd0);
1340     emit_byte(0x05);
1341     emit_long(d);
1342     }
1343     else {
1344     emit_byte(0xc0);
1345     emit_byte(0x05);
1346     emit_long(d);
1347     emit_byte(i);
1348     }
1349     }
1350     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1351    
1352     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1353     {
1354     if (optimize_shift_once && (i == 1)) {
1355     emit_byte(0xd0);
1356     emit_byte(0xc0+r);
1357     }
1358     else {
1359     emit_byte(0xc0);
1360     emit_byte(0xc0+r);
1361     emit_byte(i);
1362     }
1363     }
1364     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1365    
1366     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1367     {
1368     emit_byte(0x66);
1369     emit_byte(0xc1);
1370     emit_byte(0xc0+r);
1371     emit_byte(i);
1372     }
1373     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1374    
1375     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1376     {
1377     if (optimize_shift_once && (i == 1)) {
1378     emit_byte(0xd1);
1379     emit_byte(0xc0+r);
1380     }
1381     else {
1382     emit_byte(0xc1);
1383     emit_byte(0xc0+r);
1384     emit_byte(i);
1385     }
1386     }
1387     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1388    
1389     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1390     {
1391     emit_byte(0xd3);
1392     emit_byte(0xc0+d);
1393     }
1394     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1395    
1396     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1397     {
1398     emit_byte(0x66);
1399     emit_byte(0xd3);
1400     emit_byte(0xc0+d);
1401     }
1402     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1403    
1404     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1405     {
1406     emit_byte(0xd2);
1407     emit_byte(0xc0+d);
1408     }
1409     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1410    
1411     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1412     {
1413     emit_byte(0xd3);
1414     emit_byte(0xe0+d);
1415     }
1416     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1417    
1418     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1419     {
1420     emit_byte(0x66);
1421     emit_byte(0xd3);
1422     emit_byte(0xe0+d);
1423     }
1424     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1425    
1426     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1427     {
1428     emit_byte(0xd2);
1429     emit_byte(0xe0+d);
1430     }
1431     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1432    
1433     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1434     {
1435     if (optimize_shift_once && (i == 1)) {
1436     emit_byte(0xd0);
1437     emit_byte(0xc8+r);
1438     }
1439     else {
1440     emit_byte(0xc0);
1441     emit_byte(0xc8+r);
1442     emit_byte(i);
1443     }
1444     }
1445     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1446    
1447     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1448     {
1449     emit_byte(0x66);
1450     emit_byte(0xc1);
1451     emit_byte(0xc8+r);
1452     emit_byte(i);
1453     }
1454     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1455    
1456     // gb-- used for making an fpcr value in compemu_fpp.cpp
1457     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1458     {
1459     emit_byte(0x0b);
1460     emit_byte(0x05+8*d);
1461     emit_long(s);
1462     }
1463     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1464    
1465     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1466     {
1467     if (optimize_shift_once && (i == 1)) {
1468     emit_byte(0xd1);
1469     emit_byte(0xc8+r);
1470     }
1471     else {
1472     emit_byte(0xc1);
1473     emit_byte(0xc8+r);
1474     emit_byte(i);
1475     }
1476     }
1477     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1478    
1479     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1480     {
1481     emit_byte(0xd3);
1482     emit_byte(0xc8+d);
1483     }
1484     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1485    
1486     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1487     {
1488     emit_byte(0x66);
1489     emit_byte(0xd3);
1490     emit_byte(0xc8+d);
1491     }
1492     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1493    
1494     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1495     {
1496     emit_byte(0xd2);
1497     emit_byte(0xc8+d);
1498     }
1499     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1500    
1501     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1502     {
1503     emit_byte(0xd3);
1504     emit_byte(0xe8+d);
1505     }
1506     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1507    
1508     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1509     {
1510     emit_byte(0x66);
1511     emit_byte(0xd3);
1512     emit_byte(0xe8+d);
1513     }
1514     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1515    
1516     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1517     {
1518     emit_byte(0xd2);
1519     emit_byte(0xe8+d);
1520     }
1521     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1522    
1523     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1524     {
1525     emit_byte(0xd3);
1526     emit_byte(0xf8+d);
1527     }
1528     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1529    
1530     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1531     {
1532     emit_byte(0x66);
1533     emit_byte(0xd3);
1534     emit_byte(0xf8+d);
1535     }
1536     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1537    
1538     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1539     {
1540     emit_byte(0xd2);
1541     emit_byte(0xf8+d);
1542     }
1543     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1544    
1545     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1546     {
1547     if (optimize_shift_once && (i == 1)) {
1548     emit_byte(0xd1);
1549     emit_byte(0xe0+r);
1550     }
1551     else {
1552     emit_byte(0xc1);
1553     emit_byte(0xe0+r);
1554     emit_byte(i);
1555     }
1556     }
1557     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1558    
1559     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1560     {
1561     emit_byte(0x66);
1562     emit_byte(0xc1);
1563     emit_byte(0xe0+r);
1564     emit_byte(i);
1565     }
1566     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1567    
1568     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1569     {
1570     if (optimize_shift_once && (i == 1)) {
1571     emit_byte(0xd0);
1572     emit_byte(0xe0+r);
1573     }
1574     else {
1575     emit_byte(0xc0);
1576     emit_byte(0xe0+r);
1577     emit_byte(i);
1578     }
1579     }
1580     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1581    
1582     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1583     {
1584     if (optimize_shift_once && (i == 1)) {
1585     emit_byte(0xd1);
1586     emit_byte(0xe8+r);
1587     }
1588     else {
1589     emit_byte(0xc1);
1590     emit_byte(0xe8+r);
1591     emit_byte(i);
1592     }
1593     }
1594     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1595    
1596     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1597     {
1598     emit_byte(0x66);
1599     emit_byte(0xc1);
1600     emit_byte(0xe8+r);
1601     emit_byte(i);
1602     }
1603     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1604    
1605     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1606     {
1607     if (optimize_shift_once && (i == 1)) {
1608     emit_byte(0xd0);
1609     emit_byte(0xe8+r);
1610     }
1611     else {
1612     emit_byte(0xc0);
1613     emit_byte(0xe8+r);
1614     emit_byte(i);
1615     }
1616     }
1617     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1618    
1619     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1620     {
1621     if (optimize_shift_once && (i == 1)) {
1622     emit_byte(0xd1);
1623     emit_byte(0xf8+r);
1624     }
1625     else {
1626     emit_byte(0xc1);
1627     emit_byte(0xf8+r);
1628     emit_byte(i);
1629     }
1630     }
1631     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1632    
1633     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1634     {
1635     emit_byte(0x66);
1636     emit_byte(0xc1);
1637     emit_byte(0xf8+r);
1638     emit_byte(i);
1639     }
1640     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1641    
1642     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1643     {
1644     if (optimize_shift_once && (i == 1)) {
1645     emit_byte(0xd0);
1646     emit_byte(0xf8+r);
1647     }
1648     else {
1649     emit_byte(0xc0);
1650     emit_byte(0xf8+r);
1651     emit_byte(i);
1652     }
1653     }
1654     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1655    
1656     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1657     {
1658     emit_byte(0x9e);
1659     }
1660     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1661    
1662     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1663     {
1664     emit_byte(0x0f);
1665     emit_byte(0xa2);
1666     }
1667     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1668    
1669     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1670     {
1671     emit_byte(0x9f);
1672     }
1673     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1674    
1675     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1676     {
1677     emit_byte(0x0f);
1678     emit_byte(0x90+cc);
1679     emit_byte(0xc0+d);
1680     }
1681     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1682    
1683     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1684     {
1685     emit_byte(0x0f);
1686     emit_byte(0x90+cc);
1687     emit_byte(0x05);
1688     emit_long(d);
1689     }
1690     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1691    
1692     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1693     {
1694     if (have_cmov) {
1695     emit_byte(0x0f);
1696     emit_byte(0x40+cc);
1697     emit_byte(0xc0+8*d+s);
1698     }
1699     else { /* replacement using branch and mov */
1700     int uncc=(cc^1);
1701     emit_byte(0x70+uncc);
1702     emit_byte(2); /* skip next 2 bytes if not cc=true */
1703     emit_byte(0x89);
1704     emit_byte(0xc0+8*s+d);
1705     }
1706     }
1707     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1708    
1709     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1710     {
1711     emit_byte(0x0f);
1712     emit_byte(0xbc);
1713     emit_byte(0xc0+8*d+s);
1714     }
1715     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1716    
1717     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1718     {
1719     emit_byte(0x0f);
1720     emit_byte(0xbf);
1721     emit_byte(0xc0+8*d+s);
1722     }
1723     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1724    
1725     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1726     {
1727     emit_byte(0x0f);
1728     emit_byte(0xbe);
1729     emit_byte(0xc0+8*d+s);
1730     }
1731     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1732    
1733     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1734     {
1735     emit_byte(0x0f);
1736     emit_byte(0xb7);
1737     emit_byte(0xc0+8*d+s);
1738     }
1739     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1740    
1741     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1742     {
1743     emit_byte(0x0f);
1744     emit_byte(0xb6);
1745     emit_byte(0xc0+8*d+s);
1746     }
1747     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1748    
1749     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1750     {
1751     emit_byte(0x0f);
1752     emit_byte(0xaf);
1753     emit_byte(0xc0+8*d+s);
1754     }
1755     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1756    
1757     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1758     {
1759     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1760     abort();
1761     emit_byte(0xf7);
1762     emit_byte(0xea);
1763     }
1764     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1765    
1766     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1767     {
1768     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1769     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1770     abort();
1771     }
1772     emit_byte(0xf7);
1773     emit_byte(0xe2);
1774     }
1775     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1776    
1777     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1778     {
1779     abort(); /* %^$&%^$%#^ x86! */
1780     emit_byte(0x0f);
1781     emit_byte(0xaf);
1782     emit_byte(0xc0+8*d+s);
1783     }
1784     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1785    
1786     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1787     {
1788     emit_byte(0x88);
1789     emit_byte(0xc0+8*s+d);
1790     }
1791     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1792    
1793     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1794     {
1795     emit_byte(0x66);
1796     emit_byte(0x89);
1797     emit_byte(0xc0+8*s+d);
1798     }
1799     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1800    
1801     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1802     {
1803     int isebp=(baser==5)?0x40:0;
1804     int fi;
1805    
1806     switch(factor) {
1807     case 1: fi=0; break;
1808     case 2: fi=1; break;
1809     case 4: fi=2; break;
1810     case 8: fi=3; break;
1811     default: abort();
1812     }
1813    
1814    
1815     emit_byte(0x8b);
1816     emit_byte(0x04+8*d+isebp);
1817     emit_byte(baser+8*index+0x40*fi);
1818     if (isebp)
1819     emit_byte(0x00);
1820     }
1821     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1822    
1823     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1824     {
1825     int fi;
1826     int isebp;
1827    
1828     switch(factor) {
1829     case 1: fi=0; break;
1830     case 2: fi=1; break;
1831     case 4: fi=2; break;
1832     case 8: fi=3; break;
1833     default: abort();
1834     }
1835     isebp=(baser==5)?0x40:0;
1836    
1837     emit_byte(0x66);
1838     emit_byte(0x8b);
1839     emit_byte(0x04+8*d+isebp);
1840     emit_byte(baser+8*index+0x40*fi);
1841     if (isebp)
1842     emit_byte(0x00);
1843     }
1844     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1845    
1846     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1847     {
1848     int fi;
1849     int isebp;
1850    
1851     switch(factor) {
1852     case 1: fi=0; break;
1853     case 2: fi=1; break;
1854     case 4: fi=2; break;
1855     case 8: fi=3; break;
1856     default: abort();
1857     }
1858     isebp=(baser==5)?0x40:0;
1859    
1860     emit_byte(0x8a);
1861     emit_byte(0x04+8*d+isebp);
1862     emit_byte(baser+8*index+0x40*fi);
1863     if (isebp)
1864     emit_byte(0x00);
1865     }
1866     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1867    
1868     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1869     {
1870     int fi;
1871     int isebp;
1872    
1873     switch(factor) {
1874     case 1: fi=0; break;
1875     case 2: fi=1; break;
1876     case 4: fi=2; break;
1877     case 8: fi=3; break;
1878     default: abort();
1879     }
1880    
1881    
1882     isebp=(baser==5)?0x40:0;
1883    
1884     emit_byte(0x89);
1885     emit_byte(0x04+8*s+isebp);
1886     emit_byte(baser+8*index+0x40*fi);
1887     if (isebp)
1888     emit_byte(0x00);
1889     }
1890     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1891    
1892     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1893     {
1894     int fi;
1895     int isebp;
1896    
1897     switch(factor) {
1898     case 1: fi=0; break;
1899     case 2: fi=1; break;
1900     case 4: fi=2; break;
1901     case 8: fi=3; break;
1902     default: abort();
1903     }
1904     isebp=(baser==5)?0x40:0;
1905    
1906     emit_byte(0x66);
1907     emit_byte(0x89);
1908     emit_byte(0x04+8*s+isebp);
1909     emit_byte(baser+8*index+0x40*fi);
1910     if (isebp)
1911     emit_byte(0x00);
1912     }
1913     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1914    
1915     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1916     {
1917     int fi;
1918     int isebp;
1919    
1920     switch(factor) {
1921     case 1: fi=0; break;
1922     case 2: fi=1; break;
1923     case 4: fi=2; break;
1924     case 8: fi=3; break;
1925     default: abort();
1926     }
1927     isebp=(baser==5)?0x40:0;
1928    
1929     emit_byte(0x88);
1930     emit_byte(0x04+8*s+isebp);
1931     emit_byte(baser+8*index+0x40*fi);
1932     if (isebp)
1933     emit_byte(0x00);
1934     }
1935     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1936    
1937     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1938     {
1939     int fi;
1940    
1941     switch(factor) {
1942     case 1: fi=0; break;
1943     case 2: fi=1; break;
1944     case 4: fi=2; break;
1945     case 8: fi=3; break;
1946     default: abort();
1947     }
1948    
1949     emit_byte(0x89);
1950     emit_byte(0x84+8*s);
1951     emit_byte(baser+8*index+0x40*fi);
1952     emit_long(base);
1953     }
1954     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1955    
1956     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1957     {
1958     int fi;
1959    
1960     switch(factor) {
1961     case 1: fi=0; break;
1962     case 2: fi=1; break;
1963     case 4: fi=2; break;
1964     case 8: fi=3; break;
1965     default: abort();
1966     }
1967    
1968     emit_byte(0x66);
1969     emit_byte(0x89);
1970     emit_byte(0x84+8*s);
1971     emit_byte(baser+8*index+0x40*fi);
1972     emit_long(base);
1973     }
1974     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1975    
1976     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1977     {
1978     int fi;
1979    
1980     switch(factor) {
1981     case 1: fi=0; break;
1982     case 2: fi=1; break;
1983     case 4: fi=2; break;
1984     case 8: fi=3; break;
1985     default: abort();
1986     }
1987    
1988     emit_byte(0x88);
1989     emit_byte(0x84+8*s);
1990     emit_byte(baser+8*index+0x40*fi);
1991     emit_long(base);
1992     }
1993     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1994    
1995     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1996     {
1997     int fi;
1998    
1999     switch(factor) {
2000     case 1: fi=0; break;
2001     case 2: fi=1; break;
2002     case 4: fi=2; break;
2003     case 8: fi=3; break;
2004     default: abort();
2005     }
2006    
2007     emit_byte(0x8b);
2008     emit_byte(0x84+8*d);
2009     emit_byte(baser+8*index+0x40*fi);
2010     emit_long(base);
2011     }
2012     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2013    
2014     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2015     {
2016     int fi;
2017    
2018     switch(factor) {
2019     case 1: fi=0; break;
2020     case 2: fi=1; break;
2021     case 4: fi=2; break;
2022     case 8: fi=3; break;
2023     default: abort();
2024     }
2025    
2026     emit_byte(0x66);
2027     emit_byte(0x8b);
2028     emit_byte(0x84+8*d);
2029     emit_byte(baser+8*index+0x40*fi);
2030     emit_long(base);
2031     }
2032     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2033    
2034     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2035     {
2036     int fi;
2037    
2038     switch(factor) {
2039     case 1: fi=0; break;
2040     case 2: fi=1; break;
2041     case 4: fi=2; break;
2042     case 8: fi=3; break;
2043     default: abort();
2044     }
2045    
2046     emit_byte(0x8a);
2047     emit_byte(0x84+8*d);
2048     emit_byte(baser+8*index+0x40*fi);
2049     emit_long(base);
2050     }
2051     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2052    
2053     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2054     {
2055     int fi;
2056     switch(factor) {
2057     case 1: fi=0; break;
2058     case 2: fi=1; break;
2059     case 4: fi=2; break;
2060     case 8: fi=3; break;
2061     default:
2062     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2063     abort();
2064     }
2065     emit_byte(0x8b);
2066     emit_byte(0x04+8*d);
2067     emit_byte(0x05+8*index+64*fi);
2068     emit_long(base);
2069     }
2070     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2071    
2072     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2073     {
2074     int fi;
2075     switch(factor) {
2076     case 1: fi=0; break;
2077     case 2: fi=1; break;
2078     case 4: fi=2; break;
2079     case 8: fi=3; break;
2080     default:
2081     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2082     abort();
2083     }
2084     if (have_cmov) {
2085     emit_byte(0x0f);
2086     emit_byte(0x40+cond);
2087     emit_byte(0x04+8*d);
2088     emit_byte(0x05+8*index+64*fi);
2089     emit_long(base);
2090     }
2091     else { /* replacement using branch and mov */
2092     int uncc=(cond^1);
2093     emit_byte(0x70+uncc);
2094     emit_byte(7); /* skip next 7 bytes if not cc=true */
2095     emit_byte(0x8b);
2096     emit_byte(0x04+8*d);
2097     emit_byte(0x05+8*index+64*fi);
2098     emit_long(base);
2099     }
2100     }
2101     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2102    
2103     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2104     {
2105     if (have_cmov) {
2106     emit_byte(0x0f);
2107     emit_byte(0x40+cond);
2108     emit_byte(0x05+8*d);
2109     emit_long(mem);
2110     }
2111     else { /* replacement using branch and mov */
2112     int uncc=(cond^1);
2113     emit_byte(0x70+uncc);
2114     emit_byte(6); /* skip next 6 bytes if not cc=true */
2115     emit_byte(0x8b);
2116     emit_byte(0x05+8*d);
2117     emit_long(mem);
2118     }
2119     }
2120     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2121    
2122     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2123     {
2124 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2125 gbeauche 1.1 emit_byte(0x8b);
2126     emit_byte(0x40+8*d+s);
2127     emit_byte(offset);
2128     }
2129     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2130    
2131     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2132     {
2133 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2134 gbeauche 1.1 emit_byte(0x66);
2135     emit_byte(0x8b);
2136     emit_byte(0x40+8*d+s);
2137     emit_byte(offset);
2138     }
2139     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2140    
2141     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2142     {
2143 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2144 gbeauche 1.1 emit_byte(0x8a);
2145     emit_byte(0x40+8*d+s);
2146     emit_byte(offset);
2147     }
2148     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2149    
2150     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2151     {
2152     emit_byte(0x8b);
2153     emit_byte(0x80+8*d+s);
2154     emit_long(offset);
2155     }
2156     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2157    
2158     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2159     {
2160     emit_byte(0x66);
2161     emit_byte(0x8b);
2162     emit_byte(0x80+8*d+s);
2163     emit_long(offset);
2164     }
2165     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2166    
2167     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2168     {
2169     emit_byte(0x8a);
2170     emit_byte(0x80+8*d+s);
2171     emit_long(offset);
2172     }
2173     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2174    
2175     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2176     {
2177 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2178 gbeauche 1.1 emit_byte(0xc7);
2179     emit_byte(0x40+d);
2180     emit_byte(offset);
2181     emit_long(i);
2182     }
2183     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2184    
2185     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2186     {
2187 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2188 gbeauche 1.1 emit_byte(0x66);
2189     emit_byte(0xc7);
2190     emit_byte(0x40+d);
2191     emit_byte(offset);
2192     emit_word(i);
2193     }
2194     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2195    
2196     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2197     {
2198 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2199 gbeauche 1.1 emit_byte(0xc6);
2200     emit_byte(0x40+d);
2201     emit_byte(offset);
2202     emit_byte(i);
2203     }
2204     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2205    
2206     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2207     {
2208 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2209 gbeauche 1.1 emit_byte(0x89);
2210     emit_byte(0x40+8*s+d);
2211     emit_byte(offset);
2212     }
2213     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2214    
2215     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2216     {
2217 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2218 gbeauche 1.1 emit_byte(0x66);
2219     emit_byte(0x89);
2220     emit_byte(0x40+8*s+d);
2221     emit_byte(offset);
2222     }
2223     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2224    
2225     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2226     {
2227 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2228 gbeauche 1.1 emit_byte(0x88);
2229     emit_byte(0x40+8*s+d);
2230     emit_byte(offset);
2231     }
2232     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2233    
2234     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2235     {
2236     if (optimize_imm8 && isbyte(offset)) {
2237     emit_byte(0x8d);
2238     emit_byte(0x40+8*d+s);
2239     emit_byte(offset);
2240     }
2241     else {
2242     emit_byte(0x8d);
2243     emit_byte(0x80+8*d+s);
2244     emit_long(offset);
2245     }
2246     }
2247     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2248    
2249     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2250     {
2251     int fi;
2252    
2253     switch(factor) {
2254     case 1: fi=0; break;
2255     case 2: fi=1; break;
2256     case 4: fi=2; break;
2257     case 8: fi=3; break;
2258     default: abort();
2259     }
2260    
2261     if (optimize_imm8 && isbyte(offset)) {
2262     emit_byte(0x8d);
2263     emit_byte(0x44+8*d);
2264     emit_byte(0x40*fi+8*index+s);
2265     emit_byte(offset);
2266     }
2267     else {
2268     emit_byte(0x8d);
2269     emit_byte(0x84+8*d);
2270     emit_byte(0x40*fi+8*index+s);
2271     emit_long(offset);
2272     }
2273     }
2274     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2275    
2276     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2277     {
2278     int isebp=(s==5)?0x40:0;
2279     int fi;
2280    
2281     switch(factor) {
2282     case 1: fi=0; break;
2283     case 2: fi=1; break;
2284     case 4: fi=2; break;
2285     case 8: fi=3; break;
2286     default: abort();
2287     }
2288    
2289     emit_byte(0x8d);
2290     emit_byte(0x04+8*d+isebp);
2291     emit_byte(0x40*fi+8*index+s);
2292     if (isebp)
2293     emit_byte(0);
2294     }
2295     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2296    
2297     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2298     {
2299     if (optimize_imm8 && isbyte(offset)) {
2300     emit_byte(0x89);
2301     emit_byte(0x40+8*s+d);
2302     emit_byte(offset);
2303     }
2304     else {
2305     emit_byte(0x89);
2306     emit_byte(0x80+8*s+d);
2307     emit_long(offset);
2308     }
2309     }
2310     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2311    
2312     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2313     {
2314     emit_byte(0x66);
2315     emit_byte(0x89);
2316     emit_byte(0x80+8*s+d);
2317     emit_long(offset);
2318     }
2319     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2320    
2321     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2322     {
2323     if (optimize_imm8 && isbyte(offset)) {
2324     emit_byte(0x88);
2325     emit_byte(0x40+8*s+d);
2326     emit_byte(offset);
2327     }
2328     else {
2329     emit_byte(0x88);
2330     emit_byte(0x80+8*s+d);
2331     emit_long(offset);
2332     }
2333     }
2334     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2335    
2336     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2337     {
2338     emit_byte(0x0f);
2339     emit_byte(0xc8+r);
2340     }
2341     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2342    
2343     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2344     {
2345     emit_byte(0x66);
2346     emit_byte(0xc1);
2347     emit_byte(0xc0+r);
2348     emit_byte(0x08);
2349     }
2350     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2351    
2352     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2353     {
2354     emit_byte(0x89);
2355     emit_byte(0xc0+8*s+d);
2356     }
2357     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2358    
2359     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2360     {
2361     emit_byte(0x89);
2362     emit_byte(0x05+8*s);
2363     emit_long(d);
2364     }
2365     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2366    
2367     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2368     {
2369     emit_byte(0x66);
2370     emit_byte(0x89);
2371     emit_byte(0x05+8*s);
2372     emit_long(d);
2373     }
2374     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2375    
2376     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2377     {
2378     emit_byte(0x66);
2379     emit_byte(0x8b);
2380     emit_byte(0x05+8*d);
2381     emit_long(s);
2382     }
2383     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2384    
2385     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2386     {
2387     emit_byte(0x88);
2388     emit_byte(0x05+8*s);
2389     emit_long(d);
2390     }
2391     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2392    
2393     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2394     {
2395     emit_byte(0x8a);
2396     emit_byte(0x05+8*d);
2397     emit_long(s);
2398     }
2399     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2400    
2401     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2402     {
2403     emit_byte(0xb8+d);
2404     emit_long(s);
2405     }
2406     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2407    
2408     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2409     {
2410     emit_byte(0x66);
2411     emit_byte(0xb8+d);
2412     emit_word(s);
2413     }
2414     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2415    
2416     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2417     {
2418     emit_byte(0xb0+d);
2419     emit_byte(s);
2420     }
2421     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2422    
2423     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2424     {
2425     emit_byte(0x81);
2426     emit_byte(0x15);
2427     emit_long(d);
2428     emit_long(s);
2429     }
2430     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2431    
2432     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2433     {
2434     if (optimize_imm8 && isbyte(s)) {
2435     emit_byte(0x83);
2436     emit_byte(0x05);
2437     emit_long(d);
2438     emit_byte(s);
2439     }
2440     else {
2441     emit_byte(0x81);
2442     emit_byte(0x05);
2443     emit_long(d);
2444     emit_long(s);
2445     }
2446     }
2447     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2448    
2449     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2450     {
2451     emit_byte(0x66);
2452     emit_byte(0x81);
2453     emit_byte(0x05);
2454     emit_long(d);
2455     emit_word(s);
2456     }
2457     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2458    
2459     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2460     {
2461     emit_byte(0x80);
2462     emit_byte(0x05);
2463     emit_long(d);
2464     emit_byte(s);
2465     }
2466     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2467    
2468     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2469     {
2470 gbeauche 1.2 if (optimize_accum && isaccum(d))
2471     emit_byte(0xa9);
2472     else {
2473 gbeauche 1.1 emit_byte(0xf7);
2474     emit_byte(0xc0+d);
2475 gbeauche 1.2 }
2476 gbeauche 1.1 emit_long(i);
2477     }
2478     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2479    
2480     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2481     {
2482     emit_byte(0x85);
2483     emit_byte(0xc0+8*s+d);
2484     }
2485     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2486    
2487     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2488     {
2489     emit_byte(0x66);
2490     emit_byte(0x85);
2491     emit_byte(0xc0+8*s+d);
2492     }
2493     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2494    
2495     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2496     {
2497     emit_byte(0x84);
2498     emit_byte(0xc0+8*s+d);
2499     }
2500     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2501    
2502     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2503     {
2504     if (optimize_imm8 && isbyte(i)) {
2505 gbeauche 1.2 emit_byte(0x83);
2506     emit_byte(0xe0+d);
2507     emit_byte(i);
2508 gbeauche 1.1 }
2509     else {
2510 gbeauche 1.2 if (optimize_accum && isaccum(d))
2511     emit_byte(0x25);
2512     else {
2513     emit_byte(0x81);
2514     emit_byte(0xe0+d);
2515     }
2516     emit_long(i);
2517 gbeauche 1.1 }
2518     }
2519     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2520    
2521     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2522     {
2523 gbeauche 1.2 emit_byte(0x66);
2524     if (optimize_imm8 && isbyte(i)) {
2525     emit_byte(0x83);
2526     emit_byte(0xe0+d);
2527     emit_byte(i);
2528     }
2529     else {
2530     if (optimize_accum && isaccum(d))
2531     emit_byte(0x25);
2532     else {
2533     emit_byte(0x81);
2534     emit_byte(0xe0+d);
2535     }
2536     emit_word(i);
2537     }
2538 gbeauche 1.1 }
2539     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2540    
2541     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2542     {
2543     emit_byte(0x21);
2544     emit_byte(0xc0+8*s+d);
2545     }
2546     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2547    
2548     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2549     {
2550     emit_byte(0x66);
2551     emit_byte(0x21);
2552     emit_byte(0xc0+8*s+d);
2553     }
2554     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2555    
2556     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2557     {
2558     emit_byte(0x20);
2559     emit_byte(0xc0+8*s+d);
2560     }
2561     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2562    
2563     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2564     {
2565     if (optimize_imm8 && isbyte(i)) {
2566     emit_byte(0x83);
2567     emit_byte(0xc8+d);
2568     emit_byte(i);
2569     }
2570     else {
2571 gbeauche 1.2 if (optimize_accum && isaccum(d))
2572     emit_byte(0x0d);
2573     else {
2574 gbeauche 1.1 emit_byte(0x81);
2575     emit_byte(0xc8+d);
2576 gbeauche 1.2 }
2577 gbeauche 1.1 emit_long(i);
2578     }
2579     }
2580     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2581    
2582     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2583     {
2584     emit_byte(0x09);
2585     emit_byte(0xc0+8*s+d);
2586     }
2587     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2588    
2589     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2590     {
2591     emit_byte(0x66);
2592     emit_byte(0x09);
2593     emit_byte(0xc0+8*s+d);
2594     }
2595     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2596    
2597     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2598     {
2599     emit_byte(0x08);
2600     emit_byte(0xc0+8*s+d);
2601     }
2602     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2603    
2604     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2605     {
2606     emit_byte(0x11);
2607     emit_byte(0xc0+8*s+d);
2608     }
2609     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2610    
2611     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2612     {
2613     emit_byte(0x66);
2614     emit_byte(0x11);
2615     emit_byte(0xc0+8*s+d);
2616     }
2617     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2618    
2619     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2620     {
2621     emit_byte(0x10);
2622     emit_byte(0xc0+8*s+d);
2623     }
2624     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2625    
2626     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2627     {
2628     emit_byte(0x01);
2629     emit_byte(0xc0+8*s+d);
2630     }
2631     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2632    
2633     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2634     {
2635     emit_byte(0x66);
2636     emit_byte(0x01);
2637     emit_byte(0xc0+8*s+d);
2638     }
2639     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2640    
2641     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2642     {
2643     emit_byte(0x00);
2644     emit_byte(0xc0+8*s+d);
2645     }
2646     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2647    
2648     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2649     {
2650     if (isbyte(i)) {
2651     emit_byte(0x83);
2652     emit_byte(0xe8+d);
2653     emit_byte(i);
2654     }
2655     else {
2656 gbeauche 1.2 if (optimize_accum && isaccum(d))
2657     emit_byte(0x2d);
2658     else {
2659 gbeauche 1.1 emit_byte(0x81);
2660     emit_byte(0xe8+d);
2661 gbeauche 1.2 }
2662 gbeauche 1.1 emit_long(i);
2663     }
2664     }
2665     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2666    
2667     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2668     {
2669 gbeauche 1.2 if (optimize_accum && isaccum(d))
2670     emit_byte(0x2c);
2671     else {
2672 gbeauche 1.1 emit_byte(0x80);
2673     emit_byte(0xe8+d);
2674 gbeauche 1.2 }
2675 gbeauche 1.1 emit_byte(i);
2676     }
2677     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2678    
2679     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2680     {
2681     if (isbyte(i)) {
2682     emit_byte(0x83);
2683     emit_byte(0xc0+d);
2684     emit_byte(i);
2685     }
2686     else {
2687 gbeauche 1.2 if (optimize_accum && isaccum(d))
2688     emit_byte(0x05);
2689     else {
2690 gbeauche 1.1 emit_byte(0x81);
2691     emit_byte(0xc0+d);
2692 gbeauche 1.2 }
2693 gbeauche 1.1 emit_long(i);
2694     }
2695     }
2696     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2697    
2698     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2699     {
2700 gbeauche 1.2 emit_byte(0x66);
2701 gbeauche 1.1 if (isbyte(i)) {
2702     emit_byte(0x83);
2703     emit_byte(0xc0+d);
2704     emit_byte(i);
2705     }
2706     else {
2707 gbeauche 1.2 if (optimize_accum && isaccum(d))
2708     emit_byte(0x05);
2709     else {
2710 gbeauche 1.1 emit_byte(0x81);
2711     emit_byte(0xc0+d);
2712 gbeauche 1.2 }
2713 gbeauche 1.1 emit_word(i);
2714     }
2715     }
2716     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2717    
2718     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2719     {
2720 gbeauche 1.2 if (optimize_accum && isaccum(d))
2721     emit_byte(0x04);
2722     else {
2723     emit_byte(0x80);
2724     emit_byte(0xc0+d);
2725     }
2726 gbeauche 1.1 emit_byte(i);
2727     }
2728     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2729    
2730     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2731     {
2732     emit_byte(0x19);
2733     emit_byte(0xc0+8*s+d);
2734     }
2735     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2736    
2737     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2738     {
2739     emit_byte(0x66);
2740     emit_byte(0x19);
2741     emit_byte(0xc0+8*s+d);
2742     }
2743     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2744    
2745     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2746     {
2747     emit_byte(0x18);
2748     emit_byte(0xc0+8*s+d);
2749     }
2750     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2751    
2752     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2753     {
2754     emit_byte(0x29);
2755     emit_byte(0xc0+8*s+d);
2756     }
2757     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2758    
2759     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2760     {
2761     emit_byte(0x66);
2762     emit_byte(0x29);
2763     emit_byte(0xc0+8*s+d);
2764     }
2765     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2766    
2767     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2768     {
2769     emit_byte(0x28);
2770     emit_byte(0xc0+8*s+d);
2771     }
2772     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2773    
2774     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2775     {
2776     emit_byte(0x39);
2777     emit_byte(0xc0+8*s+d);
2778     }
2779     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2780    
2781     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2782     {
2783     if (optimize_imm8 && isbyte(i)) {
2784     emit_byte(0x83);
2785     emit_byte(0xf8+r);
2786     emit_byte(i);
2787     }
2788     else {
2789 gbeauche 1.2 if (optimize_accum && isaccum(r))
2790     emit_byte(0x3d);
2791     else {
2792 gbeauche 1.1 emit_byte(0x81);
2793     emit_byte(0xf8+r);
2794 gbeauche 1.2 }
2795 gbeauche 1.1 emit_long(i);
2796     }
2797     }
2798     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2799    
2800     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2801     {
2802     emit_byte(0x66);
2803     emit_byte(0x39);
2804     emit_byte(0xc0+8*s+d);
2805     }
2806     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2807    
2808 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2809     {
2810     emit_byte(0x80);
2811     emit_byte(0x3d);
2812     emit_long(d);
2813     emit_byte(s);
2814     }
2815     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2816    
2817 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2818     {
2819 gbeauche 1.2 if (optimize_accum && isaccum(d))
2820     emit_byte(0x3c);
2821     else {
2822 gbeauche 1.1 emit_byte(0x80);
2823     emit_byte(0xf8+d);
2824 gbeauche 1.2 }
2825 gbeauche 1.1 emit_byte(i);
2826     }
2827     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2828    
2829     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2830     {
2831     emit_byte(0x38);
2832     emit_byte(0xc0+8*s+d);
2833     }
2834     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2835    
2836     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2837     {
2838     int fi;
2839    
2840     switch(factor) {
2841     case 1: fi=0; break;
2842     case 2: fi=1; break;
2843     case 4: fi=2; break;
2844     case 8: fi=3; break;
2845     default: abort();
2846     }
2847     emit_byte(0x39);
2848     emit_byte(0x04+8*d);
2849     emit_byte(5+8*index+0x40*fi);
2850     emit_long(offset);
2851     }
2852     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2853    
2854     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2855     {
2856     emit_byte(0x31);
2857     emit_byte(0xc0+8*s+d);
2858     }
2859     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2860    
2861     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2862     {
2863     emit_byte(0x66);
2864     emit_byte(0x31);
2865     emit_byte(0xc0+8*s+d);
2866     }
2867     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2868    
2869     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2870     {
2871     emit_byte(0x30);
2872     emit_byte(0xc0+8*s+d);
2873     }
2874     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2875    
2876     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2877     {
2878     if (optimize_imm8 && isbyte(s)) {
2879     emit_byte(0x83);
2880     emit_byte(0x2d);
2881     emit_long(d);
2882     emit_byte(s);
2883     }
2884     else {
2885     emit_byte(0x81);
2886     emit_byte(0x2d);
2887     emit_long(d);
2888     emit_long(s);
2889     }
2890     }
2891     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2892    
2893     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2894     {
2895     if (optimize_imm8 && isbyte(s)) {
2896     emit_byte(0x83);
2897     emit_byte(0x3d);
2898     emit_long(d);
2899     emit_byte(s);
2900     }
2901     else {
2902     emit_byte(0x81);
2903     emit_byte(0x3d);
2904     emit_long(d);
2905     emit_long(s);
2906     }
2907     }
2908     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2909    
2910     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2911     {
2912     emit_byte(0x87);
2913     emit_byte(0xc0+8*r1+r2);
2914     }
2915     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2916    
2917     /*************************************************************************
2918     * FIXME: mem access modes probably wrong *
2919     *************************************************************************/
2920    
2921     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2922     {
2923     emit_byte(0x9c);
2924     }
2925     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2926    
2927     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2928     {
2929     emit_byte(0x9d);
2930     }
2931     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2932 gbeauche 1.13
2933     #endif
2934 gbeauche 1.1
2935     /*************************************************************************
2936     * Unoptimizable stuff --- jump *
2937     *************************************************************************/
2938    
2939     static __inline__ void raw_call_r(R4 r)
2940     {
2941 gbeauche 1.20 #if USE_NEW_RTASM
2942     CALLsr(r);
2943     #else
2944 gbeauche 1.1 emit_byte(0xff);
2945     emit_byte(0xd0+r);
2946 gbeauche 1.20 #endif
2947 gbeauche 1.5 }
2948    
2949     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2950     {
2951 gbeauche 1.20 #if USE_NEW_RTASM
2952     CALLsm(base, X86_NOREG, r, m);
2953     #else
2954 gbeauche 1.5 int mu;
2955     switch(m) {
2956     case 1: mu=0; break;
2957     case 2: mu=1; break;
2958     case 4: mu=2; break;
2959     case 8: mu=3; break;
2960     default: abort();
2961     }
2962     emit_byte(0xff);
2963     emit_byte(0x14);
2964     emit_byte(0x05+8*r+0x40*mu);
2965     emit_long(base);
2966 gbeauche 1.20 #endif
2967 gbeauche 1.1 }
2968    
2969     static __inline__ void raw_jmp_r(R4 r)
2970     {
2971 gbeauche 1.20 #if USE_NEW_RTASM
2972     JMPsr(r);
2973     #else
2974 gbeauche 1.1 emit_byte(0xff);
2975     emit_byte(0xe0+r);
2976 gbeauche 1.20 #endif
2977 gbeauche 1.1 }
2978    
2979     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2980     {
2981 gbeauche 1.20 #if USE_NEW_RTASM
2982     JMPsm(base, X86_NOREG, r, m);
2983     #else
2984 gbeauche 1.1 int mu;
2985     switch(m) {
2986     case 1: mu=0; break;
2987     case 2: mu=1; break;
2988     case 4: mu=2; break;
2989     case 8: mu=3; break;
2990     default: abort();
2991     }
2992     emit_byte(0xff);
2993     emit_byte(0x24);
2994     emit_byte(0x05+8*r+0x40*mu);
2995     emit_long(base);
2996 gbeauche 1.20 #endif
2997 gbeauche 1.1 }
2998    
2999     static __inline__ void raw_jmp_m(uae_u32 base)
3000     {
3001     emit_byte(0xff);
3002     emit_byte(0x25);
3003     emit_long(base);
3004     }
3005    
3006    
3007     static __inline__ void raw_call(uae_u32 t)
3008     {
3009 gbeauche 1.20 #if USE_NEW_RTASM
3010     CALLm(t);
3011     #else
3012 gbeauche 1.1 emit_byte(0xe8);
3013     emit_long(t-(uae_u32)target-4);
3014 gbeauche 1.20 #endif
3015 gbeauche 1.1 }
3016    
3017     static __inline__ void raw_jmp(uae_u32 t)
3018     {
3019 gbeauche 1.20 #if USE_NEW_RTASM
3020     JMPm(t);
3021     #else
3022 gbeauche 1.1 emit_byte(0xe9);
3023     emit_long(t-(uae_u32)target-4);
3024 gbeauche 1.20 #endif
3025 gbeauche 1.1 }
3026    
3027     static __inline__ void raw_jl(uae_u32 t)
3028     {
3029     emit_byte(0x0f);
3030     emit_byte(0x8c);
3031 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3032 gbeauche 1.1 }
3033    
3034     static __inline__ void raw_jz(uae_u32 t)
3035     {
3036     emit_byte(0x0f);
3037     emit_byte(0x84);
3038 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3039 gbeauche 1.1 }
3040    
3041     static __inline__ void raw_jnz(uae_u32 t)
3042     {
3043     emit_byte(0x0f);
3044     emit_byte(0x85);
3045 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3046 gbeauche 1.1 }
3047    
3048     static __inline__ void raw_jnz_l_oponly(void)
3049     {
3050     emit_byte(0x0f);
3051     emit_byte(0x85);
3052     }
3053    
3054     static __inline__ void raw_jcc_l_oponly(int cc)
3055     {
3056     emit_byte(0x0f);
3057     emit_byte(0x80+cc);
3058     }
3059    
3060     static __inline__ void raw_jnz_b_oponly(void)
3061     {
3062     emit_byte(0x75);
3063     }
3064    
3065     static __inline__ void raw_jz_b_oponly(void)
3066     {
3067     emit_byte(0x74);
3068     }
3069    
3070     static __inline__ void raw_jcc_b_oponly(int cc)
3071     {
3072     emit_byte(0x70+cc);
3073     }
3074    
3075     static __inline__ void raw_jmp_l_oponly(void)
3076     {
3077     emit_byte(0xe9);
3078     }
3079    
3080     static __inline__ void raw_jmp_b_oponly(void)
3081     {
3082     emit_byte(0xeb);
3083     }
3084    
3085     static __inline__ void raw_ret(void)
3086     {
3087     emit_byte(0xc3);
3088     }
3089    
3090     static __inline__ void raw_nop(void)
3091     {
3092     emit_byte(0x90);
3093     }
3094    
3095 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3096     {
3097     /* Source: GNU Binutils 2.12.90.0.15 */
3098     /* Various efficient no-op patterns for aligning code labels.
3099     Note: Don't try to assemble the instructions in the comments.
3100     0L and 0w are not legal. */
3101     static const uae_u8 f32_1[] =
3102     {0x90}; /* nop */
3103     static const uae_u8 f32_2[] =
3104     {0x89,0xf6}; /* movl %esi,%esi */
3105     static const uae_u8 f32_3[] =
3106     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3107     static const uae_u8 f32_4[] =
3108     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3109     static const uae_u8 f32_5[] =
3110     {0x90, /* nop */
3111     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3112     static const uae_u8 f32_6[] =
3113     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3114     static const uae_u8 f32_7[] =
3115     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3116     static const uae_u8 f32_8[] =
3117     {0x90, /* nop */
3118     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3119     static const uae_u8 f32_9[] =
3120     {0x89,0xf6, /* movl %esi,%esi */
3121     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3122     static const uae_u8 f32_10[] =
3123     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3124     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3125     static const uae_u8 f32_11[] =
3126     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3127     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3128     static const uae_u8 f32_12[] =
3129     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3130     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3131     static const uae_u8 f32_13[] =
3132     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3133     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3134     static const uae_u8 f32_14[] =
3135     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3136     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3137     static const uae_u8 f32_15[] =
3138     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3139     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3140     static const uae_u8 f32_16[] =
3141     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3142     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3143     static const uae_u8 *const f32_patt[] = {
3144     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3145     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3146     };
3147 gbeauche 1.21 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3148 gbeauche 1.8
3149 gbeauche 1.21 #if defined(__x86_64__)
3150     /* The recommended way to pad 64bit code is to use NOPs preceded by
3151     maximally four 0x66 prefixes. Balance the size of nops. */
3152     if (nbytes == 0)
3153     return;
3154    
3155     int i;
3156     int nnops = (nbytes + 3) / 4;
3157     int len = nbytes / nnops;
3158     int remains = nbytes - nnops * len;
3159    
3160     for (i = 0; i < remains; i++) {
3161     emit_block(prefixes, len);
3162     raw_nop();
3163     }
3164     for (; i < nnops; i++) {
3165     emit_block(prefixes, len - 1);
3166     raw_nop();
3167     }
3168     #else
3169 gbeauche 1.8 int nloops = nbytes / 16;
3170     while (nloops-- > 0)
3171     emit_block(f32_16, sizeof(f32_16));
3172    
3173     nbytes %= 16;
3174     if (nbytes)
3175     emit_block(f32_patt[nbytes - 1], nbytes);
3176 gbeauche 1.21 #endif
3177 gbeauche 1.8 }
3178    
3179 gbeauche 1.1
3180     /*************************************************************************
3181     * Flag handling, to and fro UAE flag register *
3182     *************************************************************************/
3183    
3184     #ifdef SAHF_SETO_PROFITABLE
3185    
3186     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3187    
3188     static __inline__ void raw_flags_to_reg(int r)
3189     {
3190     raw_lahf(0); /* Most flags in AH */
3191     //raw_setcc(r,0); /* V flag in AL */
3192 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3193 gbeauche 1.1
3194     #if 1 /* Let's avoid those nasty partial register stalls */
3195 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3196     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3197 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3198     live.state[FLAGTMP].status=INMEM;
3199     live.state[FLAGTMP].realreg=-1;
3200     /* We just "evicted" FLAGTMP. */
3201     if (live.nat[r].nholds!=1) {
3202     /* Huh? */
3203     abort();
3204     }
3205     live.nat[r].nholds=0;
3206     #endif
3207     }
3208    
3209     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3210     static __inline__ void raw_reg_to_flags(int r)
3211     {
3212     raw_cmp_b_ri(r,-127); /* set V */
3213     raw_sahf(0);
3214     }
3215    
3216     #else
3217    
3218     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3219     static __inline__ void raw_flags_to_reg(int r)
3220     {
3221     raw_pushfl();
3222     raw_pop_l_r(r);
3223 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3224 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3225     live.state[FLAGTMP].status=INMEM;
3226     live.state[FLAGTMP].realreg=-1;
3227     /* We just "evicted" FLAGTMP. */
3228     if (live.nat[r].nholds!=1) {
3229     /* Huh? */
3230     abort();
3231     }
3232     live.nat[r].nholds=0;
3233     }
3234    
3235     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3236     static __inline__ void raw_reg_to_flags(int r)
3237     {
3238     raw_push_l_r(r);
3239     raw_popfl();
3240     }
3241    
3242     #endif
3243    
3244     /* Apparently, there are enough instructions between flag store and
3245     flag reload to avoid the partial memory stall */
3246     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3247     {
3248     #if 1
3249 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3250 gbeauche 1.1 #else
3251 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3252     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3253 gbeauche 1.1 #endif
3254     }
3255    
3256     /* FLAGX is byte sized, and we *do* write it at that size */
3257     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3258     {
3259     if (live.nat[target].canbyte)
3260 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3261 gbeauche 1.1 else if (live.nat[target].canword)
3262 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3263 gbeauche 1.1 else
3264 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3265 gbeauche 1.1 }
3266    
3267 gbeauche 1.11 #define NATIVE_FLAG_Z 0x40
3268     static __inline__ void raw_flags_set_zero(int f, int r, int t)
3269     {
3270     // FIXME: this is really suboptimal
3271     raw_pushfl();
3272     raw_pop_l_r(f);
3273     raw_and_l_ri(f,~NATIVE_FLAG_Z);
3274     raw_test_l_rr(r,r);
3275     raw_mov_l_ri(r,0);
3276     raw_mov_l_ri(t,NATIVE_FLAG_Z);
3277     raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3278     raw_or_l(f,r);
3279     raw_push_l_r(f);
3280     raw_popfl();
3281     }
3282 gbeauche 1.1
3283     static __inline__ void raw_inc_sp(int off)
3284     {
3285 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3286 gbeauche 1.1 }
3287    
3288     /*************************************************************************
3289     * Handling mistaken direct memory access *
3290     *************************************************************************/
3291    
3292     // gb-- I don't need that part for JIT Basilisk II
3293     #if defined(NATMEM_OFFSET) && 0
3294     #include <asm/sigcontext.h>
3295     #include <signal.h>
3296    
3297     #define SIG_READ 1
3298     #define SIG_WRITE 2
3299    
3300     static int in_handler=0;
3301     static uae_u8 veccode[256];
3302    
3303     static void vec(int x, struct sigcontext sc)
3304     {
3305     uae_u8* i=(uae_u8*)sc.eip;
3306     uae_u32 addr=sc.cr2;
3307     int r=-1;
3308     int size=4;
3309     int dir=-1;
3310     int len=0;
3311     int j;
3312    
3313     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3314     if (!canbang)
3315     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3316     if (in_handler)
3317     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3318    
3319     if (canbang && i>=compiled_code && i<=current_compile_p) {
3320     if (*i==0x66) {
3321     i++;
3322     size=2;
3323     len++;
3324     }
3325    
3326     switch(i[0]) {
3327     case 0x8a:
3328     if ((i[1]&0xc0)==0x80) {
3329     r=(i[1]>>3)&7;
3330     dir=SIG_READ;
3331     size=1;
3332     len+=6;
3333     break;
3334     }
3335     break;
3336     case 0x88:
3337     if ((i[1]&0xc0)==0x80) {
3338     r=(i[1]>>3)&7;
3339     dir=SIG_WRITE;
3340     size=1;
3341     len+=6;
3342     break;
3343     }
3344     break;
3345     case 0x8b:
3346     if ((i[1]&0xc0)==0x80) {
3347     r=(i[1]>>3)&7;
3348     dir=SIG_READ;
3349     len+=6;
3350     break;
3351     }
3352     if ((i[1]&0xc0)==0x40) {
3353     r=(i[1]>>3)&7;
3354     dir=SIG_READ;
3355     len+=3;
3356     break;
3357     }
3358     break;
3359     case 0x89:
3360     if ((i[1]&0xc0)==0x80) {
3361     r=(i[1]>>3)&7;
3362     dir=SIG_WRITE;
3363     len+=6;
3364     break;
3365     }
3366     if ((i[1]&0xc0)==0x40) {
3367     r=(i[1]>>3)&7;
3368     dir=SIG_WRITE;
3369     len+=3;
3370     break;
3371     }
3372     break;
3373     }
3374     }
3375    
3376     if (r!=-1) {
3377     void* pr=NULL;
3378     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3379    
3380     switch(r) {
3381     case 0: pr=&(sc.eax); break;
3382     case 1: pr=&(sc.ecx); break;
3383     case 2: pr=&(sc.edx); break;
3384     case 3: pr=&(sc.ebx); break;
3385     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3386     case 5: pr=(size>1)?
3387     (void*)(&(sc.ebp)):
3388     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3389     case 6: pr=(size>1)?
3390     (void*)(&(sc.esi)):
3391     (void*)(((uae_u8*)&(sc.edx))+1); break;
3392     case 7: pr=(size>1)?
3393     (void*)(&(sc.edi)):
3394     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3395     default: abort();
3396     }
3397     if (pr) {
3398     blockinfo* bi;
3399    
3400     if (currprefs.comp_oldsegv) {
3401     addr-=NATMEM_OFFSET;
3402    
3403     if ((addr>=0x10000000 && addr<0x40000000) ||
3404     (addr>=0x50000000)) {
3405     write_log("Suspicious address in %x SEGV handler.\n",addr);
3406     }
3407     if (dir==SIG_READ) {
3408     switch(size) {
3409     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3410     case 2: *((uae_u16*)pr)=get_word(addr); break;
3411     case 4: *((uae_u32*)pr)=get_long(addr); break;
3412     default: abort();
3413     }
3414     }
3415     else { /* write */
3416     switch(size) {
3417     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3418     case 2: put_word(addr,*((uae_u16*)pr)); break;
3419     case 4: put_long(addr,*((uae_u32*)pr)); break;
3420     default: abort();
3421     }
3422     }
3423     write_log("Handled one access!\n");
3424     fflush(stdout);
3425     segvcount++;
3426     sc.eip+=len;
3427     }
3428     else {
3429     void* tmp=target;
3430     int i;
3431     uae_u8 vecbuf[5];
3432    
3433     addr-=NATMEM_OFFSET;
3434    
3435     if ((addr>=0x10000000 && addr<0x40000000) ||
3436     (addr>=0x50000000)) {
3437     write_log("Suspicious address in %x SEGV handler.\n",addr);
3438     }
3439    
3440     target=(uae_u8*)sc.eip;
3441     for (i=0;i<5;i++)
3442     vecbuf[i]=target[i];
3443     emit_byte(0xe9);
3444 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3445 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3446    
3447     write_log("Handled one access!\n");
3448     fflush(stdout);
3449     segvcount++;
3450    
3451     target=veccode;
3452    
3453     if (dir==SIG_READ) {
3454     switch(size) {
3455     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3456     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3457     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3458     default: abort();
3459     }
3460     }
3461     else { /* write */
3462     switch(size) {
3463     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3464     case 2: put_word(addr,*((uae_u16*)pr)); break;
3465     case 4: put_long(addr,*((uae_u32*)pr)); break;
3466     default: abort();
3467     }
3468     }
3469     for (i=0;i<5;i++)
3470     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3471 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3472 gbeauche 1.1 emit_byte(0xe9);
3473 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3474 gbeauche 1.1 in_handler=1;
3475     target=tmp;
3476     }
3477     bi=active;
3478     while (bi) {
3479     if (bi->handler &&
3480     (uae_u8*)bi->direct_handler<=i &&
3481     (uae_u8*)bi->nexthandler>i) {
3482     write_log("deleted trigger (%p<%p<%p) %p\n",
3483     bi->handler,
3484     i,
3485     bi->nexthandler,
3486     bi->pc_p);
3487     invalidate_block(bi);
3488     raise_in_cl_list(bi);
3489     set_special(0);
3490     return;
3491     }
3492     bi=bi->next;
3493     }
3494     /* Not found in the active list. Might be a rom routine that
3495     is in the dormant list */
3496     bi=dormant;
3497     while (bi) {
3498     if (bi->handler &&
3499     (uae_u8*)bi->direct_handler<=i &&
3500     (uae_u8*)bi->nexthandler>i) {
3501     write_log("deleted trigger (%p<%p<%p) %p\n",
3502     bi->handler,
3503     i,
3504     bi->nexthandler,
3505     bi->pc_p);
3506     invalidate_block(bi);
3507     raise_in_cl_list(bi);
3508     set_special(0);
3509     return;
3510     }
3511     bi=bi->next;
3512     }
3513     write_log("Huh? Could not find trigger!\n");
3514     return;
3515     }
3516     }
3517     write_log("Can't handle access!\n");
3518     for (j=0;j<10;j++) {
3519     write_log("instruction byte %2d is %02x\n",j,i[j]);
3520     }
3521     write_log("Please send the above info (starting at \"fault address\") to\n"
3522     "bmeyer@csse.monash.edu.au\n"
3523     "This shouldn't happen ;-)\n");
3524     fflush(stdout);
3525     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3526     }
3527     #endif
3528    
3529    
3530     /*************************************************************************
3531     * Checking for CPU features *
3532     *************************************************************************/
3533    
3534 gbeauche 1.3 struct cpuinfo_x86 {
3535     uae_u8 x86; // CPU family
3536     uae_u8 x86_vendor; // CPU vendor
3537     uae_u8 x86_processor; // CPU canonical processor type
3538     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3539     uae_u32 x86_hwcap;
3540     uae_u8 x86_model;
3541     uae_u8 x86_mask;
3542     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3543     char x86_vendor_id[16];
3544     };
3545     struct cpuinfo_x86 cpuinfo;
3546    
/* CPU vendor codes stored in cpuinfo_x86::x86_vendor. */
enum {
	X86_VENDOR_INTEL	= 0x00,
	X86_VENDOR_CYRIX	= 0x01,
	X86_VENDOR_AMD		= 0x02,
	X86_VENDOR_UMC		= 0x03,
	X86_VENDOR_NEXGEN	= 0x04,
	X86_VENDOR_CENTAUR	= 0x05,
	X86_VENDOR_RISE		= 0x06,
	X86_VENDOR_TRANSMETA	= 0x07,
	X86_VENDOR_NSC		= 0x08,
	X86_VENDOR_UNKNOWN	= 0xff	/* vendor string not recognised */
};
3559    
/* Canonical processor types.  The values index the per-processor tables
   (name strings, alignments), so the order must match those tables;
   X86_PROCESSOR_max is the number of entries. */
enum {
	X86_PROCESSOR_I386		= 0,	/* 80386 */
	X86_PROCESSOR_I486		= 1,	/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM		= 2,
	X86_PROCESSOR_PENTIUMPRO	= 3,
	X86_PROCESSOR_K6		= 4,
	X86_PROCESSOR_ATHLON		= 5,
	X86_PROCESSOR_PENTIUM4		= 6,
	X86_PROCESSOR_K8		= 7,
	X86_PROCESSOR_max
};
3571    
3572     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3573     "80386",
3574     "80486",
3575     "Pentium",
3576     "PentiumPro",
3577     "K6",
3578     "Athlon",
3579 gbeauche 1.16 "Pentium4",
3580     "K8"
3581 gbeauche 1.3 };
3582    
3583     static struct ptt {
3584     const int align_loop;
3585     const int align_loop_max_skip;
3586     const int align_jump;
3587     const int align_jump_max_skip;
3588     const int align_func;
3589     }
3590     x86_alignments[X86_PROCESSOR_max] = {
3591     { 4, 3, 4, 3, 4 },
3592     { 16, 15, 16, 15, 16 },
3593     { 16, 7, 16, 7, 16 },
3594     { 16, 15, 16, 7, 16 },
3595     { 32, 7, 32, 7, 32 },
3596 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3597 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3598     { 16, 7, 16, 7, 16 }
3599 gbeauche 1.3 };
3600 gbeauche 1.1
3601 gbeauche 1.3 static void
3602     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3603 gbeauche 1.1 {
3604 gbeauche 1.3 char *v = c->x86_vendor_id;
3605    
3606     if (!strcmp(v, "GenuineIntel"))
3607     c->x86_vendor = X86_VENDOR_INTEL;
3608     else if (!strcmp(v, "AuthenticAMD"))
3609     c->x86_vendor = X86_VENDOR_AMD;
3610     else if (!strcmp(v, "CyrixInstead"))
3611     c->x86_vendor = X86_VENDOR_CYRIX;
3612     else if (!strcmp(v, "Geode by NSC"))
3613     c->x86_vendor = X86_VENDOR_NSC;
3614     else if (!strcmp(v, "UMC UMC UMC "))
3615     c->x86_vendor = X86_VENDOR_UMC;
3616     else if (!strcmp(v, "CentaurHauls"))
3617     c->x86_vendor = X86_VENDOR_CENTAUR;
3618     else if (!strcmp(v, "NexGenDriven"))
3619     c->x86_vendor = X86_VENDOR_NEXGEN;
3620     else if (!strcmp(v, "RiseRiseRise"))
3621     c->x86_vendor = X86_VENDOR_RISE;
3622     else if (!strcmp(v, "GenuineTMx86") ||
3623     !strcmp(v, "TransmetaCPU"))
3624     c->x86_vendor = X86_VENDOR_TRANSMETA;
3625     else
3626     c->x86_vendor = X86_VENDOR_UNKNOWN;
3627     }
3628 gbeauche 1.1
3629 gbeauche 1.3 static void
3630     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3631     {
3632     static uae_u8 cpuid_space[256];
3633 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3634 gbeauche 1.3 uae_u8* tmp=get_target();
3635 gbeauche 1.1
3636 gbeauche 1.20 s_op = op;
3637 gbeauche 1.3 set_target(cpuid_space);
3638     raw_push_l_r(0); /* eax */
3639     raw_push_l_r(1); /* ecx */
3640     raw_push_l_r(2); /* edx */
3641     raw_push_l_r(3); /* ebx */
3642 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3643 gbeauche 1.3 raw_cpuid(0);
3644 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3645     raw_mov_l_mr((uintptr)&s_ebx,3);
3646     raw_mov_l_mr((uintptr)&s_ecx,1);
3647     raw_mov_l_mr((uintptr)&s_edx,2);
3648 gbeauche 1.3 raw_pop_l_r(3);
3649     raw_pop_l_r(2);
3650     raw_pop_l_r(1);
3651     raw_pop_l_r(0);
3652     raw_ret();
3653     set_target(tmp);
3654 gbeauche 1.1
3655 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3656 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3657     if (ebx != NULL) *ebx = s_ebx;
3658     if (ecx != NULL) *ecx = s_ecx;
3659     if (edx != NULL) *edx = s_edx;
3660 gbeauche 1.1 }
3661    
3662 gbeauche 1.3 static void
3663     raw_init_cpu(void)
3664 gbeauche 1.1 {
3665 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3666    
3667     /* Defaults */
3668 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3669 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3670     c->cpuid_level = -1; /* CPUID not detected */
3671     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3672     c->x86_vendor_id[0] = '\0'; /* Unset */
3673     c->x86_hwcap = 0;
3674    
3675     /* Get vendor name */
3676     c->x86_vendor_id[12] = '\0';
3677     cpuid(0x00000000,
3678     (uae_u32 *)&c->cpuid_level,
3679     (uae_u32 *)&c->x86_vendor_id[0],
3680     (uae_u32 *)&c->x86_vendor_id[8],
3681     (uae_u32 *)&c->x86_vendor_id[4]);
3682     x86_get_cpu_vendor(c);
3683    
3684     /* Intel-defined flags: level 0x00000001 */
3685     c->x86_brand_id = 0;
3686     if ( c->cpuid_level >= 0x00000001 ) {
3687     uae_u32 tfms, brand_id;
3688     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3689     c->x86 = (tfms >> 8) & 15;
3690     c->x86_model = (tfms >> 4) & 15;
3691     c->x86_brand_id = brand_id & 0xff;
3692     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3693     (c->x86 == 0xf)) {
3694     /* AMD Extended Family and Model Values */
3695     c->x86 += (tfms >> 20) & 0xff;
3696     c->x86_model += (tfms >> 12) & 0xf0;
3697     }
3698     c->x86_mask = tfms & 15;
3699     } else {
3700     /* Have CPUID level 0 only - unheard of */
3701     c->x86 = 4;
3702     }
3703    
3704 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3705     uae_u32 xlvl;
3706     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3707     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3708     if ( xlvl >= 0x80000001 ) {
3709     uae_u32 features;
3710     cpuid(0x80000001, NULL, NULL, NULL, &features);
3711     if (features & (1 << 29)) {
3712     /* Assume x86-64 if long mode is supported */
3713     c->x86_processor = X86_PROCESSOR_K8;
3714     }
3715     }
3716     }
3717    
3718 gbeauche 1.3 /* Canonicalize processor ID */
3719     switch (c->x86) {
3720     case 3:
3721     c->x86_processor = X86_PROCESSOR_I386;
3722     break;
3723     case 4:
3724     c->x86_processor = X86_PROCESSOR_I486;
3725     break;
3726     case 5:
3727     if (c->x86_vendor == X86_VENDOR_AMD)
3728     c->x86_processor = X86_PROCESSOR_K6;
3729     else
3730     c->x86_processor = X86_PROCESSOR_PENTIUM;
3731     break;
3732     case 6:
3733     if (c->x86_vendor == X86_VENDOR_AMD)
3734     c->x86_processor = X86_PROCESSOR_ATHLON;
3735     else
3736     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3737     break;
3738     case 15:
3739     if (c->x86_vendor == X86_VENDOR_INTEL) {
3740 gbeauche 1.16 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3741 gbeauche 1.3 if (c->x86_brand_id >= 8)
3742     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3743     }
3744 gbeauche 1.16 if (c->x86_vendor == X86_VENDOR_AMD) {
3745     /* Assume an Athlon processor if family == 15 and it was not
3746     detected as an x86-64 so far */
3747     if (c->x86_processor == X86_PROCESSOR_max)
3748     c->x86_processor = X86_PROCESSOR_ATHLON;
3749     }
3750 gbeauche 1.3 break;
3751     }
3752     if (c->x86_processor == X86_PROCESSOR_max) {
3753     fprintf(stderr, "Error: unknown processor type\n");
3754     fprintf(stderr, " Family : %d\n", c->x86);
3755     fprintf(stderr, " Model : %d\n", c->x86_model);
3756     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3757 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3758 gbeauche 1.3 if (c->x86_brand_id)
3759     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3760     abort();
3761     }
3762    
3763     /* Have CMOV support? */
3764 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3765 gbeauche 1.3
3766     /* Can the host CPU suffer from partial register stalls? */
3767     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3768     #if 1
3769     /* It appears that partial register writes are a bad idea even on
3770 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3771     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3772 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3773     have_rat_stall = true;
3774 gbeauche 1.1 #endif
3775 gbeauche 1.3
3776     /* Alignments */
3777     if (tune_alignment) {
3778     align_loops = x86_alignments[c->x86_processor].align_loop;
3779     align_jumps = x86_alignments[c->x86_processor].align_jump;
3780     }
3781    
3782     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3783     c->cpuid_level, c->x86_vendor_id,
3784     x86_processor_string_table[c->x86_processor]);
3785 gbeauche 1.1 }
3786    
3787 gbeauche 1.10 static bool target_check_bsf(void)
3788     {
3789     bool mismatch = false;
3790     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3791     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3792     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3793     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3794     for (int value = -1; value <= 1; value++) {
3795     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3796     int tmp = value;
3797     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3798 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3799 gbeauche 1.10 int OF = (flags >> 11) & 1;
3800     int SF = (flags >> 7) & 1;
3801     int ZF = (flags >> 6) & 1;
3802     int CF = flags & 1;
3803     tmp = (value == 0);
3804     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3805     mismatch = true;
3806     }
3807     }}}}
3808     if (mismatch)
3809     write_log("Target CPU defines all flags on BSF instruction\n");
3810     return !mismatch;
3811     }
3812    
3813 gbeauche 1.1
3814     /*************************************************************************
3815     * FPU stuff *
3816     *************************************************************************/
3817    
3818    
3819     static __inline__ void raw_fp_init(void)
3820     {
3821     int i;
3822    
3823     for (i=0;i<N_FREGS;i++)
3824     live.spos[i]=-2;
3825     live.tos=-1; /* Stack is empty */
3826     }
3827    
3828     static __inline__ void raw_fp_cleanup_drop(void)
3829     {
3830     #if 0
3831     /* using FINIT instead of popping all the entries.
3832     Seems to have side effects --- there is display corruption in
3833     Quake when this is used */
3834     if (live.tos>1) {
3835     emit_byte(0x9b);
3836     emit_byte(0xdb);
3837     emit_byte(0xe3);
3838     live.tos=-1;
3839     }
3840     #endif
3841     while (live.tos>=1) {
3842     emit_byte(0xde);
3843     emit_byte(0xd9);
3844     live.tos-=2;
3845     }
3846     while (live.tos>=0) {
3847     emit_byte(0xdd);
3848     emit_byte(0xd8);
3849     live.tos--;
3850     }
3851     raw_fp_init();
3852     }
3853    
3854     static __inline__ void make_tos(int r)
3855     {
3856     int p,q;
3857    
3858     if (live.spos[r]<0) { /* Register not yet on stack */
3859     emit_byte(0xd9);
3860     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3861     live.tos++;
3862     live.spos[r]=live.tos;
3863     live.onstack[live.tos]=r;
3864     return;
3865     }
3866     /* Register is on stack */
3867     if (live.tos==live.spos[r])
3868     return;
3869     p=live.spos[r];
3870     q=live.onstack[live.tos];
3871    
3872     emit_byte(0xd9);
3873     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3874     live.onstack[live.tos]=r;
3875     live.spos[r]=live.tos;
3876     live.onstack[p]=q;
3877     live.spos[q]=p;
3878     }
3879    
3880     static __inline__ void make_tos2(int r, int r2)
3881     {
3882     int q;
3883    
3884     make_tos(r2); /* Put the reg that's supposed to end up in position2
3885     on top */
3886    
3887     if (live.spos[r]<0) { /* Register not yet on stack */
3888     make_tos(r); /* This will extend the stack */
3889     return;
3890     }
3891     /* Register is on stack */
3892     emit_byte(0xd9);
3893     emit_byte(0xc9); /* Move r2 into position 2 */
3894    
3895     q=live.onstack[live.tos-1];
3896     live.onstack[live.tos]=q;
3897     live.spos[q]=live.tos;
3898     live.onstack[live.tos-1]=r2;
3899     live.spos[r2]=live.tos-1;
3900    
3901     make_tos(r); /* And r into 1 */
3902     }
3903    
3904     static __inline__ int stackpos(int r)
3905     {
3906     if (live.spos[r]<0)
3907     abort();
3908     if (live.tos<live.spos[r]) {
3909     printf("Looking for spos for fnreg %d\n",r);
3910     abort();
3911     }
3912     return live.tos-live.spos[r];
3913     }
3914    
3915     static __inline__ void usereg(int r)
3916     {
3917     if (live.spos[r]<0)
3918     make_tos(r);
3919     }
3920    
3921     /* This is called with one FP value in a reg *above* tos, which it will
3922     pop off the stack if necessary */
3923     static __inline__ void tos_make(int r)
3924     {
3925     if (live.spos[r]<0) {
3926     live.tos++;
3927     live.spos[r]=live.tos;
3928     live.onstack[live.tos]=r;
3929     return;
3930     }
3931     emit_byte(0xdd);
3932     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3933     and pop it*/
3934     }
3935    
3936    
3937     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3938     {
3939     make_tos(r);
3940     emit_byte(0xdd);
3941     emit_byte(0x15);
3942     emit_long(m);
3943     }
3944     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3945    
3946     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
3947     {
3948     make_tos(r);
3949     emit_byte(0xdd);
3950     emit_byte(0x1d);
3951     emit_long(m);
3952     live.onstack[live.tos]=-1;
3953     live.tos--;
3954     live.spos[r]=-2;
3955     }
3956     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3957    
3958     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3959     {
3960     emit_byte(0xdd);
3961     emit_byte(0x05);
3962     emit_long(m);
3963     tos_make(r);
3964     }
3965     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3966    
3967     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3968     {
3969     emit_byte(0xdb);
3970     emit_byte(0x05);
3971     emit_long(m);
3972     tos_make(r);
3973     }
3974     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3975    
3976     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3977     {
3978     make_tos(r);
3979     emit_byte(0xdb);
3980     emit_byte(0x15);
3981     emit_long(m);
3982     }
3983     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3984    
3985     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3986     {
3987     emit_byte(0xd9);
3988     emit_byte(0x05);
3989     emit_long(m);
3990     tos_make(r);
3991     }
3992     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3993    
3994     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3995     {
3996     make_tos(r);
3997     emit_byte(0xd9);
3998     emit_byte(0x15);
3999     emit_long(m);
4000     }
4001     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4002    
4003     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4004     {
4005     int rs;
4006    
4007     /* Stupid x87 can't write a long double to mem without popping the
4008     stack! */
4009     usereg(r);
4010     rs=stackpos(r);
4011     emit_byte(0xd9); /* Get a copy to the top of stack */
4012     emit_byte(0xc0+rs);
4013    
4014     emit_byte(0xdb); /* store and pop it */
4015     emit_byte(0x3d);
4016     emit_long(m);
4017     }
4018     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4019    
4020     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4021     {
4022     int rs;
4023    
4024     make_tos(r);
4025     emit_byte(0xdb); /* store and pop it */
4026     emit_byte(0x3d);
4027     emit_long(m);
4028     live.onstack[live.tos]=-1;
4029     live.tos--;
4030     live.spos[r]=-2;
4031     }
4032     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4033    
4034     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4035     {
4036     emit_byte(0xdb);
4037     emit_byte(0x2d);
4038     emit_long(m);
4039     tos_make(r);
4040     }
4041     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4042    
4043     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4044     {
4045     emit_byte(0xd9);
4046     emit_byte(0xeb);
4047     tos_make(r);
4048     }
4049     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4050    
4051     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4052     {
4053     emit_byte(0xd9);
4054     emit_byte(0xec);
4055     tos_make(r);
4056     }
4057     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4058    
4059     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4060     {
4061     emit_byte(0xd9);
4062     emit_byte(0xea);
4063     tos_make(r);
4064     }
4065     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4066    
4067     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4068     {
4069     emit_byte(0xd9);
4070     emit_byte(0xed);
4071     tos_make(r);
4072     }
4073     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4074    
4075     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4076     {
4077     emit_byte(0xd9);
4078     emit_byte(0xe8);
4079     tos_make(r);
4080     }
4081     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4082    
4083     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4084     {
4085     emit_byte(0xd9);
4086     emit_byte(0xee);
4087     tos_make(r);
4088     }
4089     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4090    
4091     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4092     {
4093     int ds;
4094    
4095     usereg(s);
4096     ds=stackpos(s);
4097     if (ds==0 && live.spos[d]>=0) {
4098     /* source is on top of stack, and we already have the dest */
4099     int dd=stackpos(d);
4100     emit_byte(0xdd);
4101     emit_byte(0xd0+dd);
4102     }
4103     else {
4104     emit_byte(0xd9);
4105     emit_byte(0xc0+ds); /* duplicate source on tos */
4106     tos_make(d); /* store to destination, pop if necessary */
4107     }
4108     }
4109     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4110    
4111     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4112     {
4113     emit_byte(0xd9);
4114     emit_byte(0xa8+index);
4115     emit_long(base);
4116     }
4117     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4118    
4119    
4120     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4121     {
4122     int ds;
4123    
4124     if (d!=s) {
4125     usereg(s);
4126     ds=stackpos(s);
4127     emit_byte(0xd9);
4128     emit_byte(0xc0+ds); /* duplicate source */
4129     emit_byte(0xd9);
4130     emit_byte(0xfa); /* take square root */
4131     tos_make(d); /* store to destination */
4132     }
4133     else {
4134     make_tos(d);
4135     emit_byte(0xd9);
4136     emit_byte(0xfa); /* take square root */
4137     }
4138     }
4139     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4140    
/* raw_fabs_rr: d = |s|.
 *   d - destination FP register
 *   s - source FP register
 * Same shape as the other unary operations in this file: for d != s a
 * copy of s is pushed (D9 C0+i), FABS (D9 E1) is applied to the copy and
 * tos_make(d) stores it; for d == s the value is brought to ST(0) with
 * make_tos() and modified in place. */
4141     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4142     {
4143     int ds;
4144    
4145     if (d!=s) {
4146     usereg(s);
4147     ds=stackpos(s);
4148     emit_byte(0xd9);
4149     emit_byte(0xc0+ds); /* duplicate source */
4150     emit_byte(0xd9);
4151     emit_byte(0xe1); /* take fabs */
4152     tos_make(d); /* store to destination */
4153     }
4154     else {
4155     make_tos(d);
4156     emit_byte(0xd9);
4157     emit_byte(0xe1); /* take fabs */
4158     }
4159     }
4160     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4161    
/* raw_frndint_rr: d = s rounded to an integral value.
 *   d - destination FP register
 *   s - source FP register
 * Emits FRNDINT (D9 FC), which rounds according to the rounding mode
 * currently set in the x87 control word.  For d != s the operation is
 * done on a pushed copy of s that tos_make(d) then stores; for d == s it
 * is done in place at ST(0). */
4162     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4163     {
4164     int ds;
4165    
4166     if (d!=s) {
4167     usereg(s);
4168     ds=stackpos(s);
4169     emit_byte(0xd9);
4170     emit_byte(0xc0+ds); /* duplicate source */
4171     emit_byte(0xd9);
4172     emit_byte(0xfc); /* take frndint */
4173     tos_make(d); /* store to destination */
4174     }
4175     else {
4176     make_tos(d);
4177     emit_byte(0xd9);
4178     emit_byte(0xfc); /* take frndint */
4179     }
4180     }
4181     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4182    
/* raw_fcos_rr: d = cos(s).
 *   d - destination FP register
 *   s - source FP register
 * Emits FCOS (D9 FF).  For d != s the operation is done on a pushed copy
 * of s that tos_make(d) then stores; for d == s it is done in place at
 * ST(0) after make_tos(d). */
4183     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4184     {
4185     int ds;
4186    
4187     if (d!=s) {
4188     usereg(s);
4189     ds=stackpos(s);
4190     emit_byte(0xd9);
4191     emit_byte(0xc0+ds); /* duplicate source */
4192     emit_byte(0xd9);
4193     emit_byte(0xff); /* take cos */
4194     tos_make(d); /* store to destination */
4195     }
4196     else {
4197     make_tos(d);
4198     emit_byte(0xd9);
4199     emit_byte(0xff); /* take cos */
4200     }
4201     }
4202     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4203    
/* raw_fsin_rr: d = sin(s).
 *   d - destination FP register
 *   s - source FP register
 * Emits FSIN (D9 FE).  For d != s the operation is done on a pushed copy
 * of s that tos_make(d) then stores; for d == s it is done in place at
 * ST(0) after make_tos(d). */
4204     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4205     {
4206     int ds;
4207    
4208     if (d!=s) {
4209     usereg(s);
4210     ds=stackpos(s);
4211     emit_byte(0xd9);
4212     emit_byte(0xc0+ds); /* duplicate source */
4213     emit_byte(0xd9);
4214     emit_byte(0xfe); /* take sin */
4215     tos_make(d); /* store to destination */
4216     }
4217     else {
4218     make_tos(d);
4219     emit_byte(0xd9);
4220     emit_byte(0xfe); /* take sin */
4221     }
4222     }
4223     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4224    
/* The constant 1.0 kept in memory.  Its address is embedded as the memory
 * operand of the "FADD m64" emitted by raw_ftwotox_rr and raw_fetox_rr, so
 * generated code reads it at run time; it must stay a double at a fixed
 * address. */
4225     double one=1;
/* raw_ftwotox_rr: d = 2^s.
 *   d - destination FP register
 *   s - source FP register
 * The value is split as x = rnd(x) + (x - rnd(x)) so that
 *   2^x = 2^(x - rnd(x)) * 2^rnd(x).
 * F2XM1 computes 2^ST(0) - 1 and only accepts a small argument, which is
 * why the integral part is removed first; adding 1.0 from memory turns the
 * result into 2^frac, and FSCALE multiplies by 2^rnd(x), which is still
 * sitting in ST(1).  Two temporaries are pushed on top of the existing
 * stack contents, hence the "up to 8 high" remark below.
 * NOTE(review): the FADD operand is an absolute 32-bit address (ModRM
 * 0x05 + emit_long of &one).  That assumes `one` is reachable through a
 * 32-bit absolute address -- confirm for 64-bit builds. */
4226     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4227     {
4228     int ds;
4229    
4230     usereg(s);
4231     ds=stackpos(s);
4232     emit_byte(0xd9);
4233     emit_byte(0xc0+ds); /* duplicate source */
4234    
4235     emit_byte(0xd9);
4236     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4237     emit_byte(0xd9);
4238     emit_byte(0xfc); /* rndint */
4239     emit_byte(0xd9);
4240     emit_byte(0xc9); /* swap top two elements */
4241     emit_byte(0xd8);
4242     emit_byte(0xe1); /* subtract rounded from original */
4243     emit_byte(0xd9);
4244     emit_byte(0xf0); /* f2xm1 */
4245     emit_byte(0xdc);
4246     emit_byte(0x05);
4247 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4248 gbeauche 1.1 emit_byte(0xd9);
4249     emit_byte(0xfd); /* and scale it */
4250     emit_byte(0xdd);
4251     emit_byte(0xd9); /* take the rounded value off */
4252     tos_make(d); /* store to destination */
4253     }
4254     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4255    
/* raw_fetox_rr: d = e^s.
 *   d - destination FP register
 *   s - source FP register
 * Uses e^x = 2^(x * log2(e)): a copy of s is pushed, multiplied by the
 * constant log2(e) (FLDL2E + FMULP), and the product is then run through
 * the same 2^y sequence as raw_ftwotox_rr: split off rnd(y), F2XM1 on the
 * remainder, add 1.0 from memory, FSCALE by rnd(y), and drop the
 * temporary.
 * NOTE(review): the FADD operand is an absolute 32-bit address (ModRM
 * 0x05 + emit_long of &one).  That assumes `one` is reachable through a
 * 32-bit absolute address -- confirm for 64-bit builds. */
4256     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4257     {
4258     int ds;
4259    
4260     usereg(s);
4261     ds=stackpos(s);
4262     emit_byte(0xd9);
4263     emit_byte(0xc0+ds); /* duplicate source */
4264     emit_byte(0xd9);
4265     emit_byte(0xea); /* fldl2e */
4266     emit_byte(0xde);
4267     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4268    
4269     emit_byte(0xd9);
4270     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4271     emit_byte(0xd9);
4272     emit_byte(0xfc); /* rndint */
4273     emit_byte(0xd9);
4274     emit_byte(0xc9); /* swap top two elements */
4275     emit_byte(0xd8);
4276     emit_byte(0xe1); /* subtract rounded from original */
4277     emit_byte(0xd9);
4278     emit_byte(0xf0); /* f2xm1 */
4279     emit_byte(0xdc);
4280     emit_byte(0x05);
4281 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4282 gbeauche 1.1 emit_byte(0xd9);
4283     emit_byte(0xfd); /* and scale it */
4284     emit_byte(0xdd);
4285     emit_byte(0xd9); /* take the rounded value off */
4286     tos_make(d); /* store to destination */
4287     }
4288     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4289    
/* raw_flog2_rr: d = log2(s).
 *   d - destination FP register
 *   s - source FP register
 * FYL2X (D9 F1) computes ST(1) * log2(ST(0)) and pops, so the sequence
 * pushes a copy of s, pushes 1.0 (FLD1), swaps them so that ST(0) = s and
 * ST(1) = 1.0, and then issues FYL2X.  One value remains on top of the
 * stack and tos_make(d) stores it into d. */
4290     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4291     {
4292     int ds;
4293    
4294     usereg(s);
4295     ds=stackpos(s);
4296     emit_byte(0xd9);
4297     emit_byte(0xc0+ds); /* duplicate source */
4298     emit_byte(0xd9);
4299     emit_byte(0xe8); /* push '1' */
4300     emit_byte(0xd9);
4301     emit_byte(0xc9); /* swap top two */
4302     emit_byte(0xd9);
4303     emit_byte(0xf1); /* take 1*log2(x) */
4304     tos_make(d); /* store to destination */
4305     }
4306     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4307    
4308    
/* raw_fneg_rr: d = -s.
 *   d - destination FP register
 *   s - source FP register
 * Emits FCHS (D9 E0).  For d != s the sign change is applied to a pushed
 * copy of s that tos_make(d) then stores; for d == s it is applied in
 * place at ST(0) after make_tos(d). */
4309     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4310     {
4311     int ds;
4312    
4313     if (d!=s) {
4314     usereg(s);
4315     ds=stackpos(s);
4316     emit_byte(0xd9);
4317     emit_byte(0xc0+ds); /* duplicate source */
4318     emit_byte(0xd9);
4319     emit_byte(0xe0); /* take fchs */
4320     tos_make(d); /* store to destination */
4321     }
4322     else {
4323     make_tos(d);
4324     emit_byte(0xd9);
4325     emit_byte(0xe0); /* take fchs */
4326     }
4327     }
4328     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4329    
/* raw_fadd_rr: d = d + s.
 *   d - destination FP register (read and written)
 *   s - source FP register (read)
 * x87 arithmetic always involves ST(0), so one operand has to be on top:
 *   - s already at the top: emit DC C0+i (FADD ST(i),ST(0)) with i = slot
 *     of d, so the sum lands directly in d's slot.
 *   - otherwise: bring d to the top with make_tos() and emit D8 C0+i
 *     (FADD ST(0),ST(i)) with i = slot of s.
 * Note that the local `ds` holds d's position in the first branch and s's
 * position in the second. */
4330     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4331     {
4332     int ds;
4333    
4334     usereg(s);
4335     usereg(d);
4336    
4337     if (live.spos[s]==live.tos) {
4338     /* Source is on top of stack */
4339     ds=stackpos(d);
4340     emit_byte(0xdc);
4341     emit_byte(0xc0+ds); /* add source to dest*/
4342     }
4343     else {
4344     make_tos(d);
4345     ds=stackpos(s);
4346    
4347     emit_byte(0xd8);
4348     emit_byte(0xc0+ds); /* add source to dest*/
4349     }
4350     }
4351     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4352    
/* raw_fsub_rr: d = d - s.
 *   d - destination FP register (read and written)
 *   s - source FP register (read)
 * Subtraction is not commutative, so the two branches use different
 * opcode rows to keep the operand order d - s:
 *   - s at the top: DC E8+i (FSUB ST(i),ST(0)) with i = slot of d, i.e.
 *     ST(i) = ST(i) - ST(0).
 *   - otherwise: d is brought to the top and D8 E0+i (FSUB ST(0),ST(i))
 *     with i = slot of s is emitted, i.e. ST(0) = ST(0) - ST(i).
 * The local `ds` holds d's position in the first branch and s's position
 * in the second. */
4353     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4354     {
4355     int ds;
4356    
4357     usereg(s);
4358     usereg(d);
4359    
4360     if (live.spos[s]==live.tos) {
4361     /* Source is on top of stack */
4362     ds=stackpos(d);
4363     emit_byte(0xdc);
4364     emit_byte(0xe8+ds); /* sub source from dest*/
4365     }
4366     else {
4367     make_tos(d);
4368     ds=stackpos(s);
4369    
4370     emit_byte(0xd8);
4371     emit_byte(0xe0+ds); /* sub src from dest */
4372     }
4373     }
4374     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4375    
/* raw_fcmp_rr: compare FP register d with FP register s.
 *   d - first operand (read only)
 *   s - second operand (read only)
 * d is brought to the top of the stack and DD E0+i (FUCOM ST(i)) is
 * emitted with i = slot of s.  FUCOM is the unordered compare: it sets
 * the condition-code bits of the x87 status word and pops nothing, so
 * neither register is modified. */
4376     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4377     {
4378     int ds;
4379    
4380     usereg(s);
4381     usereg(d);
4382    
4383     make_tos(d);
4384     ds=stackpos(s);
4385    
4386     emit_byte(0xdd);
4387     emit_byte(0xe0+ds); /* cmp dest with source*/
4388     }
4389     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4390    
/* raw_fmul_rr: d = d * s.
 *   d - destination FP register (read and written)
 *   s - source FP register (read)
 *   - s at the top: DC C8+i (FMUL ST(i),ST(0)) with i = slot of d, so the
 *     product lands directly in d's slot.
 *   - otherwise: d is brought to the top and D8 C8+i (FMUL ST(0),ST(i))
 *     with i = slot of s is emitted.
 * The local `ds` holds d's position in the first branch and s's position
 * in the second. */
4391     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4392     {
4393     int ds;
4394    
4395     usereg(s);
4396     usereg(d);
4397    
4398     if (live.spos[s]==live.tos) {
4399     /* Source is on top of stack */
4400     ds=stackpos(d);
4401     emit_byte(0xdc);
4402     emit_byte(0xc8+ds); /* mul dest by source*/
4403     }
4404     else {
4405     make_tos(d);
4406     ds=stackpos(s);
4407    
4408     emit_byte(0xd8);
4409     emit_byte(0xc8+ds); /* mul dest by source*/
4410     }
4411     }
4412     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4413    
/* raw_fdiv_rr: d = d / s.
 *   d - destination FP register (read and written)
 *   s - source FP register (read)
 * As with subtraction, the opcode row differs between the branches so the
 * operand order stays d / s:
 *   - s at the top: DC F8+i (FDIV ST(i),ST(0)) with i = slot of d, i.e.
 *     ST(i) = ST(i) / ST(0).
 *   - otherwise: d is brought to the top and D8 F0+i (FDIV ST(0),ST(i))
 *     with i = slot of s is emitted, i.e. ST(0) = ST(0) / ST(i).
 * The local `ds` holds d's position in the first branch and s's position
 * in the second. */
4414     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4415     {
4416     int ds;
4417    
4418     usereg(s);
4419     usereg(d);
4420    
4421     if (live.spos[s]==live.tos) {
4422     /* Source is on top of stack */
4423     ds=stackpos(d);
4424     emit_byte(0xdc);
4425     emit_byte(0xf8+ds); /* div dest by source */
4426     }
4427     else {
4428     make_tos(d);
4429     ds=stackpos(s);
4430    
4431     emit_byte(0xd8);
4432     emit_byte(0xf0+ds); /* div dest by source*/
4433     }
4434     }
4435     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4436    
/* raw_frem_rr: d = remainder of d / s, using FPREM (D9 F8).
 *   d - destination FP register (read and written)
 *   s - source FP register (read)
 * FPREM has fixed operands: it replaces ST(0) with the remainder of
 * ST(0) / ST(1).  make_tos2(d,s) is expected to arrange the stack that
 * way; the check below verifies that s really ended up at ST(1) and
 * aborts code generation otherwise.
 * NOTE(review): only one FPREM is emitted.  The instruction yields a
 * partial remainder when the operands' exponents differ widely (C2 set in
 * the status word); no retry loop is generated here. */
4437     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4438     {
4439     int ds;
4440    
4441     usereg(s);
4442     usereg(d);
4443    
4444     make_tos2(d,s);
4445     ds=stackpos(s);
4446    
4447     if (ds!=1) {
4448     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4449     abort();
4450     }
4451     emit_byte(0xd9);
4452     emit_byte(0xf8); /* take rem from dest by source */
4453     }
4454     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4455    
/* raw_frem1_rr: d = IEEE remainder of d / s, using FPREM1 (D9 F5).
 *   d - destination FP register (read and written)
 *   s - source FP register (read)
 * Identical in structure to raw_frem_rr; the only difference is the
 * opcode.  FPREM1 rounds the implied quotient to nearest, whereas FPREM
 * truncates it.  The instruction operates on ST(0) and ST(1), so
 * make_tos2(d,s) must leave s at ST(1); this is checked and code
 * generation aborts if it does not hold.
 * NOTE(review): as with FPREM, a single FPREM1 may only produce a partial
 * remainder; no retry loop is generated here. */
4456     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4457     {
4458     int ds;
4459    
4460     usereg(s);
4461     usereg(d);
4462    
4463     make_tos2(d,s);
4464     ds=stackpos(s);
4465    
4466     if (ds!=1) {
4467     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4468     abort();
4469     }
4470     emit_byte(0xd9);
4471     emit_byte(0xf5); /* take rem1 from dest by source */
4472     }
4473     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4474    
4475    
/* raw_ftst_r: test FP register r against zero.
 *   r - FP register to test (read only)
 * r is brought to the top of the stack and FTST (D9 E4) is emitted, which
 * compares ST(0) with 0.0 and records the outcome in the condition-code
 * bits of the x87 status word.  The CPU flags are not touched by this
 * instruction. */
4476     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4477     {
4478     make_tos(r);
4479     emit_byte(0xd9); /* ftst */
4480     emit_byte(0xe4);
4481     }
4482     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4483    
/* Scratch-register contract for raw_fflags_into_flags():
 *   FFLAG_NREG_CLOBBER_CONDITION - true when that routine overwrites an
 *                                  integer register;
 *   FFLAG_NREG                   - which register that is.
 * When have_cmov is false the routine falls back to FUCOM + FSTSW AX +
 * SAHF, which destroys AX.  have_cmov doubles as the "FUCOMI available"
 * indicator. */
4484     /* %eax register is clobbered if target processor doesn't support fucomi */
4485     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4486     #define FFLAG_NREG EAX_INDEX
4487    
/* raw_fflags_into_flags: compare FP register r with 0.0 and put the result
 * into the CPU flags.
 *   r - FP register to examine; its value is preserved.
 * Emitted sequence, with p = stack position of r before the push:
 *   FLDZ              push 0.0; every earlier slot moves down by one, so r
 *                     is now at ST(p+1) -- this is why each opcode below
 *                     uses "+1+p" (0xc9 = 0xc8+1, 0xe9 = 0xe8+1, ...).
 *   FXCH ST(p+1)      r's value goes to ST(0), the 0.0 into r's slot.
 *   FUCOMI ST(p+1)    compare value with 0.0 straight into EFLAGS, or, when
 *                     have_cmov is false,
 *   FUCOM ST(p+1); FWAIT; FNSTSW AX; SAHF
 *                     compare, copy the status word to AX and load AH into
 *                     the flags (this path clobbers AX).
 *   FSTP ST(p+1)      write the value back over the 0.0 and pop, restoring
 *                     the original stack layout. */
4488     static __inline__ void raw_fflags_into_flags(int r)
4489     {
4490     int p;
4491    
4492     usereg(r);
4493     p=stackpos(r);
4494    
4495     emit_byte(0xd9);
4496     emit_byte(0xee); /* Push 0 */
4497     emit_byte(0xd9);
4498     emit_byte(0xc9+p); /* swap top two around */
4499     if (have_cmov) {
4500     // gb-- fucomi is for P6 cores only, not K6-2 then...
4501     emit_byte(0xdb);
4502     emit_byte(0xe9+p); /* fucomi them */
4503     }
4504     else {
4505     emit_byte(0xdd);
4506     emit_byte(0xe1+p); /* fucom them */
4507     emit_byte(0x9b);
4508     emit_byte(0xdf);
4509     emit_byte(0xe0); /* fstsw ax */
4510     raw_sahf(0); /* sahf */
4511     }
4512     emit_byte(0xdd);
4513     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4514     }