ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.13
Committed: 2003-03-18T17:26:32Z (21 years, 6 months ago) by gbeauche
Branch: MAIN
Changes since 1.12: +968 -48 lines
Log Message:
Add new backend, disabled until it's proofread and fully functional
Remove obsolete string-related instructions

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Symbolic names for register numbers 0..7 (EAX, ECX, EDX, EBX, ESP, EBP,
   ESI, EDI). The literal register numbers used by the other definitions in
   this file appear to follow the same numbering - confirm before changing. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45    
46     /* The register in which subroutines return an integer return value */
/* Value 0 equals EAX_INDEX. */
47     #define REG_RESULT 0
48    
49     /* The registers subroutines take their first and second argument in */
/* Two configurations: with _MSC_VER defined and USE_NORMAL_CALLING_CONVENTION
   not defined, the pair is 1/2 (ECX_INDEX/EDX_INDEX); otherwise it is 0/2
   (EAX_INDEX/EDX_INDEX). */
50     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
51     /* Handle the _fastcall parameters of ECX and EDX */
52     #define REG_PAR1 1
53     #define REG_PAR2 2
54     #else
55     #define REG_PAR1 0
56     #define REG_PAR2 2
57     #endif
58    
59     /* Three registers that are not used for any of the above */
/* Values 6, 5 and 3 equal ESI_INDEX, EBP_INDEX and EBX_INDEX. */
60     #define REG_NOPAR1 6
61     #define REG_NOPAR2 5
62     #define REG_NOPAR3 3
63    
/* REG_PC_PRE is always 0 (EAX_INDEX). REG_PC_TMP is 1 (ECX_INDEX) on the
   default path.
   NOTE(review): on the _MSC_VER path REG_PC_TMP is 0, i.e. the same value as
   REG_PC_PRE, which does not match the "not the above" remark below -
   confirm this is intended. */
64     #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     #define REG_PC_TMP 0
67     #else
68     #define REG_PC_TMP 1 /* Another register that is not the above */
69     #endif
70    
/* Fixed-register constraints: SHIFTCOUNT_NREG is 1 (ECX_INDEX), MUL_NREG1 is
   0 (EAX_INDEX) and MUL_NREG2 is 2 (EDX_INDEX). */
71     #define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount.
72     -1 if any reg will do */
73     #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
74     #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
75    
/* Register-number lists, each ending in -1 (presumably an end-of-list
   marker - confirm against the code that walks them).
   always_used holds only 4 (ESP_INDEX); can_byte holds 0..3 (EAX, ECX, EDX,
   EBX); can_word holds every index except 4 (ESP_INDEX). */
76     uae_s8 always_used[]={4,-1};
77     uae_s8 can_byte[]={0,1,2,3,-1};
78     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79    
80     /* cpuopti mutates instruction handlers to assume registers are saved
81     by the caller */
/* One entry per register number 0..7; only entry 4 (ESP_INDEX) is set. */
82     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
83    
84     /* This *should* be the same as call_saved. But:
85     - We might not really know which registers are saved, and which aren't,
86     so we need to preserve some, but don't want to rely on everyone else
87     also saving those registers
88     - Special registers (such as the stack pointer) should not be "preserved"
89     by pushing, even though they are "saved" across function calls
90     */
/* One entry per register number 0..7; every entry is set except 4
   (ESP_INDEX). */
91     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
92    
93     /* Whether classes of instructions do or don't clobber the native flags */
/* Each CLOBBER_<class> macro expands either to nothing or to a
   clobber_flags() call. The empty ones are MOV, LEA, CMOV, POP, PUSH, CL16,
   CL8, SE16, SE8, ZE16, ZE8, SW32 and SETCC; all the others expand to
   clobber_flags(). */
94     #define CLOBBER_MOV
95     #define CLOBBER_LEA
96     #define CLOBBER_CMOV
97     #define CLOBBER_POP
98     #define CLOBBER_PUSH
99     #define CLOBBER_SUB clobber_flags()
100     #define CLOBBER_SBB clobber_flags()
101     #define CLOBBER_CMP clobber_flags()
102     #define CLOBBER_ADD clobber_flags()
103     #define CLOBBER_ADC clobber_flags()
104     #define CLOBBER_AND clobber_flags()
105     #define CLOBBER_OR clobber_flags()
106     #define CLOBBER_XOR clobber_flags()
107    
108     #define CLOBBER_ROL clobber_flags()
109     #define CLOBBER_ROR clobber_flags()
110     #define CLOBBER_SHLL clobber_flags()
111     #define CLOBBER_SHRL clobber_flags()
112     #define CLOBBER_SHRA clobber_flags()
113     #define CLOBBER_TEST clobber_flags()
114     #define CLOBBER_CL16
115     #define CLOBBER_CL8
116     #define CLOBBER_SE16
117     #define CLOBBER_SE8
118     #define CLOBBER_ZE16
119     #define CLOBBER_ZE8
120     #define CLOBBER_SW16 clobber_flags()
121     #define CLOBBER_SW32
122     #define CLOBBER_SETCC
123     #define CLOBBER_MUL clobber_flags()
124     #define CLOBBER_BT clobber_flags()
125     #define CLOBBER_BSF clobber_flags()
126    
127 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
128     #if 0
129    
/* Configuration macros defined just before codegen_x86.h is included:
   X86_TARGET_64BIT is set to 1 only when __x86_64__ is defined, and
   X86_FLAT_REGISTERS is always 0. */
130     #if defined(__x86_64__)
131     #define X86_TARGET_64BIT 1
132     #endif
133     #define X86_FLAT_REGISTERS 0
134     #include "codegen_x86.h"
135    
/* Hook macros mapping the x86_* names onto this file's emit_byte, emit_word,
   emit_long and get_target. x86_emit_failure routes to jit_fail together
   with the __FILE__, __LINE__ and __FUNCTION__ of the expansion site. */
136     #define x86_emit_byte(B) emit_byte(B)
137     #define x86_emit_word(W) emit_word(W)
138     #define x86_emit_long(L) emit_long(L)
139     #define x86_get_target() get_target()
140     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
141    
/* Report a JIT failure and terminate: prints the function, file, line and
   message to stderr, then calls abort(). Does not return. */
142     static void jit_fail(const char *msg, const char *file, int line, const char *function)
143     {
144     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
145     function, file, line, msg);
146     abort();
147     }
148    
/* Passes register r to the PUSHLr emitter macro. */
149     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
150     {
151     PUSHLr(r);
152     }
153     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
154    
/* Passes register r to the POPLr emitter macro. */
155     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
156     {
157     POPLr(r);
158     }
159     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
160    
/* Calls BTLir with the immediate i first and the register r second. */
161     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
162     {
163     BTLir(i, r);
164     }
165     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
166    
/* Calls BTLrr with register b first and register r second. */
167     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
168     {
169     BTLrr(b, r);
170     }
171     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
172    
/* Calls BTCLir with the immediate i first and the register r second. */
173     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
174     {
175     BTCLir(i, r);
176     }
177     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
178    
/* Calls BTCLrr with register b first and register r second. */
179     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
180     {
181     BTCLrr(b, r);
182     }
183     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
184    
/* Calls BTRLir with the immediate i first and the register r second. */
185     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
186     {
187     BTRLir(i, r);
188     }
189     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
190    
/* Calls BTRLrr with register b first and register r second. */
191     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
192     {
193     BTRLrr(b, r);
194     }
195     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
196    
/* Calls BTSLir with the immediate i first and the register r second. */
197     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
198     {
199     BTSLir(i, r);
200     }
201     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
202    
/* Calls BTSLrr with register b first and register r second. */
203     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
204     {
205     BTSLrr(b, r);
206     }
207     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
208    
/* Calls SUBWir with the immediate i first and the register d second. */
209     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
210     {
211     SUBWir(i, d);
212     }
213     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
214    
/* Calls MOVLmr with s as the address, X86_NOREG in both register slots, a
   factor of 1, and register d as the last argument. */
215     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
216     {
217     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
218     }
219     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
220    
/* Calls MOVLim with the immediate s first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. */
221     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
222     {
223     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
224     }
225     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
226    
/* Calls MOVWim with the immediate s first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. */
227     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
228     {
229     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
230     }
231     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
232    
/* Calls MOVBim with the immediate s first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. */
233     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
234     {
235     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
236     }
237     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
238    
/* Calls ROLBim with the immediate i first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. */
239     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
240     {
241     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
242     }
243     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
244    
/* Calls ROLBir with the immediate i first and the register r second. */
245     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
246     {
247     ROLBir(i, r);
248     }
249     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
250    
/* Calls ROLWir with the immediate i first and the register r second. */
251     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
252     {
253     ROLWir(i, r);
254     }
255     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
256    
/* Calls ROLLir with the immediate i first and the register r second. */
257     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
258     {
259     ROLLir(i, r);
260     }
261     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
262    
/* Calls ROLLrr with register r first and the register d second. */
263     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
264     {
265     ROLLrr(r, d);
266     }
267     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
268    
/* Calls ROLWrr with register r first and the register d second. */
269     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
270     {
271     ROLWrr(r, d);
272     }
273     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
274    
/* Calls ROLBrr with register r first and the register d second. */
275     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
276     {
277     ROLBrr(r, d);
278     }
279     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
280    
/* Calls SHLLrr with register r first and the register d second. */
281     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
282     {
283     SHLLrr(r, d);
284     }
285     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
286    
/* Calls SHLWrr with register r first and the register d second. */
287     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
288     {
289     SHLWrr(r, d);
290     }
291     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
292    
/* Calls SHLBrr with register r first and the register d second. */
293     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
294     {
295     SHLBrr(r, d);
296     }
297     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
298    
/* Calls RORBir with the immediate i first and the register r second. */
299     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
300     {
301     RORBir(i, r);
302     }
303     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
304    
/* Calls RORWir with the immediate i first and the register r second. */
305     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
306     {
307     RORWir(i, r);
308     }
309     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
310    
/* Calls ORLmr with s as the address, X86_NOREG in both register slots, a
   factor of 1, and register d as the last argument. */
311     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
312     {
313     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
314     }
315     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
316    
/* Calls RORLir with the immediate i first and the register r second. */
317     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
318     {
319     RORLir(i, r);
320     }
321     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
322    
/* Calls RORLrr with register r first and the register d second. */
323     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
324     {
325     RORLrr(r, d);
326     }
327     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
328    
/* Calls RORWrr with register r first and the register d second. */
329     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
330     {
331     RORWrr(r, d);
332     }
333     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
334    
/* Calls RORBrr with register r first and the register d second. */
335     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
336     {
337     RORBrr(r, d);
338     }
339     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
340    
/* Calls SHRLrr with register r first and the register d second. */
341     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
342     {
343     SHRLrr(r, d);
344     }
345     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
346    
/* Calls SHRWrr with register r first and the register d second. */
347     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
348     {
349     SHRWrr(r, d);
350     }
351     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
352    
/* Calls SHRBrr with register r first and the register d second. */
353     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
354     {
355     SHRBrr(r, d);
356     }
357     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
358    
/* Stub: the body only calls abort(); d and r are not used. */
359     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
360     {
361     abort();
362     }
363     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
364    
/* Stub: the body only calls abort(); d and r are not used. */
365     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
366     {
367     abort();
368     }
369     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
370    
/* Stub: the body only calls abort(); d and r are not used. */
371     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
372     {
373     abort();
374     }
375     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
376    
/* Calls SHLLir with the immediate i first and the register r second. */
377     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
378     {
379     SHLLir(i, r);
380     }
381     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
382    
/* Calls SHLWir with the immediate i first and the register r second. */
383     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
384     {
385     SHLWir(i, r);
386     }
387     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
388    
/* Calls SHLBir with the immediate i first and the register r second. */
389     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
390     {
391     SHLBir(i, r);
392     }
393     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
394    
/* Calls SHRLir with the immediate i first and the register r second. */
395     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
396     {
397     SHRLir(i, r);
398     }
399     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
400    
/* Calls SHRWir with the immediate i first and the register r second. */
401     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
402     {
403     SHRWir(i, r);
404     }
405     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
406    
/* Calls SHRBir with the immediate i first and the register r second. */
407     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
408     {
409     SHRBir(i, r);
410     }
411     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
412    
/* Stub: the body only calls abort(); r and i are not used. */
413     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
414     {
415     abort();
416     }
417     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
418    
/* Stub: the body only calls abort(); r and i are not used. */
419     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
420     {
421     abort();
422     }
423     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
424    
/* Stub: the body only calls abort(); r and i are not used. */
425     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
426     {
427     abort();
428     }
429     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
430    
/* Calls SAHF() with no arguments; dummy_ah is not referenced in the body. */
431     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
432     {
433     SAHF();
434     }
435     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
436    
/* Calls CPUID() with no arguments; dummy_eax is not referenced in the body. */
437     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
438     {
439     CPUID();
440     }
441     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
442    
/* Calls LAHF() with no arguments; dummy_ah is not referenced in the body. */
443     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
444     {
445     LAHF();
446     }
447     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
448    
/* Calls SETCCir with the condition code cc first and the register d second. */
449     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
450     {
451     SETCCir(cc, d);
452     }
453     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
454    
/* Calls SETCCim with the condition code cc first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. */
455     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
456     {
457     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
458     }
459     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
460    
/* Calls CMOVLrr with the condition code cc first, then register s, then
   register d. */
461     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
462     {
463     CMOVLrr(cc, s, d);
464     }
465     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
466    
/* Calls BSFLrr with register s first and register d second. */
467     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
468     {
469     BSFLrr(s, d);
470     }
471     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
472    
/* Calls MOVSWLrr with register s first and register d second. */
473     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
474     {
475     MOVSWLrr(s, d);
476     }
477     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
478    
/* Calls MOVSBLrr with register s first and register d second. */
479     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
480     {
481     MOVSBLrr(s, d);
482     }
483     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
484    
/* Calls MOVZWLrr with register s first and register d second. */
485     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
486     {
487     MOVZWLrr(s, d);
488     }
489     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
490    
/* Calls MOVZBLrr with register s first and register d second. */
491     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
492     {
493     MOVZBLrr(s, d);
494     }
495     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
496    
/* Stub: the body only calls abort(); d and s are not used. */
497     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
498     {
499     abort();
500     }
501     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
502    
/* Stub: the body only calls abort(); d and s are not used. */
503     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
504     {
505     abort();
506     }
507     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
508    
/* Stub: the body only calls abort(); d and s are not used. */
509     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
510     {
511     abort();
512     }
513     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
514    
/* Stub: the body only calls abort(); d and s are not used. */
515     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
516     {
517     abort();
518     }
519     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
520    
/* Calls MOVBrr with register s first and register d second. */
521     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
522     {
523     MOVBrr(s, d);
524     }
525     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
526    
/* Calls MOVWrr with register s first and register d second. */
527     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
528     {
529     MOVWrr(s, d);
530     }
531     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
532    
/* Calls MOVLmr with a literal 0 in the first slot, then baser, index and
   factor, and register d as the last argument. */
533     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
534     {
535     MOVLmr(0, baser, index, factor, d);
536     }
537     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
538    
/* Calls MOVWmr with a literal 0 in the first slot, then baser, index and
   factor, and register d as the last argument. */
539     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
540     {
541     MOVWmr(0, baser, index, factor, d);
542     }
543     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
544    
/* Calls MOVBmr with a literal 0 in the first slot, then baser, index and
   factor, and register d as the last argument. */
545     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
546     {
547     MOVBmr(0, baser, index, factor, d);
548     }
549     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
550    
/* Calls MOVLrm with register s first, a literal 0 in the next slot, then
   baser, index and factor. */
551     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
552     {
553     MOVLrm(s, 0, baser, index, factor);
554     }
555     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
556    
/* Calls MOVWrm with register s first, a literal 0 in the next slot, then
   baser, index and factor. */
557     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
558     {
559     MOVWrm(s, 0, baser, index, factor);
560     }
561     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
562    
/* Calls MOVBrm with register s first, a literal 0 in the next slot, then
   baser, index and factor. */
563     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
564     {
565     MOVBrm(s, 0, baser, index, factor);
566     }
567     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
568    
/* Calls MOVLrm with register s first, then base, baser, index and factor in
   that order. */
569     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
570     {
571     MOVLrm(s, base, baser, index, factor);
572     }
573     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
574    
/* Calls MOVWrm with register s first, then base, baser, index and factor in
   that order. */
575     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
576     {
577     MOVWrm(s, base, baser, index, factor);
578     }
579     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
580    
/* Calls MOVBrm with register s first, then base, baser, index and factor in
   that order. */
581     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
582     {
583     MOVBrm(s, base, baser, index, factor);
584     }
585     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
586    
/* Calls MOVLmr with base, baser, index and factor in that order, and
   register d as the last argument. */
587     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
588     {
589     MOVLmr(base, baser, index, factor, d);
590     }
591     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
592    
/* Calls MOVWmr with base, baser, index and factor in that order, and
   register d as the last argument. */
593     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
594     {
595     MOVWmr(base, baser, index, factor, d);
596     }
597     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
598    
/* Calls MOVBmr with base, baser, index and factor in that order, and
   register d as the last argument. */
599     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
600     {
601     MOVBmr(base, baser, index, factor, d);
602     }
603     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
604    
/* Calls MOVLmr with base first, X86_NOREG in the following register slot,
   then index and factor, and register d as the last argument. */
605     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
606     {
607     MOVLmr(base, X86_NOREG, index, factor, d);
608     }
609     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
610    
/* Calls CMOVLmr with the condition cond first, then base, X86_NOREG, index
   and factor, and register d as the last argument. */
611     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
612     {
613     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
614     }
615     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
616    
/* Calls CMOVLmr with the condition cond first, then mem as the address with
   X86_NOREG in both register slots and a factor of 1, and register d last. */
617     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
618     {
619     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
620     }
621     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
622    
/* Calls MOVLmr with offset first, register s in the next slot, X86_NOREG and
   a factor of 1, and register d as the last argument. */
623     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
624     {
625     MOVLmr(offset, s, X86_NOREG, 1, d);
626     }
627     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
628    
/* Calls MOVWmr with offset first, register s in the next slot, X86_NOREG and
   a factor of 1, and register d as the last argument. */
629     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
630     {
631     MOVWmr(offset, s, X86_NOREG, 1, d);
632     }
633     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
634    
/* Calls MOVBmr with offset first, register s in the next slot, X86_NOREG and
   a factor of 1, and register d as the last argument. */
635     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
636     {
637     MOVBmr(offset, s, X86_NOREG, 1, d);
638     }
639     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
640    
/* Calls MOVLmr(offset, s, X86_NOREG, 1, d).
   NOTE(review): the body is the same call as in raw_mov_l_rR - confirm that
   the two variants are not meant to differ. */
641     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
642     {
643     MOVLmr(offset, s, X86_NOREG, 1, d);
644     }
645     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
646    
/* Calls MOVWmr(offset, s, X86_NOREG, 1, d).
   NOTE(review): the body is the same call as in raw_mov_w_rR - confirm that
   the two variants are not meant to differ. */
647     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
648     {
649     MOVWmr(offset, s, X86_NOREG, 1, d);
650     }
651     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
652    
/* Calls MOVBmr(offset, s, X86_NOREG, 1, d).
   NOTE(review): the body is the same call as in raw_mov_b_rR - confirm that
   the two variants are not meant to differ. */
653     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
654     {
655     MOVBmr(offset, s, X86_NOREG, 1, d);
656     }
657     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
658    
/* Calls MOVLim with the immediate i first, then offset, register d,
   X86_NOREG and a factor of 1 describing the destination. */
659     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
660     {
661     MOVLim(i, offset, d, X86_NOREG, 1);
662     }
663     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
664    
/* Calls MOVWim with the immediate i first, then offset, register d,
   X86_NOREG and a factor of 1 describing the destination. */
665     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
666     {
667     MOVWim(i, offset, d, X86_NOREG, 1);
668     }
669     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
670    
/* Calls MOVBim with the immediate i first, then offset, register d,
   X86_NOREG and a factor of 1 describing the destination. */
671     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
672     {
673     MOVBim(i, offset, d, X86_NOREG, 1);
674     }
675     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
676    
/* Calls MOVLrm with register s first, then offset, register d, X86_NOREG and
   a factor of 1 describing the destination. */
677     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
678     {
679     MOVLrm(s, offset, d, X86_NOREG, 1);
680     }
681     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
682    
/* Calls MOVWrm with register s first, then offset, register d, X86_NOREG and
   a factor of 1 describing the destination. */
683     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
684     {
685     MOVWrm(s, offset, d, X86_NOREG, 1);
686     }
687     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
688    
/* Calls MOVBrm with register s first, then offset, register d, X86_NOREG and
   a factor of 1 describing the destination. */
689     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
690     {
691     MOVBrm(s, offset, d, X86_NOREG, 1);
692     }
693     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
694    
/* Calls LEALmr with offset first, register s in the next slot, X86_NOREG and
   a factor of 1, and register d as the last argument. */
695     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
696     {
697     LEALmr(offset, s, X86_NOREG, 1, d);
698     }
699     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
700    
/* Calls LEALmr with offset, s, index and factor in that order, and register
   d as the last argument. */
701     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
702     {
703     LEALmr(offset, s, index, factor, d);
704     }
705     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
706    
/* Calls LEALmr with a literal 0 in the first slot, then s, index and factor,
   and register d as the last argument. */
707     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
708     {
709     LEALmr(0, s, index, factor, d);
710     }
711     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
712    
/* Calls MOVLrm(s, offset, d, X86_NOREG, 1).
   NOTE(review): the body is the same call as in raw_mov_l_Rr - confirm that
   the two variants are not meant to differ. */
713     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
714     {
715     MOVLrm(s, offset, d, X86_NOREG, 1);
716     }
717     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
718    
/* Calls MOVWrm(s, offset, d, X86_NOREG, 1).
   NOTE(review): the body is the same call as in raw_mov_w_Rr - confirm that
   the two variants are not meant to differ. */
719     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
720     {
721     MOVWrm(s, offset, d, X86_NOREG, 1);
722     }
723     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
724    
/* Calls MOVBrm(s, offset, d, X86_NOREG, 1).
   NOTE(review): the body is the same call as in raw_mov_b_Rr - confirm that
   the two variants are not meant to differ. */
725     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
726     {
727     MOVBrm(s, offset, d, X86_NOREG, 1);
728     }
729     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
730    
/* Passes register r to the BSWAPLr emitter macro. */
731     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
732     {
733     BSWAPLr(r);
734     }
735     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
736    
/* Implemented through ROLWir with a constant count of 8 on register r,
   rather than through a dedicated byte-swap macro. */
737     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
738     {
739     ROLWir(8, r);
740     }
741     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
742    
/* Calls MOVLrr with register s first and register d second. */
743     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
744     {
745     MOVLrr(s, d);
746     }
747     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
748    
/* Calls MOVLrm with register s first, then d as the address with X86_NOREG
   in both register slots and a factor of 1. d is declared IMM here. */
749     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
750     {
751     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
752     }
753     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
754    
/* Calls MOVWrm with register s first, then d as the address with X86_NOREG
   in both register slots and a factor of 1. d is declared IMM here. */
755     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
756     {
757     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
758     }
759     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
760    
/* Calls MOVWmr with s as the address, X86_NOREG in both register slots, a
   factor of 1, and register d as the last argument. s is declared IMM here. */
761     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
762     {
763     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
764     }
765     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
766    
/* Calls MOVBrm with register s first, then d as the address with X86_NOREG
   in both register slots and a factor of 1. d is declared IMM here. */
767     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
768     {
769     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
770     }
771     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
772    
/* Calls MOVBmr with s as the address, X86_NOREG in both register slots, a
   factor of 1, and register d as the last argument. s is declared IMM here. */
773     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
774     {
775     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
776     }
777     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
778    
/* Calls MOVLir with the immediate s first and the register d second. */
779     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
780     {
781     MOVLir(s, d);
782     }
783     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
784    
/* Calls MOVWir with the immediate s first and the register d second. */
785     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
786     {
787     MOVWir(s, d);
788     }
789     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
790    
/* Calls MOVBir with the immediate s first and the register d second. */
791     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
792     {
793     MOVBir(s, d);
794     }
795     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
796    
/* Calls ADCLim with the immediate s first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. */
797     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
798     {
799     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
800     }
801     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
802    
/* Calls ADDLim with the immediate s first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. d is declared IMM. */
803     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
804     {
805     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
806     }
807     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
808    
/* Calls ADDWim with the immediate s first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. d is declared IMM. */
809     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
810     {
811     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
812     }
813     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
814    
/* Calls ADDBim with the immediate s first, then d as the address with
   X86_NOREG in both register slots and a factor of 1. d is declared IMM. */
815     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
816     {
817     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
818     }
819     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
820    
/* Calls TESTLir with the immediate i first and the register d second. */
821     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
822     {
823     TESTLir(i, d);
824     }
825     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
826    
/* Calls TESTLrr with register s first and register d second. */
827     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
828     {
829     TESTLrr(s, d);
830     }
831     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
832    
/* Calls TESTWrr with register s first and register d second. */
833     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
834     {
835     TESTWrr(s, d);
836     }
837     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
838    
/* Calls TESTBrr with register s first and register d second. */
839     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
840     {
841     TESTBrr(s, d);
842     }
843     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
844    
/* Calls ANDLir with the immediate i first and the register d second. */
845     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
846     {
847     ANDLir(i, d);
848     }
849     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
850    
/* Calls ANDWir with the immediate i first and the register d second. */
851     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
852     {
853     ANDWir(i, d);
854     }
855     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
856    
/* Calls ANDLrr with register s first and register d second. */
857     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
858     {
859     ANDLrr(s, d);
860     }
861     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
862    
/* Calls ANDWrr with register s first and register d second. */
863     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
864     {
865     ANDWrr(s, d);
866     }
867     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
868    
/* Calls ANDBrr with register s first and register d second. */
869     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
870     {
871     ANDBrr(s, d);
872     }
873     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
874    
/* Calls ORLir with the immediate i first and the register d second. */
875     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
876     {
877     ORLir(i, d);
878     }
879     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
880    
/* Calls ORLrr with register s first and register d second. */
881     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
882     {
883     ORLrr(s, d);
884     }
885     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
886    
/* Calls ORWrr with register s first and register d second. */
887     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
888     {
889     ORWrr(s, d);
890     }
891     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
892    
/* Calls ORBrr with register s first and register d second. */
893     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
894     {
895     ORBrr(s, d);
896     }
897     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
898    
/* Calls ADCLrr with register s first and register d second. */
899     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
900     {
901     ADCLrr(s, d);
902     }
903     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
904    
/* Calls ADCWrr with register s first and register d second. */
905     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
906     {
907     ADCWrr(s, d);
908     }
909     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
910    
/* Calls ADCBrr with register s first and register d second. */
911     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
912     {
913     ADCBrr(s, d);
914     }
915     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
916    
/* Calls ADDLrr with register s first and register d second. */
917     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
918     {
919     ADDLrr(s, d);
920     }
921     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
922    
/* Calls ADDWrr with register s first and register d second. */
923     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
924     {
925     ADDWrr(s, d);
926     }
927     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
928    
/* Calls ADDBrr with register s first and register d second. */
929     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
930     {
931     ADDBrr(s, d);
932     }
933     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
934    
/* Calls SUBLir with the immediate i first and the register d second. */
935     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
936     {
937     SUBLir(i, d);
938     }
939     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
940    
/* Calls SUBBir with the immediate i first and the register d second. */
941     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
942     {
943     SUBBir(i, d);
944     }
945     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
946    
/* Calls ADDLir with the immediate i first and the register d second. */
947     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
948     {
949     ADDLir(i, d);
950     }
951     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
952    
/* Calls ADDWir with the immediate i first and the register d second. */
953     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
954     {
955     ADDWir(i, d);
956     }
957     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
958    
/* Calls ADDBir with the immediate i first and the register d second. */
959     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
960     {
961     ADDBir(i, d);
962     }
963     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
964    
/* Calls SBBLrr with register s first and register d second. */
965     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
966     {
967     SBBLrr(s, d);
968     }
969     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
970    
/* Calls SBBWrr with register s first and register d second. */
971     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
972     {
973     SBBWrr(s, d);
974     }
975     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
976    
/* Calls SBBBrr with register s first and register d second. */
977     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
978     {
979     SBBBrr(s, d);
980     }
981     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
982    
/* Calls SUBLrr with register s first and register d second. */
983     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
984     {
985     SUBLrr(s, d);
986     }
987     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
988    
989     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
990     {
991     SUBWrr(s, d);
992     }
993     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
994    
995     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
996     {
997     SUBBrr(s, d);
998     }
999     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1000    
1001     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1002     {
1003     CMPLrr(s, d);
1004     }
1005     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1006    
1007     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1008     {
1009     CMPLir(i, r);
1010     }
1011     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1012    
1013     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1014     {
1015     CMPWrr(s, d);
1016     }
1017     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1018    
1019     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1020     {
1021     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1022     }
1023     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1024    
1025     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1026     {
1027     CMPBir(i, d);
1028     }
1029     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1030    
1031     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1032     {
1033     CMPBrr(s, d);
1034     }
1035     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1036    
1037     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1038     {
1039     CMPLmr(offset, X86_NOREG, index, factor, d);
1040     }
1041     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1042    
1043     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1044     {
1045     XORLrr(s, d);
1046     }
1047     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1048    
1049     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1050     {
1051     XORWrr(s, d);
1052     }
1053     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1054    
1055     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1056     {
1057     XORBrr(s, d);
1058     }
1059     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1060    
1061     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1062     {
1063     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1064     }
1065     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1066    
1067     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1068     {
1069     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1070     }
1071     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1072    
1073     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1074     {
1075     XCHGLrr(r2, r1);
1076     }
1077     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1078    
1079     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1080     {
1081     PUSHFD();
1082     }
1083     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1084    
1085     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1086     {
1087     POPFD();
1088     }
1089     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1090    
1091     #else
1092    
/* Compile-time switches for shorter instruction encodings. */
const bool optimize_accum      = true;  /* tested together with isaccum() to pick the short accumulator opcode */
const bool optimize_imm8       = true;  /* NOTE(review): presumably selects imm8 forms; not referenced in this part of the file */
const bool optimize_shift_once = true;  /* use the one-bit D0/D1 shift/rotate opcodes when the count is 1 */
1096    
1097     /*************************************************************************
1098     * Actual encoding of the instructions on the target CPU *
1099     *************************************************************************/
1100    
1101 gbeauche 1.2 static __inline__ int isaccum(int r)
1102     {
1103     return (r == EAX_INDEX);
1104     }
1105    
1106 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1107     {
1108     return (x>=-128 && x<=127);
1109     }
1110    
1111     static __inline__ int isword(uae_s32 x)
1112     {
1113     return (x>=-32768 && x<=32767);
1114     }
1115    
1116     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1117     {
1118     emit_byte(0x50+r);
1119     }
1120     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1121    
1122     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1123     {
1124     emit_byte(0x58+r);
1125     }
1126     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1127    
1128     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1129     {
1130     emit_byte(0x0f);
1131     emit_byte(0xba);
1132     emit_byte(0xe0+r);
1133     emit_byte(i);
1134     }
1135     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1136    
1137     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1138     {
1139     emit_byte(0x0f);
1140     emit_byte(0xa3);
1141     emit_byte(0xc0+8*b+r);
1142     }
1143     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1144    
1145     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1146     {
1147     emit_byte(0x0f);
1148     emit_byte(0xba);
1149     emit_byte(0xf8+r);
1150     emit_byte(i);
1151     }
1152     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1153    
1154     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1155     {
1156     emit_byte(0x0f);
1157     emit_byte(0xbb);
1158     emit_byte(0xc0+8*b+r);
1159     }
1160     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1161    
1162    
1163     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1164     {
1165     emit_byte(0x0f);
1166     emit_byte(0xba);
1167     emit_byte(0xf0+r);
1168     emit_byte(i);
1169     }
1170     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1171    
1172     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1173     {
1174     emit_byte(0x0f);
1175     emit_byte(0xb3);
1176     emit_byte(0xc0+8*b+r);
1177     }
1178     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1179    
1180     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1181     {
1182     emit_byte(0x0f);
1183     emit_byte(0xba);
1184     emit_byte(0xe8+r);
1185     emit_byte(i);
1186     }
1187     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1188    
1189     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1190     {
1191     emit_byte(0x0f);
1192     emit_byte(0xab);
1193     emit_byte(0xc0+8*b+r);
1194     }
1195     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1196    
1197     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1198     {
1199     emit_byte(0x66);
1200     if (isbyte(i)) {
1201     emit_byte(0x83);
1202     emit_byte(0xe8+d);
1203     emit_byte(i);
1204     }
1205     else {
1206 gbeauche 1.2 if (optimize_accum && isaccum(d))
1207     emit_byte(0x2d);
1208     else {
1209 gbeauche 1.1 emit_byte(0x81);
1210     emit_byte(0xe8+d);
1211 gbeauche 1.2 }
1212 gbeauche 1.1 emit_word(i);
1213     }
1214     }
1215     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1216    
1217    
1218     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1219     {
1220     emit_byte(0x8b);
1221     emit_byte(0x05+8*d);
1222     emit_long(s);
1223     }
1224     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1225    
1226     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1227     {
1228     emit_byte(0xc7);
1229     emit_byte(0x05);
1230     emit_long(d);
1231     emit_long(s);
1232     }
1233     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1234    
1235     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1236     {
1237     emit_byte(0x66);
1238     emit_byte(0xc7);
1239     emit_byte(0x05);
1240     emit_long(d);
1241     emit_word(s);
1242     }
1243     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1244    
1245     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1246     {
1247     emit_byte(0xc6);
1248     emit_byte(0x05);
1249     emit_long(d);
1250     emit_byte(s);
1251     }
1252     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1253    
1254     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1255     {
1256     if (optimize_shift_once && (i == 1)) {
1257     emit_byte(0xd0);
1258     emit_byte(0x05);
1259     emit_long(d);
1260     }
1261     else {
1262     emit_byte(0xc0);
1263     emit_byte(0x05);
1264     emit_long(d);
1265     emit_byte(i);
1266     }
1267     }
1268     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1269    
1270     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1271     {
1272     if (optimize_shift_once && (i == 1)) {
1273     emit_byte(0xd0);
1274     emit_byte(0xc0+r);
1275     }
1276     else {
1277     emit_byte(0xc0);
1278     emit_byte(0xc0+r);
1279     emit_byte(i);
1280     }
1281     }
1282     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1283    
1284     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1285     {
1286     emit_byte(0x66);
1287     emit_byte(0xc1);
1288     emit_byte(0xc0+r);
1289     emit_byte(i);
1290     }
1291     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1292    
1293     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1294     {
1295     if (optimize_shift_once && (i == 1)) {
1296     emit_byte(0xd1);
1297     emit_byte(0xc0+r);
1298     }
1299     else {
1300     emit_byte(0xc1);
1301     emit_byte(0xc0+r);
1302     emit_byte(i);
1303     }
1304     }
1305     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1306    
1307     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1308     {
1309     emit_byte(0xd3);
1310     emit_byte(0xc0+d);
1311     }
1312     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1313    
1314     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1315     {
1316     emit_byte(0x66);
1317     emit_byte(0xd3);
1318     emit_byte(0xc0+d);
1319     }
1320     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1321    
1322     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1323     {
1324     emit_byte(0xd2);
1325     emit_byte(0xc0+d);
1326     }
1327     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1328    
1329     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1330     {
1331     emit_byte(0xd3);
1332     emit_byte(0xe0+d);
1333     }
1334     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1335    
1336     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1337     {
1338     emit_byte(0x66);
1339     emit_byte(0xd3);
1340     emit_byte(0xe0+d);
1341     }
1342     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1343    
1344     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1345     {
1346     emit_byte(0xd2);
1347     emit_byte(0xe0+d);
1348     }
1349     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1350    
1351     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1352     {
1353     if (optimize_shift_once && (i == 1)) {
1354     emit_byte(0xd0);
1355     emit_byte(0xc8+r);
1356     }
1357     else {
1358     emit_byte(0xc0);
1359     emit_byte(0xc8+r);
1360     emit_byte(i);
1361     }
1362     }
1363     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1364    
1365     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1366     {
1367     emit_byte(0x66);
1368     emit_byte(0xc1);
1369     emit_byte(0xc8+r);
1370     emit_byte(i);
1371     }
1372     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1373    
1374     // gb-- used for making an fpcr value in compemu_fpp.cpp
1375     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1376     {
1377     emit_byte(0x0b);
1378     emit_byte(0x05+8*d);
1379     emit_long(s);
1380     }
1381     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1382    
1383     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1384     {
1385     if (optimize_shift_once && (i == 1)) {
1386     emit_byte(0xd1);
1387     emit_byte(0xc8+r);
1388     }
1389     else {
1390     emit_byte(0xc1);
1391     emit_byte(0xc8+r);
1392     emit_byte(i);
1393     }
1394     }
1395     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1396    
1397     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1398     {
1399     emit_byte(0xd3);
1400     emit_byte(0xc8+d);
1401     }
1402     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1403    
1404     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1405     {
1406     emit_byte(0x66);
1407     emit_byte(0xd3);
1408     emit_byte(0xc8+d);
1409     }
1410     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1411    
1412     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1413     {
1414     emit_byte(0xd2);
1415     emit_byte(0xc8+d);
1416     }
1417     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1418    
1419     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1420     {
1421     emit_byte(0xd3);
1422     emit_byte(0xe8+d);
1423     }
1424     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1425    
1426     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1427     {
1428     emit_byte(0x66);
1429     emit_byte(0xd3);
1430     emit_byte(0xe8+d);
1431     }
1432     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1433    
1434     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1435     {
1436     emit_byte(0xd2);
1437     emit_byte(0xe8+d);
1438     }
1439     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1440    
1441     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1442     {
1443     emit_byte(0xd3);
1444     emit_byte(0xf8+d);
1445     }
1446     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1447    
1448     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1449     {
1450     emit_byte(0x66);
1451     emit_byte(0xd3);
1452     emit_byte(0xf8+d);
1453     }
1454     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1455    
1456     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1457     {
1458     emit_byte(0xd2);
1459     emit_byte(0xf8+d);
1460     }
1461     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1462    
1463     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1464     {
1465     if (optimize_shift_once && (i == 1)) {
1466     emit_byte(0xd1);
1467     emit_byte(0xe0+r);
1468     }
1469     else {
1470     emit_byte(0xc1);
1471     emit_byte(0xe0+r);
1472     emit_byte(i);
1473     }
1474     }
1475     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1476    
1477     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1478     {
1479     emit_byte(0x66);
1480     emit_byte(0xc1);
1481     emit_byte(0xe0+r);
1482     emit_byte(i);
1483     }
1484     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1485    
1486     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1487     {
1488     if (optimize_shift_once && (i == 1)) {
1489     emit_byte(0xd0);
1490     emit_byte(0xe0+r);
1491     }
1492     else {
1493     emit_byte(0xc0);
1494     emit_byte(0xe0+r);
1495     emit_byte(i);
1496     }
1497     }
1498     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1499    
1500     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1501     {
1502     if (optimize_shift_once && (i == 1)) {
1503     emit_byte(0xd1);
1504     emit_byte(0xe8+r);
1505     }
1506     else {
1507     emit_byte(0xc1);
1508     emit_byte(0xe8+r);
1509     emit_byte(i);
1510     }
1511     }
1512     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1513    
1514     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1515     {
1516     emit_byte(0x66);
1517     emit_byte(0xc1);
1518     emit_byte(0xe8+r);
1519     emit_byte(i);
1520     }
1521     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1522    
1523     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1524     {
1525     if (optimize_shift_once && (i == 1)) {
1526     emit_byte(0xd0);
1527     emit_byte(0xe8+r);
1528     }
1529     else {
1530     emit_byte(0xc0);
1531     emit_byte(0xe8+r);
1532     emit_byte(i);
1533     }
1534     }
1535     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1536    
1537     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1538     {
1539     if (optimize_shift_once && (i == 1)) {
1540     emit_byte(0xd1);
1541     emit_byte(0xf8+r);
1542     }
1543     else {
1544     emit_byte(0xc1);
1545     emit_byte(0xf8+r);
1546     emit_byte(i);
1547     }
1548     }
1549     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1550    
1551     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1552     {
1553     emit_byte(0x66);
1554     emit_byte(0xc1);
1555     emit_byte(0xf8+r);
1556     emit_byte(i);
1557     }
1558     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1559    
1560     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1561     {
1562     if (optimize_shift_once && (i == 1)) {
1563     emit_byte(0xd0);
1564     emit_byte(0xf8+r);
1565     }
1566     else {
1567     emit_byte(0xc0);
1568     emit_byte(0xf8+r);
1569     emit_byte(i);
1570     }
1571     }
1572     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1573    
1574     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1575     {
1576     emit_byte(0x9e);
1577     }
1578     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1579    
1580     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1581     {
1582     emit_byte(0x0f);
1583     emit_byte(0xa2);
1584     }
1585     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1586    
1587     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1588     {
1589     emit_byte(0x9f);
1590     }
1591     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1592    
1593     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1594     {
1595     emit_byte(0x0f);
1596     emit_byte(0x90+cc);
1597     emit_byte(0xc0+d);
1598     }
1599     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1600    
1601     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1602     {
1603     emit_byte(0x0f);
1604     emit_byte(0x90+cc);
1605     emit_byte(0x05);
1606     emit_long(d);
1607     }
1608     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1609    
1610     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1611     {
1612     if (have_cmov) {
1613     emit_byte(0x0f);
1614     emit_byte(0x40+cc);
1615     emit_byte(0xc0+8*d+s);
1616     }
1617     else { /* replacement using branch and mov */
1618     int uncc=(cc^1);
1619     emit_byte(0x70+uncc);
1620     emit_byte(2); /* skip next 2 bytes if not cc=true */
1621     emit_byte(0x89);
1622     emit_byte(0xc0+8*s+d);
1623     }
1624     }
1625     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1626    
1627     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1628     {
1629     emit_byte(0x0f);
1630     emit_byte(0xbc);
1631     emit_byte(0xc0+8*d+s);
1632     }
1633     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1634    
1635     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1636     {
1637     emit_byte(0x0f);
1638     emit_byte(0xbf);
1639     emit_byte(0xc0+8*d+s);
1640     }
1641     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1642    
1643     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1644     {
1645     emit_byte(0x0f);
1646     emit_byte(0xbe);
1647     emit_byte(0xc0+8*d+s);
1648     }
1649     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1650    
1651     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1652     {
1653     emit_byte(0x0f);
1654     emit_byte(0xb7);
1655     emit_byte(0xc0+8*d+s);
1656     }
1657     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1658    
1659     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1660     {
1661     emit_byte(0x0f);
1662     emit_byte(0xb6);
1663     emit_byte(0xc0+8*d+s);
1664     }
1665     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1666    
1667     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1668     {
1669     emit_byte(0x0f);
1670     emit_byte(0xaf);
1671     emit_byte(0xc0+8*d+s);
1672     }
1673     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1674    
1675     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1676     {
1677     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1678     abort();
1679     emit_byte(0xf7);
1680     emit_byte(0xea);
1681     }
1682     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1683    
1684     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1685     {
1686     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1687     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1688     abort();
1689     }
1690     emit_byte(0xf7);
1691     emit_byte(0xe2);
1692     }
1693     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1694    
1695     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1696     {
1697     abort(); /* %^$&%^$%#^ x86! */
1698     emit_byte(0x0f);
1699     emit_byte(0xaf);
1700     emit_byte(0xc0+8*d+s);
1701     }
1702     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1703    
1704     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1705     {
1706     emit_byte(0x88);
1707     emit_byte(0xc0+8*s+d);
1708     }
1709     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1710    
1711     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1712     {
1713     emit_byte(0x66);
1714     emit_byte(0x89);
1715     emit_byte(0xc0+8*s+d);
1716     }
1717     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1718    
1719     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1720     {
1721     int isebp=(baser==5)?0x40:0;
1722     int fi;
1723    
1724     switch(factor) {
1725     case 1: fi=0; break;
1726     case 2: fi=1; break;
1727     case 4: fi=2; break;
1728     case 8: fi=3; break;
1729     default: abort();
1730     }
1731    
1732    
1733     emit_byte(0x8b);
1734     emit_byte(0x04+8*d+isebp);
1735     emit_byte(baser+8*index+0x40*fi);
1736     if (isebp)
1737     emit_byte(0x00);
1738     }
1739     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1740    
1741     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1742     {
1743     int fi;
1744     int isebp;
1745    
1746     switch(factor) {
1747     case 1: fi=0; break;
1748     case 2: fi=1; break;
1749     case 4: fi=2; break;
1750     case 8: fi=3; break;
1751     default: abort();
1752     }
1753     isebp=(baser==5)?0x40:0;
1754    
1755     emit_byte(0x66);
1756     emit_byte(0x8b);
1757     emit_byte(0x04+8*d+isebp);
1758     emit_byte(baser+8*index+0x40*fi);
1759     if (isebp)
1760     emit_byte(0x00);
1761     }
1762     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1763    
1764     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1765     {
1766     int fi;
1767     int isebp;
1768    
1769     switch(factor) {
1770     case 1: fi=0; break;
1771     case 2: fi=1; break;
1772     case 4: fi=2; break;
1773     case 8: fi=3; break;
1774     default: abort();
1775     }
1776     isebp=(baser==5)?0x40:0;
1777    
1778     emit_byte(0x8a);
1779     emit_byte(0x04+8*d+isebp);
1780     emit_byte(baser+8*index+0x40*fi);
1781     if (isebp)
1782     emit_byte(0x00);
1783     }
1784     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1785    
1786     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1787     {
1788     int fi;
1789     int isebp;
1790    
1791     switch(factor) {
1792     case 1: fi=0; break;
1793     case 2: fi=1; break;
1794     case 4: fi=2; break;
1795     case 8: fi=3; break;
1796     default: abort();
1797     }
1798    
1799    
1800     isebp=(baser==5)?0x40:0;
1801    
1802     emit_byte(0x89);
1803     emit_byte(0x04+8*s+isebp);
1804     emit_byte(baser+8*index+0x40*fi);
1805     if (isebp)
1806     emit_byte(0x00);
1807     }
1808     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1809    
1810     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1811     {
1812     int fi;
1813     int isebp;
1814    
1815     switch(factor) {
1816     case 1: fi=0; break;
1817     case 2: fi=1; break;
1818     case 4: fi=2; break;
1819     case 8: fi=3; break;
1820     default: abort();
1821     }
1822     isebp=(baser==5)?0x40:0;
1823    
1824     emit_byte(0x66);
1825     emit_byte(0x89);
1826     emit_byte(0x04+8*s+isebp);
1827     emit_byte(baser+8*index+0x40*fi);
1828     if (isebp)
1829     emit_byte(0x00);
1830     }
1831     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1832    
1833     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1834     {
1835     int fi;
1836     int isebp;
1837    
1838     switch(factor) {
1839     case 1: fi=0; break;
1840     case 2: fi=1; break;
1841     case 4: fi=2; break;
1842     case 8: fi=3; break;
1843     default: abort();
1844     }
1845     isebp=(baser==5)?0x40:0;
1846    
1847     emit_byte(0x88);
1848     emit_byte(0x04+8*s+isebp);
1849     emit_byte(baser+8*index+0x40*fi);
1850     if (isebp)
1851     emit_byte(0x00);
1852     }
1853     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1854    
1855     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1856     {
1857     int fi;
1858    
1859     switch(factor) {
1860     case 1: fi=0; break;
1861     case 2: fi=1; break;
1862     case 4: fi=2; break;
1863     case 8: fi=3; break;
1864     default: abort();
1865     }
1866    
1867     emit_byte(0x89);
1868     emit_byte(0x84+8*s);
1869     emit_byte(baser+8*index+0x40*fi);
1870     emit_long(base);
1871     }
1872     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1873    
1874     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1875     {
1876     int fi;
1877    
1878     switch(factor) {
1879     case 1: fi=0; break;
1880     case 2: fi=1; break;
1881     case 4: fi=2; break;
1882     case 8: fi=3; break;
1883     default: abort();
1884     }
1885    
1886     emit_byte(0x66);
1887     emit_byte(0x89);
1888     emit_byte(0x84+8*s);
1889     emit_byte(baser+8*index+0x40*fi);
1890     emit_long(base);
1891     }
1892     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1893    
1894     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1895     {
1896     int fi;
1897    
1898     switch(factor) {
1899     case 1: fi=0; break;
1900     case 2: fi=1; break;
1901     case 4: fi=2; break;
1902     case 8: fi=3; break;
1903     default: abort();
1904     }
1905    
1906     emit_byte(0x88);
1907     emit_byte(0x84+8*s);
1908     emit_byte(baser+8*index+0x40*fi);
1909     emit_long(base);
1910     }
1911     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1912    
1913     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1914     {
1915     int fi;
1916    
1917     switch(factor) {
1918     case 1: fi=0; break;
1919     case 2: fi=1; break;
1920     case 4: fi=2; break;
1921     case 8: fi=3; break;
1922     default: abort();
1923     }
1924    
1925     emit_byte(0x8b);
1926     emit_byte(0x84+8*d);
1927     emit_byte(baser+8*index+0x40*fi);
1928     emit_long(base);
1929     }
1930     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1931    
1932     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
1933     {
1934     int fi;
1935    
1936     switch(factor) {
1937     case 1: fi=0; break;
1938     case 2: fi=1; break;
1939     case 4: fi=2; break;
1940     case 8: fi=3; break;
1941     default: abort();
1942     }
1943    
1944     emit_byte(0x66);
1945     emit_byte(0x8b);
1946     emit_byte(0x84+8*d);
1947     emit_byte(baser+8*index+0x40*fi);
1948     emit_long(base);
1949     }
1950     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
1951    
1952     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1953     {
1954     int fi;
1955    
1956     switch(factor) {
1957     case 1: fi=0; break;
1958     case 2: fi=1; break;
1959     case 4: fi=2; break;
1960     case 8: fi=3; break;
1961     default: abort();
1962     }
1963    
1964     emit_byte(0x8a);
1965     emit_byte(0x84+8*d);
1966     emit_byte(baser+8*index+0x40*fi);
1967     emit_long(base);
1968     }
1969     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1970    
1971     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1972     {
1973     int fi;
1974     switch(factor) {
1975     case 1: fi=0; break;
1976     case 2: fi=1; break;
1977     case 4: fi=2; break;
1978     case 8: fi=3; break;
1979     default:
1980     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1981     abort();
1982     }
1983     emit_byte(0x8b);
1984     emit_byte(0x04+8*d);
1985     emit_byte(0x05+8*index+64*fi);
1986     emit_long(base);
1987     }
1988     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1989    
1990     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1991     {
1992     int fi;
1993     switch(factor) {
1994     case 1: fi=0; break;
1995     case 2: fi=1; break;
1996     case 4: fi=2; break;
1997     case 8: fi=3; break;
1998     default:
1999     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2000     abort();
2001     }
2002     if (have_cmov) {
2003     emit_byte(0x0f);
2004     emit_byte(0x40+cond);
2005     emit_byte(0x04+8*d);
2006     emit_byte(0x05+8*index+64*fi);
2007     emit_long(base);
2008     }
2009     else { /* replacement using branch and mov */
2010     int uncc=(cond^1);
2011     emit_byte(0x70+uncc);
2012     emit_byte(7); /* skip next 7 bytes if not cc=true */
2013     emit_byte(0x8b);
2014     emit_byte(0x04+8*d);
2015     emit_byte(0x05+8*index+64*fi);
2016     emit_long(base);
2017     }
2018     }
2019     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2020    
2021     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2022     {
2023     if (have_cmov) {
2024     emit_byte(0x0f);
2025     emit_byte(0x40+cond);
2026     emit_byte(0x05+8*d);
2027     emit_long(mem);
2028     }
2029     else { /* replacement using branch and mov */
2030     int uncc=(cond^1);
2031     emit_byte(0x70+uncc);
2032     emit_byte(6); /* skip next 6 bytes if not cc=true */
2033     emit_byte(0x8b);
2034     emit_byte(0x05+8*d);
2035     emit_long(mem);
2036     }
2037     }
2038     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2039    
2040     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2041     {
2042 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2043 gbeauche 1.1 emit_byte(0x8b);
2044     emit_byte(0x40+8*d+s);
2045     emit_byte(offset);
2046     }
2047     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2048    
2049     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2050     {
2051 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2052 gbeauche 1.1 emit_byte(0x66);
2053     emit_byte(0x8b);
2054     emit_byte(0x40+8*d+s);
2055     emit_byte(offset);
2056     }
2057     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2058    
2059     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2060     {
2061 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2062 gbeauche 1.1 emit_byte(0x8a);
2063     emit_byte(0x40+8*d+s);
2064     emit_byte(offset);
2065     }
2066     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2067    
2068     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2069     {
2070     emit_byte(0x8b);
2071     emit_byte(0x80+8*d+s);
2072     emit_long(offset);
2073     }
2074     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2075    
2076     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2077     {
2078     emit_byte(0x66);
2079     emit_byte(0x8b);
2080     emit_byte(0x80+8*d+s);
2081     emit_long(offset);
2082     }
2083     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2084    
2085     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2086     {
2087     emit_byte(0x8a);
2088     emit_byte(0x80+8*d+s);
2089     emit_long(offset);
2090     }
2091     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2092    
2093     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2094     {
2095 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2096 gbeauche 1.1 emit_byte(0xc7);
2097     emit_byte(0x40+d);
2098     emit_byte(offset);
2099     emit_long(i);
2100     }
2101     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2102    
2103     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2104     {
2105 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2106 gbeauche 1.1 emit_byte(0x66);
2107     emit_byte(0xc7);
2108     emit_byte(0x40+d);
2109     emit_byte(offset);
2110     emit_word(i);
2111     }
2112     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2113    
2114     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2115     {
2116 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2117 gbeauche 1.1 emit_byte(0xc6);
2118     emit_byte(0x40+d);
2119     emit_byte(offset);
2120     emit_byte(i);
2121     }
2122     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2123    
2124     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2125     {
2126 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2127 gbeauche 1.1 emit_byte(0x89);
2128     emit_byte(0x40+8*s+d);
2129     emit_byte(offset);
2130     }
2131     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2132    
2133     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2134     {
2135 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2136 gbeauche 1.1 emit_byte(0x66);
2137     emit_byte(0x89);
2138     emit_byte(0x40+8*s+d);
2139     emit_byte(offset);
2140     }
2141     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2142    
2143     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2144     {
2145 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2146 gbeauche 1.1 emit_byte(0x88);
2147     emit_byte(0x40+8*s+d);
2148     emit_byte(offset);
2149     }
2150     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2151    
2152     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2153     {
2154     if (optimize_imm8 && isbyte(offset)) {
2155     emit_byte(0x8d);
2156     emit_byte(0x40+8*d+s);
2157     emit_byte(offset);
2158     }
2159     else {
2160     emit_byte(0x8d);
2161     emit_byte(0x80+8*d+s);
2162     emit_long(offset);
2163     }
2164     }
2165     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2166    
2167     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2168     {
2169     int fi;
2170    
2171     switch(factor) {
2172     case 1: fi=0; break;
2173     case 2: fi=1; break;
2174     case 4: fi=2; break;
2175     case 8: fi=3; break;
2176     default: abort();
2177     }
2178    
2179     if (optimize_imm8 && isbyte(offset)) {
2180     emit_byte(0x8d);
2181     emit_byte(0x44+8*d);
2182     emit_byte(0x40*fi+8*index+s);
2183     emit_byte(offset);
2184     }
2185     else {
2186     emit_byte(0x8d);
2187     emit_byte(0x84+8*d);
2188     emit_byte(0x40*fi+8*index+s);
2189     emit_long(offset);
2190     }
2191     }
2192     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2193    
2194     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2195     {
2196     int isebp=(s==5)?0x40:0;
2197     int fi;
2198    
2199     switch(factor) {
2200     case 1: fi=0; break;
2201     case 2: fi=1; break;
2202     case 4: fi=2; break;
2203     case 8: fi=3; break;
2204     default: abort();
2205     }
2206    
2207     emit_byte(0x8d);
2208     emit_byte(0x04+8*d+isebp);
2209     emit_byte(0x40*fi+8*index+s);
2210     if (isebp)
2211     emit_byte(0);
2212     }
2213     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2214    
2215     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2216     {
2217     if (optimize_imm8 && isbyte(offset)) {
2218     emit_byte(0x89);
2219     emit_byte(0x40+8*s+d);
2220     emit_byte(offset);
2221     }
2222     else {
2223     emit_byte(0x89);
2224     emit_byte(0x80+8*s+d);
2225     emit_long(offset);
2226     }
2227     }
2228     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2229    
2230     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2231     {
2232     emit_byte(0x66);
2233     emit_byte(0x89);
2234     emit_byte(0x80+8*s+d);
2235     emit_long(offset);
2236     }
2237     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2238    
2239     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2240     {
2241     if (optimize_imm8 && isbyte(offset)) {
2242     emit_byte(0x88);
2243     emit_byte(0x40+8*s+d);
2244     emit_byte(offset);
2245     }
2246     else {
2247     emit_byte(0x88);
2248     emit_byte(0x80+8*s+d);
2249     emit_long(offset);
2250     }
2251     }
2252     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2253    
2254     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2255     {
2256     emit_byte(0x0f);
2257     emit_byte(0xc8+r);
2258     }
2259     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2260    
2261     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2262     {
2263     emit_byte(0x66);
2264     emit_byte(0xc1);
2265     emit_byte(0xc0+r);
2266     emit_byte(0x08);
2267     }
2268     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2269    
2270     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2271     {
2272     emit_byte(0x89);
2273     emit_byte(0xc0+8*s+d);
2274     }
2275     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2276    
2277     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2278     {
2279     emit_byte(0x89);
2280     emit_byte(0x05+8*s);
2281     emit_long(d);
2282     }
2283     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2284    
2285     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2286     {
2287     emit_byte(0x66);
2288     emit_byte(0x89);
2289     emit_byte(0x05+8*s);
2290     emit_long(d);
2291     }
2292     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2293    
2294     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2295     {
2296     emit_byte(0x66);
2297     emit_byte(0x8b);
2298     emit_byte(0x05+8*d);
2299     emit_long(s);
2300     }
2301     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2302    
2303     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2304     {
2305     emit_byte(0x88);
2306     emit_byte(0x05+8*s);
2307     emit_long(d);
2308     }
2309     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2310    
2311     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2312     {
2313     emit_byte(0x8a);
2314     emit_byte(0x05+8*d);
2315     emit_long(s);
2316     }
2317     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2318    
2319     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2320     {
2321     emit_byte(0xb8+d);
2322     emit_long(s);
2323     }
2324     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2325    
2326     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2327     {
2328     emit_byte(0x66);
2329     emit_byte(0xb8+d);
2330     emit_word(s);
2331     }
2332     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2333    
2334     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2335     {
2336     emit_byte(0xb0+d);
2337     emit_byte(s);
2338     }
2339     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2340    
2341     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2342     {
2343     emit_byte(0x81);
2344     emit_byte(0x15);
2345     emit_long(d);
2346     emit_long(s);
2347     }
2348     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2349    
2350     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2351     {
2352     if (optimize_imm8 && isbyte(s)) {
2353     emit_byte(0x83);
2354     emit_byte(0x05);
2355     emit_long(d);
2356     emit_byte(s);
2357     }
2358     else {
2359     emit_byte(0x81);
2360     emit_byte(0x05);
2361     emit_long(d);
2362     emit_long(s);
2363     }
2364     }
2365     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2366    
2367     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2368     {
2369     emit_byte(0x66);
2370     emit_byte(0x81);
2371     emit_byte(0x05);
2372     emit_long(d);
2373     emit_word(s);
2374     }
2375     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2376    
2377     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2378     {
2379     emit_byte(0x80);
2380     emit_byte(0x05);
2381     emit_long(d);
2382     emit_byte(s);
2383     }
2384     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2385    
2386     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2387     {
2388 gbeauche 1.2 if (optimize_accum && isaccum(d))
2389     emit_byte(0xa9);
2390     else {
2391 gbeauche 1.1 emit_byte(0xf7);
2392     emit_byte(0xc0+d);
2393 gbeauche 1.2 }
2394 gbeauche 1.1 emit_long(i);
2395     }
2396     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2397    
2398     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2399     {
2400     emit_byte(0x85);
2401     emit_byte(0xc0+8*s+d);
2402     }
2403     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2404    
2405     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2406     {
2407     emit_byte(0x66);
2408     emit_byte(0x85);
2409     emit_byte(0xc0+8*s+d);
2410     }
2411     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2412    
2413     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2414     {
2415     emit_byte(0x84);
2416     emit_byte(0xc0+8*s+d);
2417     }
2418     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2419    
2420     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2421     {
2422     if (optimize_imm8 && isbyte(i)) {
2423 gbeauche 1.2 emit_byte(0x83);
2424     emit_byte(0xe0+d);
2425     emit_byte(i);
2426 gbeauche 1.1 }
2427     else {
2428 gbeauche 1.2 if (optimize_accum && isaccum(d))
2429     emit_byte(0x25);
2430     else {
2431     emit_byte(0x81);
2432     emit_byte(0xe0+d);
2433     }
2434     emit_long(i);
2435 gbeauche 1.1 }
2436     }
2437     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2438    
2439     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2440     {
2441 gbeauche 1.2 emit_byte(0x66);
2442     if (optimize_imm8 && isbyte(i)) {
2443     emit_byte(0x83);
2444     emit_byte(0xe0+d);
2445     emit_byte(i);
2446     }
2447     else {
2448     if (optimize_accum && isaccum(d))
2449     emit_byte(0x25);
2450     else {
2451     emit_byte(0x81);
2452     emit_byte(0xe0+d);
2453     }
2454     emit_word(i);
2455     }
2456 gbeauche 1.1 }
2457     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2458    
2459     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2460     {
2461     emit_byte(0x21);
2462     emit_byte(0xc0+8*s+d);
2463     }
2464     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2465    
2466     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2467     {
2468     emit_byte(0x66);
2469     emit_byte(0x21);
2470     emit_byte(0xc0+8*s+d);
2471     }
2472     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2473    
2474     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2475     {
2476     emit_byte(0x20);
2477     emit_byte(0xc0+8*s+d);
2478     }
2479     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2480    
2481     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2482     {
2483     if (optimize_imm8 && isbyte(i)) {
2484     emit_byte(0x83);
2485     emit_byte(0xc8+d);
2486     emit_byte(i);
2487     }
2488     else {
2489 gbeauche 1.2 if (optimize_accum && isaccum(d))
2490     emit_byte(0x0d);
2491     else {
2492 gbeauche 1.1 emit_byte(0x81);
2493     emit_byte(0xc8+d);
2494 gbeauche 1.2 }
2495 gbeauche 1.1 emit_long(i);
2496     }
2497     }
2498     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2499    
2500     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2501     {
2502     emit_byte(0x09);
2503     emit_byte(0xc0+8*s+d);
2504     }
2505     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2506    
2507     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2508     {
2509     emit_byte(0x66);
2510     emit_byte(0x09);
2511     emit_byte(0xc0+8*s+d);
2512     }
2513     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2514    
2515     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2516     {
2517     emit_byte(0x08);
2518     emit_byte(0xc0+8*s+d);
2519     }
2520     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2521    
2522     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2523     {
2524     emit_byte(0x11);
2525     emit_byte(0xc0+8*s+d);
2526     }
2527     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2528    
2529     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2530     {
2531     emit_byte(0x66);
2532     emit_byte(0x11);
2533     emit_byte(0xc0+8*s+d);
2534     }
2535     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2536    
2537     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2538     {
2539     emit_byte(0x10);
2540     emit_byte(0xc0+8*s+d);
2541     }
2542     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2543    
2544     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2545     {
2546     emit_byte(0x01);
2547     emit_byte(0xc0+8*s+d);
2548     }
2549     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2550    
2551     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2552     {
2553     emit_byte(0x66);
2554     emit_byte(0x01);
2555     emit_byte(0xc0+8*s+d);
2556     }
2557     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2558    
2559     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2560     {
2561     emit_byte(0x00);
2562     emit_byte(0xc0+8*s+d);
2563     }
2564     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2565    
2566     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2567     {
2568     if (isbyte(i)) {
2569     emit_byte(0x83);
2570     emit_byte(0xe8+d);
2571     emit_byte(i);
2572     }
2573     else {
2574 gbeauche 1.2 if (optimize_accum && isaccum(d))
2575     emit_byte(0x2d);
2576     else {
2577 gbeauche 1.1 emit_byte(0x81);
2578     emit_byte(0xe8+d);
2579 gbeauche 1.2 }
2580 gbeauche 1.1 emit_long(i);
2581     }
2582     }
2583     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2584    
2585     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2586     {
2587 gbeauche 1.2 if (optimize_accum && isaccum(d))
2588     emit_byte(0x2c);
2589     else {
2590 gbeauche 1.1 emit_byte(0x80);
2591     emit_byte(0xe8+d);
2592 gbeauche 1.2 }
2593 gbeauche 1.1 emit_byte(i);
2594     }
2595     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2596    
2597     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2598     {
2599     if (isbyte(i)) {
2600     emit_byte(0x83);
2601     emit_byte(0xc0+d);
2602     emit_byte(i);
2603     }
2604     else {
2605 gbeauche 1.2 if (optimize_accum && isaccum(d))
2606     emit_byte(0x05);
2607     else {
2608 gbeauche 1.1 emit_byte(0x81);
2609     emit_byte(0xc0+d);
2610 gbeauche 1.2 }
2611 gbeauche 1.1 emit_long(i);
2612     }
2613     }
2614     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2615    
2616     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2617     {
2618 gbeauche 1.2 emit_byte(0x66);
2619 gbeauche 1.1 if (isbyte(i)) {
2620     emit_byte(0x83);
2621     emit_byte(0xc0+d);
2622     emit_byte(i);
2623     }
2624     else {
2625 gbeauche 1.2 if (optimize_accum && isaccum(d))
2626     emit_byte(0x05);
2627     else {
2628 gbeauche 1.1 emit_byte(0x81);
2629     emit_byte(0xc0+d);
2630 gbeauche 1.2 }
2631 gbeauche 1.1 emit_word(i);
2632     }
2633     }
2634     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2635    
2636     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2637     {
2638 gbeauche 1.2 if (optimize_accum && isaccum(d))
2639     emit_byte(0x04);
2640     else {
2641     emit_byte(0x80);
2642     emit_byte(0xc0+d);
2643     }
2644 gbeauche 1.1 emit_byte(i);
2645     }
2646     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2647    
2648     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2649     {
2650     emit_byte(0x19);
2651     emit_byte(0xc0+8*s+d);
2652     }
2653     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2654    
2655     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2656     {
2657     emit_byte(0x66);
2658     emit_byte(0x19);
2659     emit_byte(0xc0+8*s+d);
2660     }
2661     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2662    
2663     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2664     {
2665     emit_byte(0x18);
2666     emit_byte(0xc0+8*s+d);
2667     }
2668     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2669    
2670     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2671     {
2672     emit_byte(0x29);
2673     emit_byte(0xc0+8*s+d);
2674     }
2675     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2676    
2677     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2678     {
2679     emit_byte(0x66);
2680     emit_byte(0x29);
2681     emit_byte(0xc0+8*s+d);
2682     }
2683     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2684    
2685     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2686     {
2687     emit_byte(0x28);
2688     emit_byte(0xc0+8*s+d);
2689     }
2690     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2691    
2692     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2693     {
2694     emit_byte(0x39);
2695     emit_byte(0xc0+8*s+d);
2696     }
2697     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2698    
2699     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2700     {
2701     if (optimize_imm8 && isbyte(i)) {
2702     emit_byte(0x83);
2703     emit_byte(0xf8+r);
2704     emit_byte(i);
2705     }
2706     else {
2707 gbeauche 1.2 if (optimize_accum && isaccum(r))
2708     emit_byte(0x3d);
2709     else {
2710 gbeauche 1.1 emit_byte(0x81);
2711     emit_byte(0xf8+r);
2712 gbeauche 1.2 }
2713 gbeauche 1.1 emit_long(i);
2714     }
2715     }
2716     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2717    
2718     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2719     {
2720     emit_byte(0x66);
2721     emit_byte(0x39);
2722     emit_byte(0xc0+8*s+d);
2723     }
2724     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2725    
2726 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2727     {
2728     emit_byte(0x80);
2729     emit_byte(0x3d);
2730     emit_long(d);
2731     emit_byte(s);
2732     }
2733     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2734    
2735 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2736     {
2737 gbeauche 1.2 if (optimize_accum && isaccum(d))
2738     emit_byte(0x3c);
2739     else {
2740 gbeauche 1.1 emit_byte(0x80);
2741     emit_byte(0xf8+d);
2742 gbeauche 1.2 }
2743 gbeauche 1.1 emit_byte(i);
2744     }
2745     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2746    
2747     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2748     {
2749     emit_byte(0x38);
2750     emit_byte(0xc0+8*s+d);
2751     }
2752     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2753    
2754     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2755     {
2756     int fi;
2757    
2758     switch(factor) {
2759     case 1: fi=0; break;
2760     case 2: fi=1; break;
2761     case 4: fi=2; break;
2762     case 8: fi=3; break;
2763     default: abort();
2764     }
2765     emit_byte(0x39);
2766     emit_byte(0x04+8*d);
2767     emit_byte(5+8*index+0x40*fi);
2768     emit_long(offset);
2769     }
2770     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2771    
2772     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2773     {
2774     emit_byte(0x31);
2775     emit_byte(0xc0+8*s+d);
2776     }
2777     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2778    
2779     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2780     {
2781     emit_byte(0x66);
2782     emit_byte(0x31);
2783     emit_byte(0xc0+8*s+d);
2784     }
2785     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2786    
2787     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2788     {
2789     emit_byte(0x30);
2790     emit_byte(0xc0+8*s+d);
2791     }
2792     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2793    
2794     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2795     {
2796     if (optimize_imm8 && isbyte(s)) {
2797     emit_byte(0x83);
2798     emit_byte(0x2d);
2799     emit_long(d);
2800     emit_byte(s);
2801     }
2802     else {
2803     emit_byte(0x81);
2804     emit_byte(0x2d);
2805     emit_long(d);
2806     emit_long(s);
2807     }
2808     }
2809     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2810    
2811     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2812     {
2813     if (optimize_imm8 && isbyte(s)) {
2814     emit_byte(0x83);
2815     emit_byte(0x3d);
2816     emit_long(d);
2817     emit_byte(s);
2818     }
2819     else {
2820     emit_byte(0x81);
2821     emit_byte(0x3d);
2822     emit_long(d);
2823     emit_long(s);
2824     }
2825     }
2826     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2827    
2828     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2829     {
2830     emit_byte(0x87);
2831     emit_byte(0xc0+8*r1+r2);
2832     }
2833     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2834    
2835     /*************************************************************************
2836     * FIXME: mem access modes probably wrong *
2837     *************************************************************************/
2838    
2839     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2840     {
2841     emit_byte(0x9c);
2842     }
2843     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2844    
2845     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2846     {
2847     emit_byte(0x9d);
2848     }
2849     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2850 gbeauche 1.13
2851     #endif
2852 gbeauche 1.1
2853     /*************************************************************************
2854     * Unoptimizable stuff --- jump *
2855     *************************************************************************/
2856    
2857     static __inline__ void raw_call_r(R4 r)
2858     {
2859     emit_byte(0xff);
2860     emit_byte(0xd0+r);
2861 gbeauche 1.5 }
2862    
2863     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2864     {
2865     int mu;
2866     switch(m) {
2867     case 1: mu=0; break;
2868     case 2: mu=1; break;
2869     case 4: mu=2; break;
2870     case 8: mu=3; break;
2871     default: abort();
2872     }
2873     emit_byte(0xff);
2874     emit_byte(0x14);
2875     emit_byte(0x05+8*r+0x40*mu);
2876     emit_long(base);
2877 gbeauche 1.1 }
2878    
2879     static __inline__ void raw_jmp_r(R4 r)
2880     {
2881     emit_byte(0xff);
2882     emit_byte(0xe0+r);
2883     }
2884    
2885     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2886     {
2887     int mu;
2888     switch(m) {
2889     case 1: mu=0; break;
2890     case 2: mu=1; break;
2891     case 4: mu=2; break;
2892     case 8: mu=3; break;
2893     default: abort();
2894     }
2895     emit_byte(0xff);
2896     emit_byte(0x24);
2897     emit_byte(0x05+8*r+0x40*mu);
2898     emit_long(base);
2899     }
2900    
2901     static __inline__ void raw_jmp_m(uae_u32 base)
2902     {
2903     emit_byte(0xff);
2904     emit_byte(0x25);
2905     emit_long(base);
2906     }
2907    
2908    
2909     static __inline__ void raw_call(uae_u32 t)
2910     {
2911     emit_byte(0xe8);
2912     emit_long(t-(uae_u32)target-4);
2913     }
2914    
2915     static __inline__ void raw_jmp(uae_u32 t)
2916     {
2917     emit_byte(0xe9);
2918     emit_long(t-(uae_u32)target-4);
2919     }
2920    
2921     static __inline__ void raw_jl(uae_u32 t)
2922     {
2923     emit_byte(0x0f);
2924     emit_byte(0x8c);
2925     emit_long(t-(uae_u32)target-4);
2926     }
2927    
2928     static __inline__ void raw_jz(uae_u32 t)
2929     {
2930     emit_byte(0x0f);
2931     emit_byte(0x84);
2932     emit_long(t-(uae_u32)target-4);
2933     }
2934    
2935     static __inline__ void raw_jnz(uae_u32 t)
2936     {
2937     emit_byte(0x0f);
2938     emit_byte(0x85);
2939     emit_long(t-(uae_u32)target-4);
2940     }
2941    
2942     static __inline__ void raw_jnz_l_oponly(void)
2943     {
2944     emit_byte(0x0f);
2945     emit_byte(0x85);
2946     }
2947    
2948     static __inline__ void raw_jcc_l_oponly(int cc)
2949     {
2950     emit_byte(0x0f);
2951     emit_byte(0x80+cc);
2952     }
2953    
2954     static __inline__ void raw_jnz_b_oponly(void)
2955     {
2956     emit_byte(0x75);
2957     }
2958    
2959     static __inline__ void raw_jz_b_oponly(void)
2960     {
2961     emit_byte(0x74);
2962     }
2963    
2964     static __inline__ void raw_jcc_b_oponly(int cc)
2965     {
2966     emit_byte(0x70+cc);
2967     }
2968    
2969     static __inline__ void raw_jmp_l_oponly(void)
2970     {
2971     emit_byte(0xe9);
2972     }
2973    
2974     static __inline__ void raw_jmp_b_oponly(void)
2975     {
2976     emit_byte(0xeb);
2977     }
2978    
2979     static __inline__ void raw_ret(void)
2980     {
2981     emit_byte(0xc3);
2982     }
2983    
2984     static __inline__ void raw_nop(void)
2985     {
2986     emit_byte(0x90);
2987     }
2988    
2989 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
2990     {
2991     /* Source: GNU Binutils 2.12.90.0.15 */
2992     /* Various efficient no-op patterns for aligning code labels.
2993     Note: Don't try to assemble the instructions in the comments.
2994     0L and 0w are not legal. */
2995     static const uae_u8 f32_1[] =
2996     {0x90}; /* nop */
2997     static const uae_u8 f32_2[] =
2998     {0x89,0xf6}; /* movl %esi,%esi */
2999     static const uae_u8 f32_3[] =
3000     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3001     static const uae_u8 f32_4[] =
3002     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3003     static const uae_u8 f32_5[] =
3004     {0x90, /* nop */
3005     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3006     static const uae_u8 f32_6[] =
3007     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3008     static const uae_u8 f32_7[] =
3009     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3010     static const uae_u8 f32_8[] =
3011     {0x90, /* nop */
3012     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3013     static const uae_u8 f32_9[] =
3014     {0x89,0xf6, /* movl %esi,%esi */
3015     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3016     static const uae_u8 f32_10[] =
3017     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3018     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3019     static const uae_u8 f32_11[] =
3020     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3021     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3022     static const uae_u8 f32_12[] =
3023     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3024     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3025     static const uae_u8 f32_13[] =
3026     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3027     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3028     static const uae_u8 f32_14[] =
3029     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3030     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3031     static const uae_u8 f32_15[] =
3032     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3033     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3034     static const uae_u8 f32_16[] =
3035     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3036     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3037     static const uae_u8 *const f32_patt[] = {
3038     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3039     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3040     };
3041    
3042     int nloops = nbytes / 16;
3043     while (nloops-- > 0)
3044     emit_block(f32_16, sizeof(f32_16));
3045    
3046     nbytes %= 16;
3047     if (nbytes)
3048     emit_block(f32_patt[nbytes - 1], nbytes);
3049     }
3050    
3051 gbeauche 1.1
3052     /*************************************************************************
3053     * Flag handling, to and fro UAE flag register *
3054     *************************************************************************/
3055    
3056     #ifdef SAHF_SETO_PROFITABLE
3057    
3058     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3059    
3060     static __inline__ void raw_flags_to_reg(int r)
3061     {
3062     raw_lahf(0); /* Most flags in AH */
3063     //raw_setcc(r,0); /* V flag in AL */
3064     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
3065    
3066     #if 1 /* Let's avoid those nasty partial register stalls */
3067     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
3068     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3069     //live.state[FLAGTMP].status=CLEAN;
3070     live.state[FLAGTMP].status=INMEM;
3071     live.state[FLAGTMP].realreg=-1;
3072     /* We just "evicted" FLAGTMP. */
3073     if (live.nat[r].nholds!=1) {
3074     /* Huh? */
3075     abort();
3076     }
3077     live.nat[r].nholds=0;
3078     #endif
3079     }
3080    
3081     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3082     static __inline__ void raw_reg_to_flags(int r)
3083     {
3084     raw_cmp_b_ri(r,-127); /* set V */
3085     raw_sahf(0);
3086     }
3087    
3088     #else
3089    
3090     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3091     static __inline__ void raw_flags_to_reg(int r)
3092     {
3093     raw_pushfl();
3094     raw_pop_l_r(r);
3095     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3096     // live.state[FLAGTMP].status=CLEAN;
3097     live.state[FLAGTMP].status=INMEM;
3098     live.state[FLAGTMP].realreg=-1;
3099     /* We just "evicted" FLAGTMP. */
3100     if (live.nat[r].nholds!=1) {
3101     /* Huh? */
3102     abort();
3103     }
3104     live.nat[r].nholds=0;
3105     }
3106    
3107     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3108     static __inline__ void raw_reg_to_flags(int r)
3109     {
3110     raw_push_l_r(r);
3111     raw_popfl();
3112     }
3113    
3114     #endif
3115    
3116     /* Apparently, there are enough instructions between flag store and
3117     flag reload to avoid the partial memory stall */
3118     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3119     {
3120     #if 1
3121     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3122     #else
3123     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3124     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3125     #endif
3126     }
3127    
3128     /* FLAGX is byte sized, and we *do* write it at that size */
3129     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3130     {
3131     if (live.nat[target].canbyte)
3132     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3133     else if (live.nat[target].canword)
3134     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3135     else
3136     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3137     }
3138    
3139 gbeauche 1.11 #define NATIVE_FLAG_Z 0x40
3140     static __inline__ void raw_flags_set_zero(int f, int r, int t)
3141     {
3142     // FIXME: this is really suboptimal
3143     raw_pushfl();
3144     raw_pop_l_r(f);
3145     raw_and_l_ri(f,~NATIVE_FLAG_Z);
3146     raw_test_l_rr(r,r);
3147     raw_mov_l_ri(r,0);
3148     raw_mov_l_ri(t,NATIVE_FLAG_Z);
3149     raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3150     raw_or_l(f,r);
3151     raw_push_l_r(f);
3152     raw_popfl();
3153     }
3154 gbeauche 1.1
3155     static __inline__ void raw_inc_sp(int off)
3156     {
3157 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3158 gbeauche 1.1 }
3159    
3160     /*************************************************************************
3161     * Handling mistaken direct memory access *
3162     *************************************************************************/
3163    
3164     // gb-- I don't need that part for JIT Basilisk II
3165     #if defined(NATMEM_OFFSET) && 0
3166     #include <asm/sigcontext.h>
3167     #include <signal.h>
3168    
3169     #define SIG_READ 1
3170     #define SIG_WRITE 2
3171    
3172     static int in_handler=0;
3173     static uae_u8 veccode[256];
3174    
3175     static void vec(int x, struct sigcontext sc)
3176     {
3177     uae_u8* i=(uae_u8*)sc.eip;
3178     uae_u32 addr=sc.cr2;
3179     int r=-1;
3180     int size=4;
3181     int dir=-1;
3182     int len=0;
3183     int j;
3184    
3185     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3186     if (!canbang)
3187     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3188     if (in_handler)
3189     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3190    
3191     if (canbang && i>=compiled_code && i<=current_compile_p) {
3192     if (*i==0x66) {
3193     i++;
3194     size=2;
3195     len++;
3196     }
3197    
3198     switch(i[0]) {
3199     case 0x8a:
3200     if ((i[1]&0xc0)==0x80) {
3201     r=(i[1]>>3)&7;
3202     dir=SIG_READ;
3203     size=1;
3204     len+=6;
3205     break;
3206     }
3207     break;
3208     case 0x88:
3209     if ((i[1]&0xc0)==0x80) {
3210     r=(i[1]>>3)&7;
3211     dir=SIG_WRITE;
3212     size=1;
3213     len+=6;
3214     break;
3215     }
3216     break;
3217     case 0x8b:
3218     if ((i[1]&0xc0)==0x80) {
3219     r=(i[1]>>3)&7;
3220     dir=SIG_READ;
3221     len+=6;
3222     break;
3223     }
3224     if ((i[1]&0xc0)==0x40) {
3225     r=(i[1]>>3)&7;
3226     dir=SIG_READ;
3227     len+=3;
3228     break;
3229     }
3230     break;
3231     case 0x89:
3232     if ((i[1]&0xc0)==0x80) {
3233     r=(i[1]>>3)&7;
3234     dir=SIG_WRITE;
3235     len+=6;
3236     break;
3237     }
3238     if ((i[1]&0xc0)==0x40) {
3239     r=(i[1]>>3)&7;
3240     dir=SIG_WRITE;
3241     len+=3;
3242     break;
3243     }
3244     break;
3245     }
3246     }
3247    
3248     if (r!=-1) {
3249     void* pr=NULL;
3250     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3251    
3252     switch(r) {
3253     case 0: pr=&(sc.eax); break;
3254     case 1: pr=&(sc.ecx); break;
3255     case 2: pr=&(sc.edx); break;
3256     case 3: pr=&(sc.ebx); break;
3257     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3258     case 5: pr=(size>1)?
3259     (void*)(&(sc.ebp)):
3260     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3261     case 6: pr=(size>1)?
3262     (void*)(&(sc.esi)):
3263     (void*)(((uae_u8*)&(sc.edx))+1); break;
3264     case 7: pr=(size>1)?
3265     (void*)(&(sc.edi)):
3266     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3267     default: abort();
3268     }
3269     if (pr) {
3270     blockinfo* bi;
3271    
3272     if (currprefs.comp_oldsegv) {
3273     addr-=NATMEM_OFFSET;
3274    
3275     if ((addr>=0x10000000 && addr<0x40000000) ||
3276     (addr>=0x50000000)) {
3277     write_log("Suspicious address in %x SEGV handler.\n",addr);
3278     }
3279     if (dir==SIG_READ) {
3280     switch(size) {
3281     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3282     case 2: *((uae_u16*)pr)=get_word(addr); break;
3283     case 4: *((uae_u32*)pr)=get_long(addr); break;
3284     default: abort();
3285     }
3286     }
3287     else { /* write */
3288     switch(size) {
3289     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3290     case 2: put_word(addr,*((uae_u16*)pr)); break;
3291     case 4: put_long(addr,*((uae_u32*)pr)); break;
3292     default: abort();
3293     }
3294     }
3295     write_log("Handled one access!\n");
3296     fflush(stdout);
3297     segvcount++;
3298     sc.eip+=len;
3299     }
3300     else {
3301     void* tmp=target;
3302     int i;
3303     uae_u8 vecbuf[5];
3304    
3305     addr-=NATMEM_OFFSET;
3306    
3307     if ((addr>=0x10000000 && addr<0x40000000) ||
3308     (addr>=0x50000000)) {
3309     write_log("Suspicious address in %x SEGV handler.\n",addr);
3310     }
3311    
3312     target=(uae_u8*)sc.eip;
3313     for (i=0;i<5;i++)
3314     vecbuf[i]=target[i];
3315     emit_byte(0xe9);
3316     emit_long((uae_u32)veccode-(uae_u32)target-4);
3317     write_log("Create jump to %p\n",veccode);
3318    
3319     write_log("Handled one access!\n");
3320     fflush(stdout);
3321     segvcount++;
3322    
3323     target=veccode;
3324    
3325     if (dir==SIG_READ) {
3326     switch(size) {
3327     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3328     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3329     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3330     default: abort();
3331     }
3332     }
3333     else { /* write */
3334     switch(size) {
3335     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3336     case 2: put_word(addr,*((uae_u16*)pr)); break;
3337     case 4: put_long(addr,*((uae_u32*)pr)); break;
3338     default: abort();
3339     }
3340     }
3341     for (i=0;i<5;i++)
3342     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3343     raw_mov_l_mi((uae_u32)&in_handler,0);
3344     emit_byte(0xe9);
3345     emit_long(sc.eip+len-(uae_u32)target-4);
3346     in_handler=1;
3347     target=tmp;
3348     }
3349     bi=active;
3350     while (bi) {
3351     if (bi->handler &&
3352     (uae_u8*)bi->direct_handler<=i &&
3353     (uae_u8*)bi->nexthandler>i) {
3354     write_log("deleted trigger (%p<%p<%p) %p\n",
3355     bi->handler,
3356     i,
3357     bi->nexthandler,
3358     bi->pc_p);
3359     invalidate_block(bi);
3360     raise_in_cl_list(bi);
3361     set_special(0);
3362     return;
3363     }
3364     bi=bi->next;
3365     }
3366     /* Not found in the active list. Might be a rom routine that
3367     is in the dormant list */
3368     bi=dormant;
3369     while (bi) {
3370     if (bi->handler &&
3371     (uae_u8*)bi->direct_handler<=i &&
3372     (uae_u8*)bi->nexthandler>i) {
3373     write_log("deleted trigger (%p<%p<%p) %p\n",
3374     bi->handler,
3375     i,
3376     bi->nexthandler,
3377     bi->pc_p);
3378     invalidate_block(bi);
3379     raise_in_cl_list(bi);
3380     set_special(0);
3381     return;
3382     }
3383     bi=bi->next;
3384     }
3385     write_log("Huh? Could not find trigger!\n");
3386     return;
3387     }
3388     }
3389     write_log("Can't handle access!\n");
3390     for (j=0;j<10;j++) {
3391     write_log("instruction byte %2d is %02x\n",j,i[j]);
3392     }
3393     write_log("Please send the above info (starting at \"fault address\") to\n"
3394     "bmeyer@csse.monash.edu.au\n"
3395     "This shouldn't happen ;-)\n");
3396     fflush(stdout);
3397     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3398     }
3399     #endif
3400    
3401    
3402     /*************************************************************************
3403     * Checking for CPU features *
3404     *************************************************************************/
3405    
3406 gbeauche 1.3 struct cpuinfo_x86 {
3407     uae_u8 x86; // CPU family
3408     uae_u8 x86_vendor; // CPU vendor
3409     uae_u8 x86_processor; // CPU canonical processor type
3410     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3411     uae_u32 x86_hwcap;
3412     uae_u8 x86_model;
3413     uae_u8 x86_mask;
3414     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3415     char x86_vendor_id[16];
3416     };
3417     struct cpuinfo_x86 cpuinfo;
3418    
/* Values stored in cpuinfo_x86.x86_vendor */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
3431    
/* Canonical processor types, stored in cpuinfo_x86.x86_processor and
   used as index into the per-processor tables. X86_PROCESSOR_max is the
   number of types and doubles as the "unknown" marker. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_max        = 7
};
3442    
3443     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3444     "80386",
3445     "80486",
3446     "Pentium",
3447     "PentiumPro",
3448     "K6",
3449     "Athlon",
3450     "Pentium4"
3451     };
3452    
3453     static struct ptt {
3454     const int align_loop;
3455     const int align_loop_max_skip;
3456     const int align_jump;
3457     const int align_jump_max_skip;
3458     const int align_func;
3459     }
3460     x86_alignments[X86_PROCESSOR_max] = {
3461     { 4, 3, 4, 3, 4 },
3462     { 16, 15, 16, 15, 16 },
3463     { 16, 7, 16, 7, 16 },
3464     { 16, 15, 16, 7, 16 },
3465     { 32, 7, 32, 7, 32 },
3466 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3467 gbeauche 1.3 { 0, 0, 0, 0, 0 }
3468     };
3469 gbeauche 1.1
3470 gbeauche 1.3 static void
3471     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3472 gbeauche 1.1 {
3473 gbeauche 1.3 char *v = c->x86_vendor_id;
3474    
3475     if (!strcmp(v, "GenuineIntel"))
3476     c->x86_vendor = X86_VENDOR_INTEL;
3477     else if (!strcmp(v, "AuthenticAMD"))
3478     c->x86_vendor = X86_VENDOR_AMD;
3479     else if (!strcmp(v, "CyrixInstead"))
3480     c->x86_vendor = X86_VENDOR_CYRIX;
3481     else if (!strcmp(v, "Geode by NSC"))
3482     c->x86_vendor = X86_VENDOR_NSC;
3483     else if (!strcmp(v, "UMC UMC UMC "))
3484     c->x86_vendor = X86_VENDOR_UMC;
3485     else if (!strcmp(v, "CentaurHauls"))
3486     c->x86_vendor = X86_VENDOR_CENTAUR;
3487     else if (!strcmp(v, "NexGenDriven"))
3488     c->x86_vendor = X86_VENDOR_NEXGEN;
3489     else if (!strcmp(v, "RiseRiseRise"))
3490     c->x86_vendor = X86_VENDOR_RISE;
3491     else if (!strcmp(v, "GenuineTMx86") ||
3492     !strcmp(v, "TransmetaCPU"))
3493     c->x86_vendor = X86_VENDOR_TRANSMETA;
3494     else
3495     c->x86_vendor = X86_VENDOR_UNKNOWN;
3496     }
3497 gbeauche 1.1
3498 gbeauche 1.3 static void
3499     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3500     {
3501     static uae_u8 cpuid_space[256];
3502     uae_u8* tmp=get_target();
3503 gbeauche 1.1
3504 gbeauche 1.3 set_target(cpuid_space);
3505     raw_push_l_r(0); /* eax */
3506     raw_push_l_r(1); /* ecx */
3507     raw_push_l_r(2); /* edx */
3508     raw_push_l_r(3); /* ebx */
3509     raw_mov_l_rm(0,(uae_u32)&op);
3510     raw_cpuid(0);
3511     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3512     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3513     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3514     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3515     raw_pop_l_r(3);
3516     raw_pop_l_r(2);
3517     raw_pop_l_r(1);
3518     raw_pop_l_r(0);
3519     raw_ret();
3520     set_target(tmp);
3521 gbeauche 1.1
3522 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3523 gbeauche 1.1 }
3524    
3525 gbeauche 1.3 static void
3526     raw_init_cpu(void)
3527 gbeauche 1.1 {
3528 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3529    
3530     /* Defaults */
3531     c->x86_vendor = X86_VENDOR_UNKNOWN;
3532     c->cpuid_level = -1; /* CPUID not detected */
3533     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3534     c->x86_vendor_id[0] = '\0'; /* Unset */
3535     c->x86_hwcap = 0;
3536    
3537     /* Get vendor name */
3538     c->x86_vendor_id[12] = '\0';
3539     cpuid(0x00000000,
3540     (uae_u32 *)&c->cpuid_level,
3541     (uae_u32 *)&c->x86_vendor_id[0],
3542     (uae_u32 *)&c->x86_vendor_id[8],
3543     (uae_u32 *)&c->x86_vendor_id[4]);
3544     x86_get_cpu_vendor(c);
3545    
3546     /* Intel-defined flags: level 0x00000001 */
3547     c->x86_brand_id = 0;
3548     if ( c->cpuid_level >= 0x00000001 ) {
3549     uae_u32 tfms, brand_id;
3550     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3551     c->x86 = (tfms >> 8) & 15;
3552     c->x86_model = (tfms >> 4) & 15;
3553     c->x86_brand_id = brand_id & 0xff;
3554     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3555     (c->x86 == 0xf)) {
3556     /* AMD Extended Family and Model Values */
3557     c->x86 += (tfms >> 20) & 0xff;
3558     c->x86_model += (tfms >> 12) & 0xf0;
3559     }
3560     c->x86_mask = tfms & 15;
3561     } else {
3562     /* Have CPUID level 0 only - unheard of */
3563     c->x86 = 4;
3564     }
3565    
3566     /* Canonicalize processor ID */
3567     c->x86_processor = X86_PROCESSOR_max;
3568     switch (c->x86) {
3569     case 3:
3570     c->x86_processor = X86_PROCESSOR_I386;
3571     break;
3572     case 4:
3573     c->x86_processor = X86_PROCESSOR_I486;
3574     break;
3575     case 5:
3576     if (c->x86_vendor == X86_VENDOR_AMD)
3577     c->x86_processor = X86_PROCESSOR_K6;
3578     else
3579     c->x86_processor = X86_PROCESSOR_PENTIUM;
3580     break;
3581     case 6:
3582     if (c->x86_vendor == X86_VENDOR_AMD)
3583     c->x86_processor = X86_PROCESSOR_ATHLON;
3584     else
3585     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3586     break;
3587     case 15:
3588     if (c->x86_vendor == X86_VENDOR_INTEL) {
3589     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
3590     if (c->x86_brand_id >= 8)
3591     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3592     }
3593     break;
3594     }
3595     if (c->x86_processor == X86_PROCESSOR_max) {
3596     fprintf(stderr, "Error: unknown processor type\n");
3597     fprintf(stderr, " Family : %d\n", c->x86);
3598     fprintf(stderr, " Model : %d\n", c->x86_model);
3599     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3600     if (c->x86_brand_id)
3601     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3602     abort();
3603     }
3604    
3605     /* Have CMOV support? */
3606     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3607    
3608     /* Can the host CPU suffer from partial register stalls? */
3609     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3610     #if 1
3611     /* It appears that partial register writes are a bad idea even on
3612 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3613     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3614 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3615     have_rat_stall = true;
3616 gbeauche 1.1 #endif
3617 gbeauche 1.3
3618     /* Alignments */
3619     if (tune_alignment) {
3620     align_loops = x86_alignments[c->x86_processor].align_loop;
3621     align_jumps = x86_alignments[c->x86_processor].align_jump;
3622     }
3623    
3624     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3625     c->cpuid_level, c->x86_vendor_id,
3626     x86_processor_string_table[c->x86_processor]);
3627 gbeauche 1.1 }
3628    
3629 gbeauche 1.10 static bool target_check_bsf(void)
3630     {
3631     bool mismatch = false;
3632     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3633     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3634     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3635     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3636     for (int value = -1; value <= 1; value++) {
3637     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3638     int tmp = value;
3639     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3640 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3641 gbeauche 1.10 int OF = (flags >> 11) & 1;
3642     int SF = (flags >> 7) & 1;
3643     int ZF = (flags >> 6) & 1;
3644     int CF = flags & 1;
3645     tmp = (value == 0);
3646     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3647     mismatch = true;
3648     }
3649     }}}}
3650     if (mismatch)
3651     write_log("Target CPU defines all flags on BSF instruction\n");
3652     return !mismatch;
3653     }
3654    
3655 gbeauche 1.1
3656     /*************************************************************************
3657     * FPU stuff *
3658     *************************************************************************/
3659    
3660    
3661     static __inline__ void raw_fp_init(void)
3662     {
3663     int i;
3664    
3665     for (i=0;i<N_FREGS;i++)
3666     live.spos[i]=-2;
3667     live.tos=-1; /* Stack is empty */
3668     }
3669    
3670     static __inline__ void raw_fp_cleanup_drop(void)
3671     {
3672     #if 0
3673     /* using FINIT instead of popping all the entries.
3674     Seems to have side effects --- there is display corruption in
3675     Quake when this is used */
3676     if (live.tos>1) {
3677     emit_byte(0x9b);
3678     emit_byte(0xdb);
3679     emit_byte(0xe3);
3680     live.tos=-1;
3681     }
3682     #endif
3683     while (live.tos>=1) {
3684     emit_byte(0xde);
3685     emit_byte(0xd9);
3686     live.tos-=2;
3687     }
3688     while (live.tos>=0) {
3689     emit_byte(0xdd);
3690     emit_byte(0xd8);
3691     live.tos--;
3692     }
3693     raw_fp_init();
3694     }
3695    
3696     static __inline__ void make_tos(int r)
3697     {
3698     int p,q;
3699    
3700     if (live.spos[r]<0) { /* Register not yet on stack */
3701     emit_byte(0xd9);
3702     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3703     live.tos++;
3704     live.spos[r]=live.tos;
3705     live.onstack[live.tos]=r;
3706     return;
3707     }
3708     /* Register is on stack */
3709     if (live.tos==live.spos[r])
3710     return;
3711     p=live.spos[r];
3712     q=live.onstack[live.tos];
3713    
3714     emit_byte(0xd9);
3715     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3716     live.onstack[live.tos]=r;
3717     live.spos[r]=live.tos;
3718     live.onstack[p]=q;
3719     live.spos[q]=p;
3720     }
3721    
3722     static __inline__ void make_tos2(int r, int r2)
3723     {
3724     int q;
3725    
3726     make_tos(r2); /* Put the reg that's supposed to end up in position2
3727     on top */
3728    
3729     if (live.spos[r]<0) { /* Register not yet on stack */
3730     make_tos(r); /* This will extend the stack */
3731     return;
3732     }
3733     /* Register is on stack */
3734     emit_byte(0xd9);
3735     emit_byte(0xc9); /* Move r2 into position 2 */
3736    
3737     q=live.onstack[live.tos-1];
3738     live.onstack[live.tos]=q;
3739     live.spos[q]=live.tos;
3740     live.onstack[live.tos-1]=r2;
3741     live.spos[r2]=live.tos-1;
3742    
3743     make_tos(r); /* And r into 1 */
3744     }
3745    
3746     static __inline__ int stackpos(int r)
3747     {
3748     if (live.spos[r]<0)
3749     abort();
3750     if (live.tos<live.spos[r]) {
3751     printf("Looking for spos for fnreg %d\n",r);
3752     abort();
3753     }
3754     return live.tos-live.spos[r];
3755     }
3756    
3757     static __inline__ void usereg(int r)
3758     {
3759     if (live.spos[r]<0)
3760     make_tos(r);
3761     }
3762    
3763     /* This is called with one FP value in a reg *above* tos, which it will
3764     pop off the stack if necessary */
3765     static __inline__ void tos_make(int r)
3766     {
3767     if (live.spos[r]<0) {
3768     live.tos++;
3769     live.spos[r]=live.tos;
3770     live.onstack[live.tos]=r;
3771     return;
3772     }
3773     emit_byte(0xdd);
3774     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3775     and pop it*/
3776     }
3777    
3778    
3779     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3780     {
3781     make_tos(r);
3782     emit_byte(0xdd);
3783     emit_byte(0x15);
3784     emit_long(m);
3785     }
3786     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3787    
3788     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
3789     {
3790     make_tos(r);
3791     emit_byte(0xdd);
3792     emit_byte(0x1d);
3793     emit_long(m);
3794     live.onstack[live.tos]=-1;
3795     live.tos--;
3796     live.spos[r]=-2;
3797     }
3798     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3799    
3800     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3801     {
3802     emit_byte(0xdd);
3803     emit_byte(0x05);
3804     emit_long(m);
3805     tos_make(r);
3806     }
3807     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3808    
3809     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3810     {
3811     emit_byte(0xdb);
3812     emit_byte(0x05);
3813     emit_long(m);
3814     tos_make(r);
3815     }
3816     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3817    
3818     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3819     {
3820     make_tos(r);
3821     emit_byte(0xdb);
3822     emit_byte(0x15);
3823     emit_long(m);
3824     }
3825     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3826    
3827     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3828     {
3829     emit_byte(0xd9);
3830     emit_byte(0x05);
3831     emit_long(m);
3832     tos_make(r);
3833     }
3834     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3835    
3836     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3837     {
3838     make_tos(r);
3839     emit_byte(0xd9);
3840     emit_byte(0x15);
3841     emit_long(m);
3842     }
3843     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3844    
3845     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3846     {
3847     int rs;
3848    
3849     /* Stupid x87 can't write a long double to mem without popping the
3850     stack! */
3851     usereg(r);
3852     rs=stackpos(r);
3853     emit_byte(0xd9); /* Get a copy to the top of stack */
3854     emit_byte(0xc0+rs);
3855    
3856     emit_byte(0xdb); /* store and pop it */
3857     emit_byte(0x3d);
3858     emit_long(m);
3859     }
3860     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3861    
3862     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
3863     {
3864     int rs;
3865    
3866     make_tos(r);
3867     emit_byte(0xdb); /* store and pop it */
3868     emit_byte(0x3d);
3869     emit_long(m);
3870     live.onstack[live.tos]=-1;
3871     live.tos--;
3872     live.spos[r]=-2;
3873     }
3874     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3875    
3876     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
3877     {
3878     emit_byte(0xdb);
3879     emit_byte(0x2d);
3880     emit_long(m);
3881     tos_make(r);
3882     }
3883     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
3884    
3885     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
3886     {
3887     emit_byte(0xd9);
3888     emit_byte(0xeb);
3889     tos_make(r);
3890     }
3891     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
3892    
3893     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
3894     {
3895     emit_byte(0xd9);
3896     emit_byte(0xec);
3897     tos_make(r);
3898     }
3899     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
3900    
3901     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
3902     {
3903     emit_byte(0xd9);
3904     emit_byte(0xea);
3905     tos_make(r);
3906     }
3907     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
3908    
3909     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
3910     {
3911     emit_byte(0xd9);
3912     emit_byte(0xed);
3913     tos_make(r);
3914     }
3915     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
3916    
3917     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
3918     {
3919     emit_byte(0xd9);
3920     emit_byte(0xe8);
3921     tos_make(r);
3922     }
3923     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
3924    
3925     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3926     {
3927     emit_byte(0xd9);
3928     emit_byte(0xee);
3929     tos_make(r);
3930     }
3931     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3932    
3933     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
3934     {
3935     int ds;
3936    
3937     usereg(s);
3938     ds=stackpos(s);
3939     if (ds==0 && live.spos[d]>=0) {
3940     /* source is on top of stack, and we already have the dest */
3941     int dd=stackpos(d);
3942     emit_byte(0xdd);
3943     emit_byte(0xd0+dd);
3944     }
3945     else {
3946     emit_byte(0xd9);
3947     emit_byte(0xc0+ds); /* duplicate source on tos */
3948     tos_make(d); /* store to destination, pop if necessary */
3949     }
3950     }
3951     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
3952    
3953     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3954     {
3955     emit_byte(0xd9);
3956     emit_byte(0xa8+index);
3957     emit_long(base);
3958     }
3959     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3960    
3961    
3962     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3963     {
3964     int ds;
3965    
3966     if (d!=s) {
3967     usereg(s);
3968     ds=stackpos(s);
3969     emit_byte(0xd9);
3970     emit_byte(0xc0+ds); /* duplicate source */
3971     emit_byte(0xd9);
3972     emit_byte(0xfa); /* take square root */
3973     tos_make(d); /* store to destination */
3974     }
3975     else {
3976     make_tos(d);
3977     emit_byte(0xd9);
3978     emit_byte(0xfa); /* take square root */
3979     }
3980     }
3981     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3982    
3983     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3984     {
3985     int ds;
3986    
3987     if (d!=s) {
3988     usereg(s);
3989     ds=stackpos(s);
3990     emit_byte(0xd9);
3991     emit_byte(0xc0+ds); /* duplicate source */
3992     emit_byte(0xd9);
3993     emit_byte(0xe1); /* take fabs */
3994     tos_make(d); /* store to destination */
3995     }
3996     else {
3997     make_tos(d);
3998     emit_byte(0xd9);
3999     emit_byte(0xe1); /* take fabs */
4000     }
4001     }
4002     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4003    
4004     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4005     {
4006     int ds;
4007    
4008     if (d!=s) {
4009     usereg(s);
4010     ds=stackpos(s);
4011     emit_byte(0xd9);
4012     emit_byte(0xc0+ds); /* duplicate source */
4013     emit_byte(0xd9);
4014     emit_byte(0xfc); /* take frndint */
4015     tos_make(d); /* store to destination */
4016     }
4017     else {
4018     make_tos(d);
4019     emit_byte(0xd9);
4020     emit_byte(0xfc); /* take frndint */
4021     }
4022     }
4023     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4024    
4025     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4026     {
4027     int ds;
4028    
4029     if (d!=s) {
4030     usereg(s);
4031     ds=stackpos(s);
4032     emit_byte(0xd9);
4033     emit_byte(0xc0+ds); /* duplicate source */
4034     emit_byte(0xd9);
4035     emit_byte(0xff); /* take cos */
4036     tos_make(d); /* store to destination */
4037     }
4038     else {
4039     make_tos(d);
4040     emit_byte(0xd9);
4041     emit_byte(0xff); /* take cos */
4042     }
4043     }
4044     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4045    
4046     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4047     {
4048     int ds;
4049    
4050     if (d!=s) {
4051     usereg(s);
4052     ds=stackpos(s);
4053     emit_byte(0xd9);
4054     emit_byte(0xc0+ds); /* duplicate source */
4055     emit_byte(0xd9);
4056     emit_byte(0xfe); /* take sin */
4057     tos_make(d); /* store to destination */
4058     }
4059     else {
4060     make_tos(d);
4061     emit_byte(0xd9);
4062     emit_byte(0xfe); /* take sin */
4063     }
4064     }
4065     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4066    
4067     double one=1;
4068     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4069     {
4070     int ds;
4071    
4072     usereg(s);
4073     ds=stackpos(s);
4074     emit_byte(0xd9);
4075     emit_byte(0xc0+ds); /* duplicate source */
4076    
4077     emit_byte(0xd9);
4078     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4079     emit_byte(0xd9);
4080     emit_byte(0xfc); /* rndint */
4081     emit_byte(0xd9);
4082     emit_byte(0xc9); /* swap top two elements */
4083     emit_byte(0xd8);
4084     emit_byte(0xe1); /* subtract rounded from original */
4085     emit_byte(0xd9);
4086     emit_byte(0xf0); /* f2xm1 */
4087     emit_byte(0xdc);
4088     emit_byte(0x05);
4089     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
4090     emit_byte(0xd9);
4091     emit_byte(0xfd); /* and scale it */
4092     emit_byte(0xdd);
4093     emit_byte(0xd9); /* take he rounded value off */
4094     tos_make(d); /* store to destination */
4095     }
4096     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4097    
4098     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4099     {
4100     int ds;
4101    
4102     usereg(s);
4103     ds=stackpos(s);
4104     emit_byte(0xd9);
4105     emit_byte(0xc0+ds); /* duplicate source */
4106     emit_byte(0xd9);
4107     emit_byte(0xea); /* fldl2e */
4108     emit_byte(0xde);
4109     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4110    
4111     emit_byte(0xd9);
4112     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4113     emit_byte(0xd9);
4114     emit_byte(0xfc); /* rndint */
4115     emit_byte(0xd9);
4116     emit_byte(0xc9); /* swap top two elements */
4117     emit_byte(0xd8);
4118     emit_byte(0xe1); /* subtract rounded from original */
4119     emit_byte(0xd9);
4120     emit_byte(0xf0); /* f2xm1 */
4121     emit_byte(0xdc);
4122     emit_byte(0x05);
4123     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
4124     emit_byte(0xd9);
4125     emit_byte(0xfd); /* and scale it */
4126     emit_byte(0xdd);
4127     emit_byte(0xd9); /* take he rounded value off */
4128     tos_make(d); /* store to destination */
4129     }
4130     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4131    
4132     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4133     {
4134     int ds;
4135    
4136     usereg(s);
4137     ds=stackpos(s);
4138     emit_byte(0xd9);
4139     emit_byte(0xc0+ds); /* duplicate source */
4140     emit_byte(0xd9);
4141     emit_byte(0xe8); /* push '1' */
4142     emit_byte(0xd9);
4143     emit_byte(0xc9); /* swap top two */
4144     emit_byte(0xd9);
4145     emit_byte(0xf1); /* take 1*log2(x) */
4146     tos_make(d); /* store to destination */
4147     }
4148     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4149    
4150    
4151     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4152     {
4153     int ds;
4154    
4155     if (d!=s) {
4156     usereg(s);
4157     ds=stackpos(s);
4158     emit_byte(0xd9);
4159     emit_byte(0xc0+ds); /* duplicate source */
4160     emit_byte(0xd9);
4161     emit_byte(0xe0); /* take fchs */
4162     tos_make(d); /* store to destination */
4163     }
4164     else {
4165     make_tos(d);
4166     emit_byte(0xd9);
4167     emit_byte(0xe0); /* take fchs */
4168     }
4169     }
4170     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4171    
4172     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4173     {
4174     int ds;
4175    
4176     usereg(s);
4177     usereg(d);
4178    
4179     if (live.spos[s]==live.tos) {
4180     /* Source is on top of stack */
4181     ds=stackpos(d);
4182     emit_byte(0xdc);
4183     emit_byte(0xc0+ds); /* add source to dest*/
4184     }
4185     else {
4186     make_tos(d);
4187     ds=stackpos(s);
4188    
4189     emit_byte(0xd8);
4190     emit_byte(0xc0+ds); /* add source to dest*/
4191     }
4192     }
4193     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4194    
4195     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4196     {
4197     int ds;
4198    
4199     usereg(s);
4200     usereg(d);
4201    
4202     if (live.spos[s]==live.tos) {
4203     /* Source is on top of stack */
4204     ds=stackpos(d);
4205     emit_byte(0xdc);
4206     emit_byte(0xe8+ds); /* sub source from dest*/
4207     }
4208     else {
4209     make_tos(d);
4210     ds=stackpos(s);
4211    
4212     emit_byte(0xd8);
4213     emit_byte(0xe0+ds); /* sub src from dest */
4214     }
4215     }
4216     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4217    
4218     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4219     {
4220     int ds;
4221    
4222     usereg(s);
4223     usereg(d);
4224    
4225     make_tos(d);
4226     ds=stackpos(s);
4227    
4228     emit_byte(0xdd);
4229     emit_byte(0xe0+ds); /* cmp dest with source*/
4230     }
4231     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4232    
4233     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4234     {
4235     int ds;
4236    
4237     usereg(s);
4238     usereg(d);
4239    
4240     if (live.spos[s]==live.tos) {
4241     /* Source is on top of stack */
4242     ds=stackpos(d);
4243     emit_byte(0xdc);
4244     emit_byte(0xc8+ds); /* mul dest by source*/
4245     }
4246     else {
4247     make_tos(d);
4248     ds=stackpos(s);
4249    
4250     emit_byte(0xd8);
4251     emit_byte(0xc8+ds); /* mul dest by source*/
4252     }
4253     }
4254     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4255    
4256     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4257     {
4258     int ds;
4259    
4260     usereg(s);
4261     usereg(d);
4262    
4263     if (live.spos[s]==live.tos) {
4264     /* Source is on top of stack */
4265     ds=stackpos(d);
4266     emit_byte(0xdc);
4267     emit_byte(0xf8+ds); /* div dest by source */
4268     }
4269     else {
4270     make_tos(d);
4271     ds=stackpos(s);
4272    
4273     emit_byte(0xd8);
4274     emit_byte(0xf0+ds); /* div dest by source*/
4275     }
4276     }
4277     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4278    
4279     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4280     {
4281     int ds;
4282    
4283     usereg(s);
4284     usereg(d);
4285    
4286     make_tos2(d,s);
4287     ds=stackpos(s);
4288    
4289     if (ds!=1) {
4290     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4291     abort();
4292     }
4293     emit_byte(0xd9);
4294     emit_byte(0xf8); /* take rem from dest by source */
4295     }
4296     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4297    
4298     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4299     {
4300     int ds;
4301    
4302     usereg(s);
4303     usereg(d);
4304    
4305     make_tos2(d,s);
4306     ds=stackpos(s);
4307    
4308     if (ds!=1) {
4309     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4310     abort();
4311     }
4312     emit_byte(0xd9);
4313     emit_byte(0xf5); /* take rem1 from dest by source */
4314     }
4315     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4316    
4317    
4318     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4319     {
4320     make_tos(r);
4321     emit_byte(0xd9); /* ftst */
4322     emit_byte(0xe4);
4323     }
4324     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4325    
/* The %eax register is clobbered by raw_fflags_into_flags() if the target
   processor doesn't support fucomi (tested via have_cmov): the FPU status
   word then has to travel through %ax using fstsw/sahf.
   FFLAG_NREG names the register concerned.  The condition is parenthesized
   so that it stays a single operand wherever the macro is expanded. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
4329    
4330     static __inline__ void raw_fflags_into_flags(int r)
4331     {
4332     int p;
4333    
4334     usereg(r);
4335     p=stackpos(r);
4336    
4337     emit_byte(0xd9);
4338     emit_byte(0xee); /* Push 0 */
4339     emit_byte(0xd9);
4340     emit_byte(0xc9+p); /* swap top two around */
4341     if (have_cmov) {
4342     // gb-- fucomi is for P6 cores only, not K6-2 then...
4343     emit_byte(0xdb);
4344     emit_byte(0xe9+p); /* fucomi them */
4345     }
4346     else {
4347     emit_byte(0xdd);
4348     emit_byte(0xe1+p); /* fucom them */
4349     emit_byte(0x9b);
4350     emit_byte(0xdf);
4351     emit_byte(0xe0); /* fstsw ax */
4352     raw_sahf(0); /* sahf */
4353     }
4354     emit_byte(0xdd);
4355     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4356     }