ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.15
Committed: 2003-03-19T17:05:02Z (21 years, 6 months ago) by gbeauche
Branch: MAIN
Changes since 1.14: +30 -3 lines
Log Message:
Emulate CMOV in the new code generator for processors that don't support
this instruction

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numeric indices of the eight 32-bit general-purpose registers, numbered
   0..7 in the order EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI. The REG_*
   constants and register tables that follow use the same numbering. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45    
/* Calling-convention registers, given as register indices
   (0 = EAX, 1 = ECX, 2 = EDX per the *_INDEX table). */
46     /* The register in which subroutines return an integer return value */
47     #define REG_RESULT 0
48    
49     /* The registers subroutines take their first and second argument in */
50     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
51     /* Handle the _fastcall parameters of ECX and EDX */
52     #define REG_PAR1 1
53     #define REG_PAR2 2
54     #else
55     #define REG_PAR1 0
56     #define REG_PAR2 2
57     #endif
58    
/* Scratch register choices, as register indices (6 = ESI, 5 = EBP, 3 = EBX).
   NOTE(review): in the _MSC_VER branch REG_PC_TMP is 0, the same value as
   REG_PC_PRE, although the comment on the other branch asks for a register
   different from it -- confirm this is intended. */
59     /* Three registers that are not used for any of the above */
60     #define REG_NOPAR1 6
61     #define REG_NOPAR2 5
62     #define REG_NOPAR3 3
63    
64     #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     #define REG_PC_TMP 0
67     #else
68     #define REG_PC_TMP 1 /* Another register that is not the above */
69     #endif
70    
/* Fixed-register constraints, as register indices: shift counts use
   index 1 (ECX); the 32x32 multiply uses indices 0 (EAX) and 2 (EDX). */
71     #define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount.
72     -1 if any reg will do */
73     #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
74     #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
75    
/* Lists of register indices, each terminated by -1.
   always_used holds only index 4 (ESP); can_byte holds indices 0-3;
   can_word holds every index except 4. Presumably these tell the register
   allocator which registers are reserved and which may hold 8-bit / 16-bit
   values -- confirm against the users of these tables. */
76     uae_s8 always_used[]={4,-1};
77     uae_s8 can_byte[]={0,1,2,3,-1};
78     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79    
80     /* cpuopti mutates instruction handlers to assume registers are saved
81     by the caller. Indexed by register number; only entry 4 (ESP) is set. */
82     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
83    
84     /* This *should* be the same as call_saved. But:
85     - We might not really know which registers are saved, and which aren't,
86     so we need to preserve some, but don't want to rely on everyone else
87     also saving those registers
88     - Special registers (such as the stack pointer) should not be "preserved"
89     by pushing, even though they are "saved" across function calls
90     Indexed by register number; every entry except 4 (ESP) is set. */
91     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
92    
93     /* Whether classes of instructions do or don't clobber the native flags */
/* An empty definition means the class expands to nothing; the others expand
   to a clobber_flags() call. */
94     #define CLOBBER_MOV
95     #define CLOBBER_LEA
96     #define CLOBBER_CMOV
97     #define CLOBBER_POP
98     #define CLOBBER_PUSH
99     #define CLOBBER_SUB clobber_flags()
100     #define CLOBBER_SBB clobber_flags()
101     #define CLOBBER_CMP clobber_flags()
102     #define CLOBBER_ADD clobber_flags()
103     #define CLOBBER_ADC clobber_flags()
104     #define CLOBBER_AND clobber_flags()
105     #define CLOBBER_OR clobber_flags()
106     #define CLOBBER_XOR clobber_flags()
107    
108     #define CLOBBER_ROL clobber_flags()
109     #define CLOBBER_ROR clobber_flags()
110     #define CLOBBER_SHLL clobber_flags()
111     #define CLOBBER_SHRL clobber_flags()
112     #define CLOBBER_SHRA clobber_flags()
113     #define CLOBBER_TEST clobber_flags()
114     #define CLOBBER_CL16
115     #define CLOBBER_CL8
116     #define CLOBBER_SE16
117     #define CLOBBER_SE8
118     #define CLOBBER_ZE16
119     #define CLOBBER_ZE8
120     #define CLOBBER_SW16 clobber_flags()
121     #define CLOBBER_SW32
122     #define CLOBBER_SETCC
123     #define CLOBBER_MUL clobber_flags()
124     #define CLOBBER_BT clobber_flags()
125     #define CLOBBER_BSF clobber_flags()
126    
127 gbeauche 1.13 /* FIXME: disabled until that's proofread. */
128     #if 0
129    
/* Configuration macros defined ahead of including codegen_x86.h (presumably
   consumed by that header -- confirm there), followed by the x86_emit_* and
   x86_get_target hooks, which map onto emit_byte/emit_word/emit_long,
   get_target and jit_fail. x86_emit_failure passes the call site's file,
   line and function name along with the message. */
130     #if defined(__x86_64__)
131     #define X86_TARGET_64BIT 1
132     #endif
133     #define X86_FLAT_REGISTERS 0
134 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
135     #define X86_OPTIMIZE_ROTSHI 1
136 gbeauche 1.13 #include "codegen_x86.h"
137    
138     #define x86_emit_byte(B) emit_byte(B)
139     #define x86_emit_word(W) emit_word(W)
140     #define x86_emit_long(L) emit_long(L)
141     #define x86_get_target() get_target()
142     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
143    
/* Report a fatal code-generation error and terminate.
   Prints the function name, file, line and message to stderr, then calls
   abort(); this function does not return. */
144     static void jit_fail(const char *msg, const char *file, int line, const char *function)
145     {
146     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
147     function, file, line, msg);
148     abort();
149     }
150    
/* Push the 32-bit register r; emitted as PUSHLr(r). */
151     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
152     {
153     PUSHLr(r);
154     }
155     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
156    
/* Pop into the 32-bit register r; emitted as POPLr(r). */
157     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
158     {
159     POPLr(r);
160     }
161     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
162    
/* Bit test of register r at immediate bit index i. The emitter macro takes
   the bit index first and the register last: BTLir(i, r). */
163     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
164     {
165     BTLir(i, r);
166     }
167     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
168    
/* Bit test of register r at the bit index held in register b: BTLrr(b, r). */
169     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
170     {
171     BTLrr(b, r);
172     }
173     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
174    
/* Bit test-and-complement of register r at immediate bit index i: BTCLir(i, r). */
175     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
176     {
177     BTCLir(i, r);
178     }
179     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
180    
/* Bit test-and-complement of register r at the bit index in register b: BTCLrr(b, r). */
181     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
182     {
183     BTCLrr(b, r);
184     }
185     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
186    
/* Bit test-and-reset of register r at immediate bit index i: BTRLir(i, r). */
187     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
188     {
189     BTRLir(i, r);
190     }
191     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
192    
/* Bit test-and-reset of register r at the bit index in register b: BTRLrr(b, r). */
193     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
194     {
195     BTRLrr(b, r);
196     }
197     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
198    
/* Bit test-and-set of register r at immediate bit index i: BTSLir(i, r). */
199     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
200     {
201     BTSLir(i, r);
202     }
203     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
204    
/* Bit test-and-set of register r at the bit index in register b: BTSLrr(b, r). */
205     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
206     {
207     BTSLrr(b, r);
208     }
209     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
210    
/* 16-bit subtract of immediate i from register d: SUBWir(i, d). */
211     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
212     {
213     SUBWir(i, d);
214     }
215     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
216    
/* 32-bit load into register d from address s, with no base or index
   register (both X86_NOREG) and scale 1. */
217     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
218     {
219     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
220     }
221     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
222    
/* 32-bit store of immediate s to address d (no base/index register). */
223     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
224     {
225     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
226     }
227     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
228    
/* 16-bit store of immediate s to address d (no base/index register). */
229     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
230     {
231     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
232     }
233     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
234    
/* 8-bit store of immediate s to address d (no base/index register). */
235     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
236     {
237     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
238     }
239     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
240    
/* Rotate-left of the byte at address d by immediate count i
   (no base/index register): ROLBim. */
241     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
242     {
243     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
244     }
245     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
246    
/* 8-bit rotate-left of register r by immediate count i: ROLBir(i, r). */
247     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
248     {
249     ROLBir(i, r);
250     }
251     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
252    
/* 16-bit rotate-left of register r by immediate count i: ROLWir(i, r). */
253     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
254     {
255     ROLWir(i, r);
256     }
257     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
258    
/* 32-bit rotate-left of register r by immediate count i: ROLLir(i, r). */
259     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
260     {
261     ROLLir(i, r);
262     }
263     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
264    
/* 32-bit rotate-left of register d by the count in byte register r: ROLLrr(r, d). */
265     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
266     {
267     ROLLrr(r, d);
268     }
269     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
270    
/* 16-bit rotate-left of register d by the count in byte register r: ROLWrr(r, d). */
271     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
272     {
273     ROLWrr(r, d);
274     }
275     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
276    
/* 8-bit rotate-left of register d by the count in byte register r: ROLBrr(r, d). */
277     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
278     {
279     ROLBrr(r, d);
280     }
281     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
282    
/* 32-bit shift-left of register d by the count in byte register r: SHLLrr(r, d). */
283     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
284     {
285     SHLLrr(r, d);
286     }
287     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
288    
/* 16-bit shift-left of register d by the count in byte register r: SHLWrr(r, d). */
289     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
290     {
291     SHLWrr(r, d);
292     }
293     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
294    
/* 8-bit shift-left of register d by the count in byte register r: SHLBrr(r, d). */
295     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
296     {
297     SHLBrr(r, d);
298     }
299     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
300    
/* 8-bit rotate-right of register r by immediate count i: RORBir(i, r). */
301     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
302     {
303     RORBir(i, r);
304     }
305     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
306    
/* 16-bit rotate-right of register r by immediate count i: RORWir(i, r). */
307     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
308     {
309     RORWir(i, r);
310     }
311     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
312    
/* 32-bit OR of the value at address s (no base/index register) into
   register d: ORLmr. */
313     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
314     {
315     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
316     }
317     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
318    
/* 32-bit rotate-right of register r by immediate count i: RORLir(i, r). */
319     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
320     {
321     RORLir(i, r);
322     }
323     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
324    
/* 32-bit rotate-right of register d by the count in byte register r: RORLrr(r, d). */
325     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
326     {
327     RORLrr(r, d);
328     }
329     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
330    
/* 16-bit rotate-right of register d by the count in byte register r: RORWrr(r, d). */
331     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
332     {
333     RORWrr(r, d);
334     }
335     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
336    
/* 8-bit rotate-right of register d by the count in byte register r: RORBrr(r, d). */
337     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
338     {
339     RORBrr(r, d);
340     }
341     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
342    
/* 32-bit logical shift-right of register d by the count in byte register r: SHRLrr(r, d). */
343     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
344     {
345     SHRLrr(r, d);
346     }
347     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
348    
/* 16-bit logical shift-right of register d by the count in byte register r: SHRWrr(r, d). */
349     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
350     {
351     SHRWrr(r, d);
352     }
353     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
354    
/* 8-bit logical shift-right of register d by the count in byte register r: SHRBrr(r, d). */
355     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
356     {
357     SHRBrr(r, d);
358     }
359     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
360    
/* 32-bit arithmetic shift-right of register d by the count in byte
   register r; the "shra" helper maps to the SAR emitter: SARLrr(r, d). */
361     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
362     {
363 gbeauche 1.14 SARLrr(r, d);
364 gbeauche 1.13 }
365     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
366    
/* 16-bit arithmetic shift-right of register d by the count in byte
   register r: SARWrr(r, d). */
367     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
368     {
369 gbeauche 1.14 SARWrr(r, d);
370 gbeauche 1.13 }
371     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
372    
/* 8-bit arithmetic shift-right of register d by the count in byte
   register r: SARBrr(r, d). */
373     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
374     {
375 gbeauche 1.14 SARBrr(r, d);
376 gbeauche 1.13 }
377     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
378    
/* 32-bit shift-left of register r by immediate count i: SHLLir(i, r). */
379     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
380     {
381     SHLLir(i, r);
382     }
383     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
384    
/* 16-bit shift-left of register r by immediate count i: SHLWir(i, r). */
385     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
386     {
387     SHLWir(i, r);
388     }
389     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
390    
/* 8-bit shift-left of register r by immediate count i: SHLBir(i, r). */
391     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
392     {
393     SHLBir(i, r);
394     }
395     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
396    
/* 32-bit logical shift-right of register r by immediate count i: SHRLir(i, r). */
397     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
398     {
399     SHRLir(i, r);
400     }
401     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
402    
/* 16-bit logical shift-right of register r by immediate count i: SHRWir(i, r). */
403     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
404     {
405     SHRWir(i, r);
406     }
407     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
408    
/* 8-bit logical shift-right of register r by immediate count i: SHRBir(i, r). */
409     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
410     {
411     SHRBir(i, r);
412     }
413     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
414    
/* 32-bit arithmetic shift-right of register r by immediate count i: SARLir(i, r). */
415     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
416     {
417 gbeauche 1.14 SARLir(i, r);
418 gbeauche 1.13 }
419     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
420    
/* 16-bit arithmetic shift-right of register r by immediate count i: SARWir(i, r). */
421     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
422     {
423 gbeauche 1.14 SARWir(i, r);
424 gbeauche 1.13 }
425     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
426    
/* 8-bit arithmetic shift-right of register r by immediate count i: SARBir(i, r). */
427     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
428     {
429 gbeauche 1.14 SARBir(i, r);
430 gbeauche 1.13 }
431     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
432    
/* Emit SAHF. The dummy_ah parameter is not used in the body; presumably it
   only lets the caller's register tracking know AH is read -- confirm. */
433     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
434     {
435     SAHF();
436     }
437     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
438    
/* Emit CPUID. The dummy_eax parameter is not used in the body. */
439     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
440     {
441     CPUID();
442     }
443     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
444    
/* Emit LAHF. The dummy_ah parameter is not used in the body; presumably it
   only lets the caller's register tracking know AH is written -- confirm. */
445     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
446     {
447     LAHF();
448     }
449     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
450    
/* Set byte register d according to condition code cc: SETCCir(cc, d). */
451     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
452     {
453     SETCCir(cc, d);
454     }
455     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
456    
/* Set the byte at address d (no base/index register) according to
   condition code cc: SETCCim. */
457     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
458     {
459     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
460     }
461     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
462    
/* Conditional 32-bit register move: copy s into d when condition cc holds.
   When have_cmov is set, a single CMOVLrr is emitted. Otherwise the move is
   emulated: a short conditional jump on cc^1 (the opposite condition) skips
   over a plain MOVLrr. The jump distance 2 must equal the encoded size in
   bytes of that register-to-register MOV. On x86-64 builds the fallback
   path logs an error and aborts instead. */
463     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
464     {
465 gbeauche 1.15 if (have_cmov)
466     CMOVLrr(cc, s, d);
467     else { /* replacement using branch and mov */
468     #if defined(__x86_64__)
469     write_log("x86-64 implementations are bound to have CMOV!\n");
470     abort();
471     #endif
472     JCCSii(cc^1, 2);
473     MOVLrr(s, d);
474     }
475 gbeauche 1.13 }
476     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
477    
/* 32-bit bit-scan-forward of register s into register d: BSFLrr(s, d). */
478     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
479     {
480     BSFLrr(s, d);
481     }
482     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
483    
/* Sign-extend the 16-bit register s into the 32-bit register d: MOVSWLrr(s, d). */
484     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
485     {
486     MOVSWLrr(s, d);
487     }
488     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
489    
/* Sign-extend the 8-bit register s into the 32-bit register d: MOVSBLrr(s, d). */
490     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
491     {
492     MOVSBLrr(s, d);
493     }
494     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
495    
/* Zero-extend the 16-bit register s into the 32-bit register d: MOVZWLrr(s, d). */
496     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
497     {
498     MOVZWLrr(s, d);
499     }
500     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
501    
/* Zero-extend the 8-bit register s into the 32-bit register d: MOVZBLrr(s, d). */
502     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
503     {
504     MOVZBLrr(s, d);
505     }
506     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
507    
/* Two-operand signed multiply of register d by register s: IMULLrr(s, d).
   NOTE(review): the first LOWFUNC argument is NONE here although
   CLOBBER_MUL is defined as clobber_flags() -- confirm the annotation. */
508     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
509     {
510 gbeauche 1.14 IMULLrr(s, d);
511 gbeauche 1.13 }
512     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
513    
/* Signed multiply in the one-operand form, IMULLr(s).
   The operands are pinned: d must be MUL_NREG1 and s must be MUL_NREG2;
   any other pairing is logged and the process aborts. */
514     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
515     {
516 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
517     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
518 gbeauche 1.13 abort();
519 gbeauche 1.14 }
520     IMULLr(s);
521 gbeauche 1.13 }
522     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
523    
/* Unsigned multiply in the one-operand form, MULLr(s).
   The operands are pinned: d must be MUL_NREG1 and s must be MUL_NREG2;
   any other pairing is logged and the process aborts. */
524     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
525     {
526 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
527     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
528 gbeauche 1.13 abort();
529 gbeauche 1.14 }
530     MULLr(s);
531 gbeauche 1.13 }
532     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
533    
/* Not implemented: calling this aborts unconditionally and emits nothing.
   Both parameters are unused. */
534     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
535     {
536 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
537 gbeauche 1.13 }
538     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
539    
/* 8-bit register-to-register move, s into d: MOVBrr(s, d). */
540     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
541     {
542     MOVBrr(s, d);
543     }
544     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
545    
/* 16-bit register-to-register move, s into d: MOVWrr(s, d). */
546     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
547     {
548     MOVWrr(s, d);
549     }
550     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
551    
/* 32-bit load into d from [baser + index*factor], displacement 0. */
552     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
553     {
554     MOVLmr(0, baser, index, factor, d);
555     }
556     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
557    
/* 16-bit load into d from [baser + index*factor], displacement 0. */
558     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
559     {
560     MOVWmr(0, baser, index, factor, d);
561     }
562     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
563    
/* 8-bit load into d from [baser + index*factor], displacement 0. */
564     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
565     {
566     MOVBmr(0, baser, index, factor, d);
567     }
568     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
569    
/* 32-bit store of register s to [baser + index*factor], displacement 0. */
570     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
571     {
572     MOVLrm(s, 0, baser, index, factor);
573     }
574     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
575    
/* 16-bit store of register s to [baser + index*factor], displacement 0. */
576     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
577     {
578     MOVWrm(s, 0, baser, index, factor);
579     }
580     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
581    
/* 8-bit store of register s to [baser + index*factor], displacement 0. */
582     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
583     {
584     MOVBrm(s, 0, baser, index, factor);
585     }
586     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
587    
/* 32-bit store of register s to [base + baser + index*factor], where base
   is an immediate displacement. */
588     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
589     {
590     MOVLrm(s, base, baser, index, factor);
591     }
592     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
593    
/* 16-bit store of register s to [base + baser + index*factor], where base
   is an immediate displacement. */
594     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
595     {
596     MOVWrm(s, base, baser, index, factor);
597     }
598     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
599    
/* 8-bit store of register s to [base + baser + index*factor], where base
   is an immediate displacement. */
600     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
601     {
602     MOVBrm(s, base, baser, index, factor);
603     }
604     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
605    
/* 32-bit load into d from [base + baser + index*factor], where base is an
   immediate displacement. */
606     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
607     {
608     MOVLmr(base, baser, index, factor, d);
609     }
610     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
611    
/* 16-bit load into d from [base + baser + index*factor], where base is an
   immediate displacement. */
612     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
613     {
614     MOVWmr(base, baser, index, factor, d);
615     }
616     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
617    
/* 8-bit load into d from [base + baser + index*factor], where base is an
   immediate displacement. */
618     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
619     {
620     MOVBmr(base, baser, index, factor, d);
621     }
622     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
623    
/* 32-bit load into d from [base + index*factor]; base is an immediate
   displacement and no base register is used (X86_NOREG). */
624     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
625     {
626     MOVLmr(base, X86_NOREG, index, factor, d);
627     }
628     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
629    
/* Conditional 32-bit load: when condition cond holds, load d from
   [base + index*factor] (immediate displacement, no base register).
   When have_cmov is set, a single CMOVLmr is emitted. Otherwise a short
   conditional jump on cond^1 (the opposite condition) skips over a plain
   MOVLmr. The jump distance 7 must equal the encoded size of that MOV
   (opcode + ModRM + SIB + 32-bit displacement).
   NOTE(review): this relies on MOVLmr never choosing a shorter encoding for
   this addressing form -- confirm against codegen_x86.h.
   NOTE(review): the first LOWFUNC argument is NONE here but READ on
   raw_cmov_l_rr -- confirm which annotation is intended.
   On x86-64 builds the fallback path logs an error and aborts instead. */
630     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
631     {
632 gbeauche 1.15 if (have_cmov)
633     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
634     else { /* replacement using branch and mov */
635     #if defined(__x86_64__)
636     write_log("x86-64 implementations are bound to have CMOV!\n");
637     abort();
638     #endif
639     JCCSii(cond^1, 7);
640     MOVLmr(base, X86_NOREG, index, factor, d);
641     }
642 gbeauche 1.13 }
643     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
644    
/* Conditional 32-bit load: when condition cond holds, load d from the
   address mem (no base or index register).
   When have_cmov is set, a single CMOVLmr is emitted. Otherwise a short
   conditional jump on cond^1 (the opposite condition) skips over a plain
   MOVLmr. The jump distance 6 must equal the encoded size of that MOV
   (opcode + ModRM + 32-bit displacement).
   NOTE(review): this relies on MOVLmr never choosing a shorter encoding
   (for example a dedicated EAX form) -- confirm against codegen_x86.h.
   NOTE(review): the first LOWFUNC argument is NONE here but READ on
   raw_cmov_l_rr -- confirm which annotation is intended.
   On x86-64 builds the fallback path logs an error and aborts instead. */
645     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
646     {
647 gbeauche 1.15 if (have_cmov)
648     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
649     else { /* replacement using branch and mov */
650     #if defined(__x86_64__)
651     write_log("x86-64 implementations are bound to have CMOV!\n");
652     abort();
653     #endif
654     JCCSii(cond^1, 6);
655     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
656     }
657 gbeauche 1.13 }
658     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
659    
/* 32-bit load into d from [s + offset] (base register s, no index). */
660     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
661     {
662     MOVLmr(offset, s, X86_NOREG, 1, d);
663     }
664     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
665    
/* 16-bit load into d from [s + offset] (base register s, no index). */
666     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
667     {
668     MOVWmr(offset, s, X86_NOREG, 1, d);
669     }
670     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
671    
/* 8-bit load into d from [s + offset] (base register s, no index). */
672     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
673     {
674     MOVBmr(offset, s, X86_NOREG, 1, d);
675     }
676     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
677    
/* 32-bit load into d from [s + offset]. The emitted call is the same as in
   raw_mov_l_rR. */
678     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
679     {
680     MOVLmr(offset, s, X86_NOREG, 1, d);
681     }
682     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
683    
/* 16-bit load into d from [s + offset]. The emitted call is the same as in
   raw_mov_w_rR. */
684     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
685     {
686     MOVWmr(offset, s, X86_NOREG, 1, d);
687     }
688     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
689    
/* 8-bit load into d from [s + offset]. The emitted call is the same as in
   raw_mov_b_rR. */
690     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
691     {
692     MOVBmr(offset, s, X86_NOREG, 1, d);
693     }
694     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
695    
/* 32-bit store of immediate i to [d + offset] (base register d, no index). */
696     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
697     {
698     MOVLim(i, offset, d, X86_NOREG, 1);
699     }
700     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
701    
/* 16-bit store of immediate i to [d + offset] (base register d, no index). */
702     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
703     {
704     MOVWim(i, offset, d, X86_NOREG, 1);
705     }
706     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
707    
/* 8-bit store of immediate i to [d + offset] (base register d, no index). */
708     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
709     {
710     MOVBim(i, offset, d, X86_NOREG, 1);
711     }
712     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
713    
/* 32-bit store of register s to [d + offset] (base register d, no index). */
714     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
715     {
716     MOVLrm(s, offset, d, X86_NOREG, 1);
717     }
718     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
719    
/* 16-bit store of register s to [d + offset] (base register d, no index). */
720     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
721     {
722     MOVWrm(s, offset, d, X86_NOREG, 1);
723     }
724     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
725    
/* 8-bit store of register s to [d + offset] (base register d, no index). */
726     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
727     {
728     MOVBrm(s, offset, d, X86_NOREG, 1);
729     }
730     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
731    
/* Load effective address: d receives s + offset (base register s, no
   index), computed with LEALmr. */
732     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
733     {
734     LEALmr(offset, s, X86_NOREG, 1, d);
735     }
736     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
737    
/* Load effective address: d receives s + index*factor + offset. */
738     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
739     {
740     LEALmr(offset, s, index, factor, d);
741     }
742     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
743    
/* Load effective address: d receives s + index*factor (displacement 0). */
744     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
745     {
746     LEALmr(0, s, index, factor, d);
747     }
748     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
749    
/* 32-bit store of register s to [d + offset]. The emitted call is the same
   as in raw_mov_l_Rr. */
750     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
751     {
752     MOVLrm(s, offset, d, X86_NOREG, 1);
753     }
754     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
755    
/* 16-bit store of register s to [d + offset]. The emitted call is the same
   as in raw_mov_w_Rr. */
756     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
757     {
758     MOVWrm(s, offset, d, X86_NOREG, 1);
759     }
760     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
761    
/* 8-bit store of register s to [d + offset]. The emitted call is the same
   as in raw_mov_b_Rr. */
762     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
763     {
764     MOVBrm(s, offset, d, X86_NOREG, 1);
765     }
766     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
767    
/* Byte-swap the 32-bit register r: BSWAPLr(r). */
768     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
769     {
770     BSWAPLr(r);
771     }
772     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
773    
/* Swap the two bytes of the 16-bit register r, implemented as a 16-bit
   rotate-left by 8: ROLWir(8, r). */
774     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
775     {
776     ROLWir(8, r);
777     }
778     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
779    
/* 32-bit register-to-register move, s into d: MOVLrr(s, d). */
780     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
781     {
782     MOVLrr(s, d);
783     }
784     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
785    
/* 32-bit store of register s to address d (no base/index register). */
786     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
787     {
788     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
789     }
790     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
791    
/* 16-bit store of register s to address d (no base/index register). */
792     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
793     {
794     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
795     }
796     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
797    
/* 16-bit load into register d from address s (no base/index register). */
798     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
799     {
800     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
801     }
802     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
803    
/* 8-bit store of register s to address d (no base/index register). */
804     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
805     {
806     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
807     }
808     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
809    
/* 8-bit load into register d from address s (no base/index register). */
810     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
811     {
812     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
813     }
814     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
815    
/* Load the 32-bit immediate s into register d: MOVLir(s, d). */
816     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
817     {
818     MOVLir(s, d);
819     }
820     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
821    
/* Load the 16-bit immediate s into register d: MOVWir(s, d). */
822     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
823     {
824     MOVWir(s, d);
825     }
826     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
827    
/* Load the 8-bit immediate s into register d: MOVBir(s, d). */
828     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
829     {
830     MOVBir(s, d);
831     }
832     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
833    
/* 32-bit add-with-carry of immediate s to the value at address d
   (no base/index register): ADCLim. */
834     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
835     {
836     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
837     }
838     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
839    
/* 32-bit add of immediate s to the value at address d
   (no base/index register): ADDLim. */
840     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
841     {
842     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
843     }
844     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
845    
/* 16-bit add of immediate s to the value at address d
   (no base/index register): ADDWim. */
846     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
847     {
848     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
849     }
850     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
851    
/* 8-bit add of immediate s to the value at address d
   (no base/index register): ADDBim. */
852     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
853     {
854     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
855     }
856     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
857    
/* 32-bit TEST of register d against immediate i: TESTLir(i, d). */
858     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
859     {
860     TESTLir(i, d);
861     }
862     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
863    
/* 32-bit TEST of register d against register s: TESTLrr(s, d). */
864     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
865     {
866     TESTLrr(s, d);
867     }
868     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
869    
/* 16-bit TEST of register d against register s: TESTWrr(s, d). */
870     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
871     {
872     TESTWrr(s, d);
873     }
874     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
875    
/* 8-bit TEST of register d against register s: TESTBrr(s, d). */
876     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
877     {
878     TESTBrr(s, d);
879     }
880     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
881    
/* 32-bit AND of immediate i into register d: ANDLir(i, d). */
882     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
883     {
884     ANDLir(i, d);
885     }
886     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
887    
/* 16-bit AND of immediate i into register d: ANDWir(i, d). */
888     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
889     {
890     ANDWir(i, d);
891     }
892     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
893    
/* 32-bit AND of register s into register d: ANDLrr(s, d). */
894     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
895     {
896     ANDLrr(s, d);
897     }
898     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
899    
/* 16-bit AND of register s into register d: ANDWrr(s, d). */
900     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
901     {
902     ANDWrr(s, d);
903     }
904     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
905    
/* 8-bit AND of register s into register d: ANDBrr(s, d). */
906     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
907     {
908     ANDBrr(s, d);
909     }
910     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
911    
/* 32-bit OR of immediate i into register d: ORLir(i, d). */
912     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
913     {
914     ORLir(i, d);
915     }
916     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
917    
/* 32-bit OR of register s into register d: ORLrr(s, d). */
918     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
919     {
920     ORLrr(s, d);
921     }
922     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
923    
/* 16-bit OR of register s into register d: ORWrr(s, d). */
924     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
925     {
926     ORWrr(s, d);
927     }
928     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
929    
/* 8-bit OR of register s into register d: ORBrr(s, d). */
930     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
931     {
932     ORBrr(s, d);
933     }
934     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
935    
/* 32-bit add-with-carry of register s into register d: ADCLrr(s, d). */
936     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
937     {
938     ADCLrr(s, d);
939     }
940     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
941    
/* 16-bit add-with-carry of register s into register d: ADCWrr(s, d). */
942     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
943     {
944     ADCWrr(s, d);
945     }
946     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
947    
/* 8-bit add-with-carry of register s into register d: ADCBrr(s, d). */
948     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
949     {
950     ADCBrr(s, d);
951     }
952     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
953    
/* 32-bit add of register s into register d: ADDLrr(s, d). */
954     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
955     {
956     ADDLrr(s, d);
957     }
958     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
959    
/* 16-bit add of register s into register d: ADDWrr(s, d). */
960     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
961     {
962     ADDWrr(s, d);
963     }
964     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
965    
966     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
967     {
968     ADDBrr(s, d);
969     }
970     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
971    
972     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
973     {
974     SUBLir(i, d);
975     }
976     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
977    
978     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
979     {
980     SUBBir(i, d);
981     }
982     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
983    
984     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
985     {
986     ADDLir(i, d);
987     }
988     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
989    
990     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
991     {
992     ADDWir(i, d);
993     }
994     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
995    
996     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
997     {
998     ADDBir(i, d);
999     }
1000     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1001    
1002     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1003     {
1004     SBBLrr(s, d);
1005     }
1006     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1007    
1008     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1009     {
1010     SBBWrr(s, d);
1011     }
1012     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1013    
1014     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1015     {
1016     SBBBrr(s, d);
1017     }
1018     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1019    
1020     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1021     {
1022     SUBLrr(s, d);
1023     }
1024     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1025    
1026     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1027     {
1028     SUBWrr(s, d);
1029     }
1030     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1031    
1032     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1033     {
1034     SUBBrr(s, d);
1035     }
1036     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1037    
1038     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1039     {
1040     CMPLrr(s, d);
1041     }
1042     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1043    
1044     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1045     {
1046     CMPLir(i, r);
1047     }
1048     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1049    
1050     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1051     {
1052     CMPWrr(s, d);
1053     }
1054     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1055    
1056     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1057     {
1058     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1059     }
1060     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1061    
1062     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1063     {
1064     CMPBir(i, d);
1065     }
1066     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1067    
1068     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1069     {
1070     CMPBrr(s, d);
1071     }
1072     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1073    
1074     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1075     {
1076     CMPLmr(offset, X86_NOREG, index, factor, d);
1077     }
1078     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1079    
1080     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1081     {
1082     XORLrr(s, d);
1083     }
1084     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1085    
1086     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1087     {
1088     XORWrr(s, d);
1089     }
1090     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1091    
1092     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1093     {
1094     XORBrr(s, d);
1095     }
1096     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1097    
1098     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1099     {
1100     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1101     }
1102     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1103    
1104     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1105     {
1106     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1107     }
1108     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1109    
1110     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1111     {
1112     XCHGLrr(r2, r1);
1113     }
1114     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1115    
1116     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1117     {
1118     PUSHFD();
1119     }
1120     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1121    
1122     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1123     {
1124     POPFD();
1125     }
1126     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1127    
1128     #else
1129    
/* Encoder tuning switches for the hand-written emitters. */
const bool optimize_accum      = true;  /* with isaccum(): allow the accumulator-only opcode form */
const bool optimize_imm8       = true;
const bool optimize_shift_once = true;  /* allow the implicit count-of-1 shift/rotate opcodes */
1133    
1134     /*************************************************************************
1135     * Actual encoding of the instructions on the target CPU *
1136     *************************************************************************/
1137    
1138 gbeauche 1.2 static __inline__ int isaccum(int r)
1139     {
1140     return (r == EAX_INDEX);
1141     }
1142    
1143 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1144     {
1145     return (x>=-128 && x<=127);
1146     }
1147    
1148     static __inline__ int isword(uae_s32 x)
1149     {
1150     return (x>=-32768 && x<=32767);
1151     }
1152    
1153     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1154     {
1155     emit_byte(0x50+r);
1156     }
1157     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1158    
1159     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1160     {
1161     emit_byte(0x58+r);
1162     }
1163     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1164    
1165     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1166     {
1167     emit_byte(0x0f);
1168     emit_byte(0xba);
1169     emit_byte(0xe0+r);
1170     emit_byte(i);
1171     }
1172     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1173    
1174     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1175     {
1176     emit_byte(0x0f);
1177     emit_byte(0xa3);
1178     emit_byte(0xc0+8*b+r);
1179     }
1180     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1181    
1182     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1183     {
1184     emit_byte(0x0f);
1185     emit_byte(0xba);
1186     emit_byte(0xf8+r);
1187     emit_byte(i);
1188     }
1189     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1190    
1191     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1192     {
1193     emit_byte(0x0f);
1194     emit_byte(0xbb);
1195     emit_byte(0xc0+8*b+r);
1196     }
1197     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1198    
1199    
1200     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1201     {
1202     emit_byte(0x0f);
1203     emit_byte(0xba);
1204     emit_byte(0xf0+r);
1205     emit_byte(i);
1206     }
1207     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1208    
1209     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1210     {
1211     emit_byte(0x0f);
1212     emit_byte(0xb3);
1213     emit_byte(0xc0+8*b+r);
1214     }
1215     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1216    
1217     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1218     {
1219     emit_byte(0x0f);
1220     emit_byte(0xba);
1221     emit_byte(0xe8+r);
1222     emit_byte(i);
1223     }
1224     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1225    
1226     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1227     {
1228     emit_byte(0x0f);
1229     emit_byte(0xab);
1230     emit_byte(0xc0+8*b+r);
1231     }
1232     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1233    
1234     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1235     {
1236     emit_byte(0x66);
1237     if (isbyte(i)) {
1238     emit_byte(0x83);
1239     emit_byte(0xe8+d);
1240     emit_byte(i);
1241     }
1242     else {
1243 gbeauche 1.2 if (optimize_accum && isaccum(d))
1244     emit_byte(0x2d);
1245     else {
1246 gbeauche 1.1 emit_byte(0x81);
1247     emit_byte(0xe8+d);
1248 gbeauche 1.2 }
1249 gbeauche 1.1 emit_word(i);
1250     }
1251     }
1252     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1253    
1254    
1255     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1256     {
1257     emit_byte(0x8b);
1258     emit_byte(0x05+8*d);
1259     emit_long(s);
1260     }
1261     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1262    
1263     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1264     {
1265     emit_byte(0xc7);
1266     emit_byte(0x05);
1267     emit_long(d);
1268     emit_long(s);
1269     }
1270     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1271    
1272     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1273     {
1274     emit_byte(0x66);
1275     emit_byte(0xc7);
1276     emit_byte(0x05);
1277     emit_long(d);
1278     emit_word(s);
1279     }
1280     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1281    
1282     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1283     {
1284     emit_byte(0xc6);
1285     emit_byte(0x05);
1286     emit_long(d);
1287     emit_byte(s);
1288     }
1289     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1290    
1291     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1292     {
1293     if (optimize_shift_once && (i == 1)) {
1294     emit_byte(0xd0);
1295     emit_byte(0x05);
1296     emit_long(d);
1297     }
1298     else {
1299     emit_byte(0xc0);
1300     emit_byte(0x05);
1301     emit_long(d);
1302     emit_byte(i);
1303     }
1304     }
1305     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1306    
1307     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1308     {
1309     if (optimize_shift_once && (i == 1)) {
1310     emit_byte(0xd0);
1311     emit_byte(0xc0+r);
1312     }
1313     else {
1314     emit_byte(0xc0);
1315     emit_byte(0xc0+r);
1316     emit_byte(i);
1317     }
1318     }
1319     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1320    
1321     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1322     {
1323     emit_byte(0x66);
1324     emit_byte(0xc1);
1325     emit_byte(0xc0+r);
1326     emit_byte(i);
1327     }
1328     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1329    
1330     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1331     {
1332     if (optimize_shift_once && (i == 1)) {
1333     emit_byte(0xd1);
1334     emit_byte(0xc0+r);
1335     }
1336     else {
1337     emit_byte(0xc1);
1338     emit_byte(0xc0+r);
1339     emit_byte(i);
1340     }
1341     }
1342     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1343    
1344     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1345     {
1346     emit_byte(0xd3);
1347     emit_byte(0xc0+d);
1348     }
1349     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1350    
1351     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1352     {
1353     emit_byte(0x66);
1354     emit_byte(0xd3);
1355     emit_byte(0xc0+d);
1356     }
1357     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1358    
1359     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1360     {
1361     emit_byte(0xd2);
1362     emit_byte(0xc0+d);
1363     }
1364     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1365    
1366     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1367     {
1368     emit_byte(0xd3);
1369     emit_byte(0xe0+d);
1370     }
1371     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1372    
1373     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1374     {
1375     emit_byte(0x66);
1376     emit_byte(0xd3);
1377     emit_byte(0xe0+d);
1378     }
1379     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1380    
1381     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1382     {
1383     emit_byte(0xd2);
1384     emit_byte(0xe0+d);
1385     }
1386     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1387    
1388     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1389     {
1390     if (optimize_shift_once && (i == 1)) {
1391     emit_byte(0xd0);
1392     emit_byte(0xc8+r);
1393     }
1394     else {
1395     emit_byte(0xc0);
1396     emit_byte(0xc8+r);
1397     emit_byte(i);
1398     }
1399     }
1400     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1401    
1402     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1403     {
1404     emit_byte(0x66);
1405     emit_byte(0xc1);
1406     emit_byte(0xc8+r);
1407     emit_byte(i);
1408     }
1409     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1410    
1411     // gb-- used for making an fpcr value in compemu_fpp.cpp
1412     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1413     {
1414     emit_byte(0x0b);
1415     emit_byte(0x05+8*d);
1416     emit_long(s);
1417     }
1418     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1419    
1420     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1421     {
1422     if (optimize_shift_once && (i == 1)) {
1423     emit_byte(0xd1);
1424     emit_byte(0xc8+r);
1425     }
1426     else {
1427     emit_byte(0xc1);
1428     emit_byte(0xc8+r);
1429     emit_byte(i);
1430     }
1431     }
1432     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1433    
1434     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1435     {
1436     emit_byte(0xd3);
1437     emit_byte(0xc8+d);
1438     }
1439     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1440    
1441     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1442     {
1443     emit_byte(0x66);
1444     emit_byte(0xd3);
1445     emit_byte(0xc8+d);
1446     }
1447     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1448    
1449     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1450     {
1451     emit_byte(0xd2);
1452     emit_byte(0xc8+d);
1453     }
1454     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1455    
1456     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1457     {
1458     emit_byte(0xd3);
1459     emit_byte(0xe8+d);
1460     }
1461     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1462    
1463     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1464     {
1465     emit_byte(0x66);
1466     emit_byte(0xd3);
1467     emit_byte(0xe8+d);
1468     }
1469     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1470    
1471     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1472     {
1473     emit_byte(0xd2);
1474     emit_byte(0xe8+d);
1475     }
1476     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1477    
1478     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1479     {
1480     emit_byte(0xd3);
1481     emit_byte(0xf8+d);
1482     }
1483     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1484    
1485     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1486     {
1487     emit_byte(0x66);
1488     emit_byte(0xd3);
1489     emit_byte(0xf8+d);
1490     }
1491     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1492    
1493     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1494     {
1495     emit_byte(0xd2);
1496     emit_byte(0xf8+d);
1497     }
1498     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1499    
1500     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1501     {
1502     if (optimize_shift_once && (i == 1)) {
1503     emit_byte(0xd1);
1504     emit_byte(0xe0+r);
1505     }
1506     else {
1507     emit_byte(0xc1);
1508     emit_byte(0xe0+r);
1509     emit_byte(i);
1510     }
1511     }
1512     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1513    
1514     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1515     {
1516     emit_byte(0x66);
1517     emit_byte(0xc1);
1518     emit_byte(0xe0+r);
1519     emit_byte(i);
1520     }
1521     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1522    
1523     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1524     {
1525     if (optimize_shift_once && (i == 1)) {
1526     emit_byte(0xd0);
1527     emit_byte(0xe0+r);
1528     }
1529     else {
1530     emit_byte(0xc0);
1531     emit_byte(0xe0+r);
1532     emit_byte(i);
1533     }
1534     }
1535     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1536    
1537     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1538     {
1539     if (optimize_shift_once && (i == 1)) {
1540     emit_byte(0xd1);
1541     emit_byte(0xe8+r);
1542     }
1543     else {
1544     emit_byte(0xc1);
1545     emit_byte(0xe8+r);
1546     emit_byte(i);
1547     }
1548     }
1549     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1550    
1551     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1552     {
1553     emit_byte(0x66);
1554     emit_byte(0xc1);
1555     emit_byte(0xe8+r);
1556     emit_byte(i);
1557     }
1558     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1559    
1560     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1561     {
1562     if (optimize_shift_once && (i == 1)) {
1563     emit_byte(0xd0);
1564     emit_byte(0xe8+r);
1565     }
1566     else {
1567     emit_byte(0xc0);
1568     emit_byte(0xe8+r);
1569     emit_byte(i);
1570     }
1571     }
1572     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1573    
1574     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1575     {
1576     if (optimize_shift_once && (i == 1)) {
1577     emit_byte(0xd1);
1578     emit_byte(0xf8+r);
1579     }
1580     else {
1581     emit_byte(0xc1);
1582     emit_byte(0xf8+r);
1583     emit_byte(i);
1584     }
1585     }
1586     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1587    
1588     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1589     {
1590     emit_byte(0x66);
1591     emit_byte(0xc1);
1592     emit_byte(0xf8+r);
1593     emit_byte(i);
1594     }
1595     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1596    
1597     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1598     {
1599     if (optimize_shift_once && (i == 1)) {
1600     emit_byte(0xd0);
1601     emit_byte(0xf8+r);
1602     }
1603     else {
1604     emit_byte(0xc0);
1605     emit_byte(0xf8+r);
1606     emit_byte(i);
1607     }
1608     }
1609     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1610    
1611     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1612     {
1613     emit_byte(0x9e);
1614     }
1615     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1616    
1617     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1618     {
1619     emit_byte(0x0f);
1620     emit_byte(0xa2);
1621     }
1622     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1623    
1624     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1625     {
1626     emit_byte(0x9f);
1627     }
1628     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1629    
1630     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1631     {
1632     emit_byte(0x0f);
1633     emit_byte(0x90+cc);
1634     emit_byte(0xc0+d);
1635     }
1636     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1637    
1638     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1639     {
1640     emit_byte(0x0f);
1641     emit_byte(0x90+cc);
1642     emit_byte(0x05);
1643     emit_long(d);
1644     }
1645     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1646    
1647     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1648     {
1649     if (have_cmov) {
1650     emit_byte(0x0f);
1651     emit_byte(0x40+cc);
1652     emit_byte(0xc0+8*d+s);
1653     }
1654     else { /* replacement using branch and mov */
1655     int uncc=(cc^1);
1656     emit_byte(0x70+uncc);
1657     emit_byte(2); /* skip next 2 bytes if not cc=true */
1658     emit_byte(0x89);
1659     emit_byte(0xc0+8*s+d);
1660     }
1661     }
1662     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1663    
1664     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1665     {
1666     emit_byte(0x0f);
1667     emit_byte(0xbc);
1668     emit_byte(0xc0+8*d+s);
1669     }
1670     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1671    
1672     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1673     {
1674     emit_byte(0x0f);
1675     emit_byte(0xbf);
1676     emit_byte(0xc0+8*d+s);
1677     }
1678     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1679    
1680     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1681     {
1682     emit_byte(0x0f);
1683     emit_byte(0xbe);
1684     emit_byte(0xc0+8*d+s);
1685     }
1686     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1687    
1688     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1689     {
1690     emit_byte(0x0f);
1691     emit_byte(0xb7);
1692     emit_byte(0xc0+8*d+s);
1693     }
1694     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1695    
1696     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1697     {
1698     emit_byte(0x0f);
1699     emit_byte(0xb6);
1700     emit_byte(0xc0+8*d+s);
1701     }
1702     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1703    
1704     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1705     {
1706     emit_byte(0x0f);
1707     emit_byte(0xaf);
1708     emit_byte(0xc0+8*d+s);
1709     }
1710     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1711    
1712     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1713     {
1714     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1715     abort();
1716     emit_byte(0xf7);
1717     emit_byte(0xea);
1718     }
1719     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1720    
1721     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1722     {
1723     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1724     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1725     abort();
1726     }
1727     emit_byte(0xf7);
1728     emit_byte(0xe2);
1729     }
1730     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1731    
1732     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1733     {
1734     abort(); /* %^$&%^$%#^ x86! */
1735     emit_byte(0x0f);
1736     emit_byte(0xaf);
1737     emit_byte(0xc0+8*d+s);
1738     }
1739     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1740    
1741     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1742     {
1743     emit_byte(0x88);
1744     emit_byte(0xc0+8*s+d);
1745     }
1746     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1747    
1748     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1749     {
1750     emit_byte(0x66);
1751     emit_byte(0x89);
1752     emit_byte(0xc0+8*s+d);
1753     }
1754     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1755    
1756     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1757     {
1758     int isebp=(baser==5)?0x40:0;
1759     int fi;
1760    
1761     switch(factor) {
1762     case 1: fi=0; break;
1763     case 2: fi=1; break;
1764     case 4: fi=2; break;
1765     case 8: fi=3; break;
1766     default: abort();
1767     }
1768    
1769    
1770     emit_byte(0x8b);
1771     emit_byte(0x04+8*d+isebp);
1772     emit_byte(baser+8*index+0x40*fi);
1773     if (isebp)
1774     emit_byte(0x00);
1775     }
1776     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1777    
1778     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1779     {
1780     int fi;
1781     int isebp;
1782    
1783     switch(factor) {
1784     case 1: fi=0; break;
1785     case 2: fi=1; break;
1786     case 4: fi=2; break;
1787     case 8: fi=3; break;
1788     default: abort();
1789     }
1790     isebp=(baser==5)?0x40:0;
1791    
1792     emit_byte(0x66);
1793     emit_byte(0x8b);
1794     emit_byte(0x04+8*d+isebp);
1795     emit_byte(baser+8*index+0x40*fi);
1796     if (isebp)
1797     emit_byte(0x00);
1798     }
1799     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1800    
1801     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1802     {
1803     int fi;
1804     int isebp;
1805    
1806     switch(factor) {
1807     case 1: fi=0; break;
1808     case 2: fi=1; break;
1809     case 4: fi=2; break;
1810     case 8: fi=3; break;
1811     default: abort();
1812     }
1813     isebp=(baser==5)?0x40:0;
1814    
1815     emit_byte(0x8a);
1816     emit_byte(0x04+8*d+isebp);
1817     emit_byte(baser+8*index+0x40*fi);
1818     if (isebp)
1819     emit_byte(0x00);
1820     }
1821     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1822    
1823     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1824     {
1825     int fi;
1826     int isebp;
1827    
1828     switch(factor) {
1829     case 1: fi=0; break;
1830     case 2: fi=1; break;
1831     case 4: fi=2; break;
1832     case 8: fi=3; break;
1833     default: abort();
1834     }
1835    
1836    
1837     isebp=(baser==5)?0x40:0;
1838    
1839     emit_byte(0x89);
1840     emit_byte(0x04+8*s+isebp);
1841     emit_byte(baser+8*index+0x40*fi);
1842     if (isebp)
1843     emit_byte(0x00);
1844     }
1845     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1846    
1847     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1848     {
1849     int fi;
1850     int isebp;
1851    
1852     switch(factor) {
1853     case 1: fi=0; break;
1854     case 2: fi=1; break;
1855     case 4: fi=2; break;
1856     case 8: fi=3; break;
1857     default: abort();
1858     }
1859     isebp=(baser==5)?0x40:0;
1860    
1861     emit_byte(0x66);
1862     emit_byte(0x89);
1863     emit_byte(0x04+8*s+isebp);
1864     emit_byte(baser+8*index+0x40*fi);
1865     if (isebp)
1866     emit_byte(0x00);
1867     }
1868     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1869    
1870     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1871     {
1872     int fi;
1873     int isebp;
1874    
1875     switch(factor) {
1876     case 1: fi=0; break;
1877     case 2: fi=1; break;
1878     case 4: fi=2; break;
1879     case 8: fi=3; break;
1880     default: abort();
1881     }
1882     isebp=(baser==5)?0x40:0;
1883    
1884     emit_byte(0x88);
1885     emit_byte(0x04+8*s+isebp);
1886     emit_byte(baser+8*index+0x40*fi);
1887     if (isebp)
1888     emit_byte(0x00);
1889     }
1890     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1891    
1892     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1893     {
1894     int fi;
1895    
1896     switch(factor) {
1897     case 1: fi=0; break;
1898     case 2: fi=1; break;
1899     case 4: fi=2; break;
1900     case 8: fi=3; break;
1901     default: abort();
1902     }
1903    
1904     emit_byte(0x89);
1905     emit_byte(0x84+8*s);
1906     emit_byte(baser+8*index+0x40*fi);
1907     emit_long(base);
1908     }
1909     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1910    
1911     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1912     {
1913     int fi;
1914    
1915     switch(factor) {
1916     case 1: fi=0; break;
1917     case 2: fi=1; break;
1918     case 4: fi=2; break;
1919     case 8: fi=3; break;
1920     default: abort();
1921     }
1922    
1923     emit_byte(0x66);
1924     emit_byte(0x89);
1925     emit_byte(0x84+8*s);
1926     emit_byte(baser+8*index+0x40*fi);
1927     emit_long(base);
1928     }
1929     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1930    
1931     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1932     {
1933     int fi;
1934    
1935     switch(factor) {
1936     case 1: fi=0; break;
1937     case 2: fi=1; break;
1938     case 4: fi=2; break;
1939     case 8: fi=3; break;
1940     default: abort();
1941     }
1942    
1943     emit_byte(0x88);
1944     emit_byte(0x84+8*s);
1945     emit_byte(baser+8*index+0x40*fi);
1946     emit_long(base);
1947     }
1948     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1949    
1950     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1951     {
1952     int fi;
1953    
1954     switch(factor) {
1955     case 1: fi=0; break;
1956     case 2: fi=1; break;
1957     case 4: fi=2; break;
1958     case 8: fi=3; break;
1959     default: abort();
1960     }
1961    
1962     emit_byte(0x8b);
1963     emit_byte(0x84+8*d);
1964     emit_byte(baser+8*index+0x40*fi);
1965     emit_long(base);
1966     }
1967     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1968    
1969     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
1970     {
1971     int fi;
1972    
1973     switch(factor) {
1974     case 1: fi=0; break;
1975     case 2: fi=1; break;
1976     case 4: fi=2; break;
1977     case 8: fi=3; break;
1978     default: abort();
1979     }
1980    
1981     emit_byte(0x66);
1982     emit_byte(0x8b);
1983     emit_byte(0x84+8*d);
1984     emit_byte(baser+8*index+0x40*fi);
1985     emit_long(base);
1986     }
1987     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
1988    
1989     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1990     {
1991     int fi;
1992    
1993     switch(factor) {
1994     case 1: fi=0; break;
1995     case 2: fi=1; break;
1996     case 4: fi=2; break;
1997     case 8: fi=3; break;
1998     default: abort();
1999     }
2000    
2001     emit_byte(0x8a);
2002     emit_byte(0x84+8*d);
2003     emit_byte(baser+8*index+0x40*fi);
2004     emit_long(base);
2005     }
2006     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2007    
2008     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2009     {
2010     int fi;
2011     switch(factor) {
2012     case 1: fi=0; break;
2013     case 2: fi=1; break;
2014     case 4: fi=2; break;
2015     case 8: fi=3; break;
2016     default:
2017     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2018     abort();
2019     }
2020     emit_byte(0x8b);
2021     emit_byte(0x04+8*d);
2022     emit_byte(0x05+8*index+64*fi);
2023     emit_long(base);
2024     }
2025     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2026    
2027     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2028     {
2029     int fi;
2030     switch(factor) {
2031     case 1: fi=0; break;
2032     case 2: fi=1; break;
2033     case 4: fi=2; break;
2034     case 8: fi=3; break;
2035     default:
2036     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2037     abort();
2038     }
2039     if (have_cmov) {
2040     emit_byte(0x0f);
2041     emit_byte(0x40+cond);
2042     emit_byte(0x04+8*d);
2043     emit_byte(0x05+8*index+64*fi);
2044     emit_long(base);
2045     }
2046     else { /* replacement using branch and mov */
2047     int uncc=(cond^1);
2048     emit_byte(0x70+uncc);
2049     emit_byte(7); /* skip next 7 bytes if not cc=true */
2050     emit_byte(0x8b);
2051     emit_byte(0x04+8*d);
2052     emit_byte(0x05+8*index+64*fi);
2053     emit_long(base);
2054     }
2055     }
2056     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2057    
2058     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2059     {
2060     if (have_cmov) {
2061     emit_byte(0x0f);
2062     emit_byte(0x40+cond);
2063     emit_byte(0x05+8*d);
2064     emit_long(mem);
2065     }
2066     else { /* replacement using branch and mov */
2067     int uncc=(cond^1);
2068     emit_byte(0x70+uncc);
2069     emit_byte(6); /* skip next 6 bytes if not cc=true */
2070     emit_byte(0x8b);
2071     emit_byte(0x05+8*d);
2072     emit_long(mem);
2073     }
2074     }
2075     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2076    
2077     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2078     {
2079 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2080 gbeauche 1.1 emit_byte(0x8b);
2081     emit_byte(0x40+8*d+s);
2082     emit_byte(offset);
2083     }
2084     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2085    
2086     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2087     {
2088 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2089 gbeauche 1.1 emit_byte(0x66);
2090     emit_byte(0x8b);
2091     emit_byte(0x40+8*d+s);
2092     emit_byte(offset);
2093     }
2094     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2095    
2096     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2097     {
2098 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2099 gbeauche 1.1 emit_byte(0x8a);
2100     emit_byte(0x40+8*d+s);
2101     emit_byte(offset);
2102     }
2103     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2104    
2105     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2106     {
2107     emit_byte(0x8b);
2108     emit_byte(0x80+8*d+s);
2109     emit_long(offset);
2110     }
2111     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2112    
2113     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2114     {
2115     emit_byte(0x66);
2116     emit_byte(0x8b);
2117     emit_byte(0x80+8*d+s);
2118     emit_long(offset);
2119     }
2120     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2121    
2122     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2123     {
2124     emit_byte(0x8a);
2125     emit_byte(0x80+8*d+s);
2126     emit_long(offset);
2127     }
2128     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2129    
2130     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2131     {
2132 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2133 gbeauche 1.1 emit_byte(0xc7);
2134     emit_byte(0x40+d);
2135     emit_byte(offset);
2136     emit_long(i);
2137     }
2138     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2139    
2140     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2141     {
2142 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2143 gbeauche 1.1 emit_byte(0x66);
2144     emit_byte(0xc7);
2145     emit_byte(0x40+d);
2146     emit_byte(offset);
2147     emit_word(i);
2148     }
2149     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2150    
2151     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2152     {
2153 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2154 gbeauche 1.1 emit_byte(0xc6);
2155     emit_byte(0x40+d);
2156     emit_byte(offset);
2157     emit_byte(i);
2158     }
2159     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2160    
2161     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2162     {
2163 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2164 gbeauche 1.1 emit_byte(0x89);
2165     emit_byte(0x40+8*s+d);
2166     emit_byte(offset);
2167     }
2168     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2169    
2170     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2171     {
2172 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2173 gbeauche 1.1 emit_byte(0x66);
2174     emit_byte(0x89);
2175     emit_byte(0x40+8*s+d);
2176     emit_byte(offset);
2177     }
2178     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2179    
2180     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2181     {
2182 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2183 gbeauche 1.1 emit_byte(0x88);
2184     emit_byte(0x40+8*s+d);
2185     emit_byte(offset);
2186     }
2187     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2188    
2189     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2190     {
2191     if (optimize_imm8 && isbyte(offset)) {
2192     emit_byte(0x8d);
2193     emit_byte(0x40+8*d+s);
2194     emit_byte(offset);
2195     }
2196     else {
2197     emit_byte(0x8d);
2198     emit_byte(0x80+8*d+s);
2199     emit_long(offset);
2200     }
2201     }
2202     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2203    
2204     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2205     {
2206     int fi;
2207    
2208     switch(factor) {
2209     case 1: fi=0; break;
2210     case 2: fi=1; break;
2211     case 4: fi=2; break;
2212     case 8: fi=3; break;
2213     default: abort();
2214     }
2215    
2216     if (optimize_imm8 && isbyte(offset)) {
2217     emit_byte(0x8d);
2218     emit_byte(0x44+8*d);
2219     emit_byte(0x40*fi+8*index+s);
2220     emit_byte(offset);
2221     }
2222     else {
2223     emit_byte(0x8d);
2224     emit_byte(0x84+8*d);
2225     emit_byte(0x40*fi+8*index+s);
2226     emit_long(offset);
2227     }
2228     }
2229     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2230    
2231     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2232     {
2233     int isebp=(s==5)?0x40:0;
2234     int fi;
2235    
2236     switch(factor) {
2237     case 1: fi=0; break;
2238     case 2: fi=1; break;
2239     case 4: fi=2; break;
2240     case 8: fi=3; break;
2241     default: abort();
2242     }
2243    
2244     emit_byte(0x8d);
2245     emit_byte(0x04+8*d+isebp);
2246     emit_byte(0x40*fi+8*index+s);
2247     if (isebp)
2248     emit_byte(0);
2249     }
2250     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2251    
2252     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2253     {
2254     if (optimize_imm8 && isbyte(offset)) {
2255     emit_byte(0x89);
2256     emit_byte(0x40+8*s+d);
2257     emit_byte(offset);
2258     }
2259     else {
2260     emit_byte(0x89);
2261     emit_byte(0x80+8*s+d);
2262     emit_long(offset);
2263     }
2264     }
2265     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2266    
2267     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2268     {
2269     emit_byte(0x66);
2270     emit_byte(0x89);
2271     emit_byte(0x80+8*s+d);
2272     emit_long(offset);
2273     }
2274     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2275    
2276     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2277     {
2278     if (optimize_imm8 && isbyte(offset)) {
2279     emit_byte(0x88);
2280     emit_byte(0x40+8*s+d);
2281     emit_byte(offset);
2282     }
2283     else {
2284     emit_byte(0x88);
2285     emit_byte(0x80+8*s+d);
2286     emit_long(offset);
2287     }
2288     }
2289     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2290    
2291     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2292     {
2293     emit_byte(0x0f);
2294     emit_byte(0xc8+r);
2295     }
2296     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2297    
2298     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2299     {
2300     emit_byte(0x66);
2301     emit_byte(0xc1);
2302     emit_byte(0xc0+r);
2303     emit_byte(0x08);
2304     }
2305     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2306    
2307     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2308     {
2309     emit_byte(0x89);
2310     emit_byte(0xc0+8*s+d);
2311     }
2312     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2313    
2314     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2315     {
2316     emit_byte(0x89);
2317     emit_byte(0x05+8*s);
2318     emit_long(d);
2319     }
2320     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2321    
2322     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2323     {
2324     emit_byte(0x66);
2325     emit_byte(0x89);
2326     emit_byte(0x05+8*s);
2327     emit_long(d);
2328     }
2329     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2330    
2331     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2332     {
2333     emit_byte(0x66);
2334     emit_byte(0x8b);
2335     emit_byte(0x05+8*d);
2336     emit_long(s);
2337     }
2338     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2339    
2340     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2341     {
2342     emit_byte(0x88);
2343     emit_byte(0x05+8*s);
2344     emit_long(d);
2345     }
2346     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2347    
2348     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2349     {
2350     emit_byte(0x8a);
2351     emit_byte(0x05+8*d);
2352     emit_long(s);
2353     }
2354     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2355    
2356     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2357     {
2358     emit_byte(0xb8+d);
2359     emit_long(s);
2360     }
2361     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2362    
2363     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2364     {
2365     emit_byte(0x66);
2366     emit_byte(0xb8+d);
2367     emit_word(s);
2368     }
2369     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2370    
2371     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2372     {
2373     emit_byte(0xb0+d);
2374     emit_byte(s);
2375     }
2376     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2377    
2378     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2379     {
2380     emit_byte(0x81);
2381     emit_byte(0x15);
2382     emit_long(d);
2383     emit_long(s);
2384     }
2385     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2386    
2387     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2388     {
2389     if (optimize_imm8 && isbyte(s)) {
2390     emit_byte(0x83);
2391     emit_byte(0x05);
2392     emit_long(d);
2393     emit_byte(s);
2394     }
2395     else {
2396     emit_byte(0x81);
2397     emit_byte(0x05);
2398     emit_long(d);
2399     emit_long(s);
2400     }
2401     }
2402     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2403    
2404     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2405     {
2406     emit_byte(0x66);
2407     emit_byte(0x81);
2408     emit_byte(0x05);
2409     emit_long(d);
2410     emit_word(s);
2411     }
2412     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2413    
2414     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2415     {
2416     emit_byte(0x80);
2417     emit_byte(0x05);
2418     emit_long(d);
2419     emit_byte(s);
2420     }
2421     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2422    
2423     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2424     {
2425 gbeauche 1.2 if (optimize_accum && isaccum(d))
2426     emit_byte(0xa9);
2427     else {
2428 gbeauche 1.1 emit_byte(0xf7);
2429     emit_byte(0xc0+d);
2430 gbeauche 1.2 }
2431 gbeauche 1.1 emit_long(i);
2432     }
2433     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2434    
2435     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2436     {
2437     emit_byte(0x85);
2438     emit_byte(0xc0+8*s+d);
2439     }
2440     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2441    
2442     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2443     {
2444     emit_byte(0x66);
2445     emit_byte(0x85);
2446     emit_byte(0xc0+8*s+d);
2447     }
2448     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2449    
2450     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2451     {
2452     emit_byte(0x84);
2453     emit_byte(0xc0+8*s+d);
2454     }
2455     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2456    
2457     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2458     {
2459     if (optimize_imm8 && isbyte(i)) {
2460 gbeauche 1.2 emit_byte(0x83);
2461     emit_byte(0xe0+d);
2462     emit_byte(i);
2463 gbeauche 1.1 }
2464     else {
2465 gbeauche 1.2 if (optimize_accum && isaccum(d))
2466     emit_byte(0x25);
2467     else {
2468     emit_byte(0x81);
2469     emit_byte(0xe0+d);
2470     }
2471     emit_long(i);
2472 gbeauche 1.1 }
2473     }
2474     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2475    
2476     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2477     {
2478 gbeauche 1.2 emit_byte(0x66);
2479     if (optimize_imm8 && isbyte(i)) {
2480     emit_byte(0x83);
2481     emit_byte(0xe0+d);
2482     emit_byte(i);
2483     }
2484     else {
2485     if (optimize_accum && isaccum(d))
2486     emit_byte(0x25);
2487     else {
2488     emit_byte(0x81);
2489     emit_byte(0xe0+d);
2490     }
2491     emit_word(i);
2492     }
2493 gbeauche 1.1 }
2494     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2495    
2496     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2497     {
2498     emit_byte(0x21);
2499     emit_byte(0xc0+8*s+d);
2500     }
2501     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2502    
2503     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2504     {
2505     emit_byte(0x66);
2506     emit_byte(0x21);
2507     emit_byte(0xc0+8*s+d);
2508     }
2509     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2510    
2511     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2512     {
2513     emit_byte(0x20);
2514     emit_byte(0xc0+8*s+d);
2515     }
2516     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2517    
2518     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2519     {
2520     if (optimize_imm8 && isbyte(i)) {
2521     emit_byte(0x83);
2522     emit_byte(0xc8+d);
2523     emit_byte(i);
2524     }
2525     else {
2526 gbeauche 1.2 if (optimize_accum && isaccum(d))
2527     emit_byte(0x0d);
2528     else {
2529 gbeauche 1.1 emit_byte(0x81);
2530     emit_byte(0xc8+d);
2531 gbeauche 1.2 }
2532 gbeauche 1.1 emit_long(i);
2533     }
2534     }
2535     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2536    
2537     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2538     {
2539     emit_byte(0x09);
2540     emit_byte(0xc0+8*s+d);
2541     }
2542     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2543    
2544     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2545     {
2546     emit_byte(0x66);
2547     emit_byte(0x09);
2548     emit_byte(0xc0+8*s+d);
2549     }
2550     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2551    
2552     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2553     {
2554     emit_byte(0x08);
2555     emit_byte(0xc0+8*s+d);
2556     }
2557     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2558    
2559     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2560     {
2561     emit_byte(0x11);
2562     emit_byte(0xc0+8*s+d);
2563     }
2564     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2565    
2566     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2567     {
2568     emit_byte(0x66);
2569     emit_byte(0x11);
2570     emit_byte(0xc0+8*s+d);
2571     }
2572     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2573    
2574     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2575     {
2576     emit_byte(0x10);
2577     emit_byte(0xc0+8*s+d);
2578     }
2579     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2580    
2581     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2582     {
2583     emit_byte(0x01);
2584     emit_byte(0xc0+8*s+d);
2585     }
2586     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2587    
2588     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2589     {
2590     emit_byte(0x66);
2591     emit_byte(0x01);
2592     emit_byte(0xc0+8*s+d);
2593     }
2594     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2595    
2596     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2597     {
2598     emit_byte(0x00);
2599     emit_byte(0xc0+8*s+d);
2600     }
2601     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2602    
2603     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2604     {
2605     if (isbyte(i)) {
2606     emit_byte(0x83);
2607     emit_byte(0xe8+d);
2608     emit_byte(i);
2609     }
2610     else {
2611 gbeauche 1.2 if (optimize_accum && isaccum(d))
2612     emit_byte(0x2d);
2613     else {
2614 gbeauche 1.1 emit_byte(0x81);
2615     emit_byte(0xe8+d);
2616 gbeauche 1.2 }
2617 gbeauche 1.1 emit_long(i);
2618     }
2619     }
2620     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2621    
2622     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2623     {
2624 gbeauche 1.2 if (optimize_accum && isaccum(d))
2625     emit_byte(0x2c);
2626     else {
2627 gbeauche 1.1 emit_byte(0x80);
2628     emit_byte(0xe8+d);
2629 gbeauche 1.2 }
2630 gbeauche 1.1 emit_byte(i);
2631     }
2632     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2633    
2634     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2635     {
2636     if (isbyte(i)) {
2637     emit_byte(0x83);
2638     emit_byte(0xc0+d);
2639     emit_byte(i);
2640     }
2641     else {
2642 gbeauche 1.2 if (optimize_accum && isaccum(d))
2643     emit_byte(0x05);
2644     else {
2645 gbeauche 1.1 emit_byte(0x81);
2646     emit_byte(0xc0+d);
2647 gbeauche 1.2 }
2648 gbeauche 1.1 emit_long(i);
2649     }
2650     }
2651     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2652    
2653     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2654     {
2655 gbeauche 1.2 emit_byte(0x66);
2656 gbeauche 1.1 if (isbyte(i)) {
2657     emit_byte(0x83);
2658     emit_byte(0xc0+d);
2659     emit_byte(i);
2660     }
2661     else {
2662 gbeauche 1.2 if (optimize_accum && isaccum(d))
2663     emit_byte(0x05);
2664     else {
2665 gbeauche 1.1 emit_byte(0x81);
2666     emit_byte(0xc0+d);
2667 gbeauche 1.2 }
2668 gbeauche 1.1 emit_word(i);
2669     }
2670     }
2671     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2672    
2673     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2674     {
2675 gbeauche 1.2 if (optimize_accum && isaccum(d))
2676     emit_byte(0x04);
2677     else {
2678     emit_byte(0x80);
2679     emit_byte(0xc0+d);
2680     }
2681 gbeauche 1.1 emit_byte(i);
2682     }
2683     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2684    
2685     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2686     {
2687     emit_byte(0x19);
2688     emit_byte(0xc0+8*s+d);
2689     }
2690     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2691    
2692     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2693     {
2694     emit_byte(0x66);
2695     emit_byte(0x19);
2696     emit_byte(0xc0+8*s+d);
2697     }
2698     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2699    
2700     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2701     {
2702     emit_byte(0x18);
2703     emit_byte(0xc0+8*s+d);
2704     }
2705     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2706    
2707     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2708     {
2709     emit_byte(0x29);
2710     emit_byte(0xc0+8*s+d);
2711     }
2712     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2713    
2714     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2715     {
2716     emit_byte(0x66);
2717     emit_byte(0x29);
2718     emit_byte(0xc0+8*s+d);
2719     }
2720     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2721    
2722     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2723     {
2724     emit_byte(0x28);
2725     emit_byte(0xc0+8*s+d);
2726     }
2727     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2728    
2729     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2730     {
2731     emit_byte(0x39);
2732     emit_byte(0xc0+8*s+d);
2733     }
2734     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2735    
2736     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2737     {
2738     if (optimize_imm8 && isbyte(i)) {
2739     emit_byte(0x83);
2740     emit_byte(0xf8+r);
2741     emit_byte(i);
2742     }
2743     else {
2744 gbeauche 1.2 if (optimize_accum && isaccum(r))
2745     emit_byte(0x3d);
2746     else {
2747 gbeauche 1.1 emit_byte(0x81);
2748     emit_byte(0xf8+r);
2749 gbeauche 1.2 }
2750 gbeauche 1.1 emit_long(i);
2751     }
2752     }
2753     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2754    
2755     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2756     {
2757     emit_byte(0x66);
2758     emit_byte(0x39);
2759     emit_byte(0xc0+8*s+d);
2760     }
2761     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2762    
2763 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2764     {
2765     emit_byte(0x80);
2766     emit_byte(0x3d);
2767     emit_long(d);
2768     emit_byte(s);
2769     }
2770     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2771    
2772 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2773     {
2774 gbeauche 1.2 if (optimize_accum && isaccum(d))
2775     emit_byte(0x3c);
2776     else {
2777 gbeauche 1.1 emit_byte(0x80);
2778     emit_byte(0xf8+d);
2779 gbeauche 1.2 }
2780 gbeauche 1.1 emit_byte(i);
2781     }
2782     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2783    
2784     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2785     {
2786     emit_byte(0x38);
2787     emit_byte(0xc0+8*s+d);
2788     }
2789     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2790    
2791     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2792     {
2793     int fi;
2794    
2795     switch(factor) {
2796     case 1: fi=0; break;
2797     case 2: fi=1; break;
2798     case 4: fi=2; break;
2799     case 8: fi=3; break;
2800     default: abort();
2801     }
2802     emit_byte(0x39);
2803     emit_byte(0x04+8*d);
2804     emit_byte(5+8*index+0x40*fi);
2805     emit_long(offset);
2806     }
2807     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2808    
2809     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2810     {
2811     emit_byte(0x31);
2812     emit_byte(0xc0+8*s+d);
2813     }
2814     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2815    
2816     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2817     {
2818     emit_byte(0x66);
2819     emit_byte(0x31);
2820     emit_byte(0xc0+8*s+d);
2821     }
2822     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2823    
2824     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2825     {
2826     emit_byte(0x30);
2827     emit_byte(0xc0+8*s+d);
2828     }
2829     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2830    
2831     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2832     {
2833     if (optimize_imm8 && isbyte(s)) {
2834     emit_byte(0x83);
2835     emit_byte(0x2d);
2836     emit_long(d);
2837     emit_byte(s);
2838     }
2839     else {
2840     emit_byte(0x81);
2841     emit_byte(0x2d);
2842     emit_long(d);
2843     emit_long(s);
2844     }
2845     }
2846     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2847    
2848     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2849     {
2850     if (optimize_imm8 && isbyte(s)) {
2851     emit_byte(0x83);
2852     emit_byte(0x3d);
2853     emit_long(d);
2854     emit_byte(s);
2855     }
2856     else {
2857     emit_byte(0x81);
2858     emit_byte(0x3d);
2859     emit_long(d);
2860     emit_long(s);
2861     }
2862     }
2863     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2864    
2865     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2866     {
2867     emit_byte(0x87);
2868     emit_byte(0xc0+8*r1+r2);
2869     }
2870     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2871    
2872     /*************************************************************************
2873     * FIXME: mem access modes probably wrong *
2874     *************************************************************************/
2875    
2876     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2877     {
2878     emit_byte(0x9c);
2879     }
2880     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2881    
2882     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2883     {
2884     emit_byte(0x9d);
2885     }
2886     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2887 gbeauche 1.13
2888     #endif
2889 gbeauche 1.1
2890     /*************************************************************************
2891     * Unoptimizable stuff --- jump *
2892     *************************************************************************/
2893    
2894     static __inline__ void raw_call_r(R4 r)
2895     {
2896     emit_byte(0xff);
2897     emit_byte(0xd0+r);
2898 gbeauche 1.5 }
2899    
2900     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2901     {
2902     int mu;
2903     switch(m) {
2904     case 1: mu=0; break;
2905     case 2: mu=1; break;
2906     case 4: mu=2; break;
2907     case 8: mu=3; break;
2908     default: abort();
2909     }
2910     emit_byte(0xff);
2911     emit_byte(0x14);
2912     emit_byte(0x05+8*r+0x40*mu);
2913     emit_long(base);
2914 gbeauche 1.1 }
2915    
2916     static __inline__ void raw_jmp_r(R4 r)
2917     {
2918     emit_byte(0xff);
2919     emit_byte(0xe0+r);
2920     }
2921    
2922     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2923     {
2924     int mu;
2925     switch(m) {
2926     case 1: mu=0; break;
2927     case 2: mu=1; break;
2928     case 4: mu=2; break;
2929     case 8: mu=3; break;
2930     default: abort();
2931     }
2932     emit_byte(0xff);
2933     emit_byte(0x24);
2934     emit_byte(0x05+8*r+0x40*mu);
2935     emit_long(base);
2936     }
2937    
2938     static __inline__ void raw_jmp_m(uae_u32 base)
2939     {
2940     emit_byte(0xff);
2941     emit_byte(0x25);
2942     emit_long(base);
2943     }
2944    
2945    
2946     static __inline__ void raw_call(uae_u32 t)
2947     {
2948     emit_byte(0xe8);
2949     emit_long(t-(uae_u32)target-4);
2950     }
2951    
2952     static __inline__ void raw_jmp(uae_u32 t)
2953     {
2954     emit_byte(0xe9);
2955     emit_long(t-(uae_u32)target-4);
2956     }
2957    
2958     static __inline__ void raw_jl(uae_u32 t)
2959     {
2960     emit_byte(0x0f);
2961     emit_byte(0x8c);
2962     emit_long(t-(uae_u32)target-4);
2963     }
2964    
2965     static __inline__ void raw_jz(uae_u32 t)
2966     {
2967     emit_byte(0x0f);
2968     emit_byte(0x84);
2969     emit_long(t-(uae_u32)target-4);
2970     }
2971    
2972     static __inline__ void raw_jnz(uae_u32 t)
2973     {
2974     emit_byte(0x0f);
2975     emit_byte(0x85);
2976     emit_long(t-(uae_u32)target-4);
2977     }
2978    
2979     static __inline__ void raw_jnz_l_oponly(void)
2980     {
2981     emit_byte(0x0f);
2982     emit_byte(0x85);
2983     }
2984    
2985     static __inline__ void raw_jcc_l_oponly(int cc)
2986     {
2987     emit_byte(0x0f);
2988     emit_byte(0x80+cc);
2989     }
2990    
2991     static __inline__ void raw_jnz_b_oponly(void)
2992     {
2993     emit_byte(0x75);
2994     }
2995    
2996     static __inline__ void raw_jz_b_oponly(void)
2997     {
2998     emit_byte(0x74);
2999     }
3000    
3001     static __inline__ void raw_jcc_b_oponly(int cc)
3002     {
3003     emit_byte(0x70+cc);
3004     }
3005    
3006     static __inline__ void raw_jmp_l_oponly(void)
3007     {
3008     emit_byte(0xe9);
3009     }
3010    
3011     static __inline__ void raw_jmp_b_oponly(void)
3012     {
3013     emit_byte(0xeb);
3014     }
3015    
3016     static __inline__ void raw_ret(void)
3017     {
3018     emit_byte(0xc3);
3019     }
3020    
3021     static __inline__ void raw_nop(void)
3022     {
3023     emit_byte(0x90);
3024     }
3025    
3026 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3027     {
3028     /* Source: GNU Binutils 2.12.90.0.15 */
3029     /* Various efficient no-op patterns for aligning code labels.
3030     Note: Don't try to assemble the instructions in the comments.
3031     0L and 0w are not legal. */
3032     static const uae_u8 f32_1[] =
3033     {0x90}; /* nop */
3034     static const uae_u8 f32_2[] =
3035     {0x89,0xf6}; /* movl %esi,%esi */
3036     static const uae_u8 f32_3[] =
3037     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3038     static const uae_u8 f32_4[] =
3039     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3040     static const uae_u8 f32_5[] =
3041     {0x90, /* nop */
3042     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3043     static const uae_u8 f32_6[] =
3044     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3045     static const uae_u8 f32_7[] =
3046     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3047     static const uae_u8 f32_8[] =
3048     {0x90, /* nop */
3049     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3050     static const uae_u8 f32_9[] =
3051     {0x89,0xf6, /* movl %esi,%esi */
3052     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3053     static const uae_u8 f32_10[] =
3054     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3055     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3056     static const uae_u8 f32_11[] =
3057     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3058     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3059     static const uae_u8 f32_12[] =
3060     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3061     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3062     static const uae_u8 f32_13[] =
3063     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3064     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3065     static const uae_u8 f32_14[] =
3066     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3067     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3068     static const uae_u8 f32_15[] =
3069     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3070     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3071     static const uae_u8 f32_16[] =
3072     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3073     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3074     static const uae_u8 *const f32_patt[] = {
3075     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3076     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3077     };
3078    
3079     int nloops = nbytes / 16;
3080     while (nloops-- > 0)
3081     emit_block(f32_16, sizeof(f32_16));
3082    
3083     nbytes %= 16;
3084     if (nbytes)
3085     emit_block(f32_patt[nbytes - 1], nbytes);
3086     }
3087    
3088 gbeauche 1.1
3089     /*************************************************************************
3090     * Flag handling, to and fro UAE flag register *
3091     *************************************************************************/
3092    
3093     #ifdef SAHF_SETO_PROFITABLE
3094    
3095     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3096    
3097     static __inline__ void raw_flags_to_reg(int r)
3098     {
3099     raw_lahf(0); /* Most flags in AH */
3100     //raw_setcc(r,0); /* V flag in AL */
3101     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
3102    
3103     #if 1 /* Let's avoid those nasty partial register stalls */
3104     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
3105     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3106     //live.state[FLAGTMP].status=CLEAN;
3107     live.state[FLAGTMP].status=INMEM;
3108     live.state[FLAGTMP].realreg=-1;
3109     /* We just "evicted" FLAGTMP. */
3110     if (live.nat[r].nholds!=1) {
3111     /* Huh? */
3112     abort();
3113     }
3114     live.nat[r].nholds=0;
3115     #endif
3116     }
3117    
3118     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3119     static __inline__ void raw_reg_to_flags(int r)
3120     {
3121     raw_cmp_b_ri(r,-127); /* set V */
3122     raw_sahf(0);
3123     }
3124    
3125     #else
3126    
3127     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3128     static __inline__ void raw_flags_to_reg(int r)
3129     {
3130     raw_pushfl();
3131     raw_pop_l_r(r);
3132     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3133     // live.state[FLAGTMP].status=CLEAN;
3134     live.state[FLAGTMP].status=INMEM;
3135     live.state[FLAGTMP].realreg=-1;
3136     /* We just "evicted" FLAGTMP. */
3137     if (live.nat[r].nholds!=1) {
3138     /* Huh? */
3139     abort();
3140     }
3141     live.nat[r].nholds=0;
3142     }
3143    
3144     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3145     static __inline__ void raw_reg_to_flags(int r)
3146     {
3147     raw_push_l_r(r);
3148     raw_popfl();
3149     }
3150    
3151     #endif
3152    
3153     /* Apparently, there are enough instructions between flag store and
3154     flag reload to avoid the partial memory stall */
3155     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3156     {
3157     #if 1
3158     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3159     #else
3160     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3161     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3162     #endif
3163     }
3164    
3165     /* FLAGX is byte sized, and we *do* write it at that size */
3166     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3167     {
3168     if (live.nat[target].canbyte)
3169     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3170     else if (live.nat[target].canword)
3171     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3172     else
3173     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3174     }
3175    
3176 gbeauche 1.11 #define NATIVE_FLAG_Z 0x40
3177     static __inline__ void raw_flags_set_zero(int f, int r, int t)
3178     {
3179     // FIXME: this is really suboptimal
3180     raw_pushfl();
3181     raw_pop_l_r(f);
3182     raw_and_l_ri(f,~NATIVE_FLAG_Z);
3183     raw_test_l_rr(r,r);
3184     raw_mov_l_ri(r,0);
3185     raw_mov_l_ri(t,NATIVE_FLAG_Z);
3186     raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3187     raw_or_l(f,r);
3188     raw_push_l_r(f);
3189     raw_popfl();
3190     }
3191 gbeauche 1.1
3192     static __inline__ void raw_inc_sp(int off)
3193     {
3194 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3195 gbeauche 1.1 }
3196    
3197     /*************************************************************************
3198     * Handling mistaken direct memory access *
3199     *************************************************************************/
3200    
3201     // gb-- I don't need that part for JIT Basilisk II
3202     #if defined(NATMEM_OFFSET) && 0
3203     #include <asm/sigcontext.h>
3204     #include <signal.h>
3205    
3206     #define SIG_READ 1
3207     #define SIG_WRITE 2
3208    
3209     static int in_handler=0;
3210     static uae_u8 veccode[256];
3211    
3212     static void vec(int x, struct sigcontext sc)
3213     {
3214     uae_u8* i=(uae_u8*)sc.eip;
3215     uae_u32 addr=sc.cr2;
3216     int r=-1;
3217     int size=4;
3218     int dir=-1;
3219     int len=0;
3220     int j;
3221    
3222     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3223     if (!canbang)
3224     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3225     if (in_handler)
3226     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3227    
3228     if (canbang && i>=compiled_code && i<=current_compile_p) {
3229     if (*i==0x66) {
3230     i++;
3231     size=2;
3232     len++;
3233     }
3234    
3235     switch(i[0]) {
3236     case 0x8a:
3237     if ((i[1]&0xc0)==0x80) {
3238     r=(i[1]>>3)&7;
3239     dir=SIG_READ;
3240     size=1;
3241     len+=6;
3242     break;
3243     }
3244     break;
3245     case 0x88:
3246     if ((i[1]&0xc0)==0x80) {
3247     r=(i[1]>>3)&7;
3248     dir=SIG_WRITE;
3249     size=1;
3250     len+=6;
3251     break;
3252     }
3253     break;
3254     case 0x8b:
3255     if ((i[1]&0xc0)==0x80) {
3256     r=(i[1]>>3)&7;
3257     dir=SIG_READ;
3258     len+=6;
3259     break;
3260     }
3261     if ((i[1]&0xc0)==0x40) {
3262     r=(i[1]>>3)&7;
3263     dir=SIG_READ;
3264     len+=3;
3265     break;
3266     }
3267     break;
3268     case 0x89:
3269     if ((i[1]&0xc0)==0x80) {
3270     r=(i[1]>>3)&7;
3271     dir=SIG_WRITE;
3272     len+=6;
3273     break;
3274     }
3275     if ((i[1]&0xc0)==0x40) {
3276     r=(i[1]>>3)&7;
3277     dir=SIG_WRITE;
3278     len+=3;
3279     break;
3280     }
3281     break;
3282     }
3283     }
3284    
3285     if (r!=-1) {
3286     void* pr=NULL;
3287     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3288    
3289     switch(r) {
3290     case 0: pr=&(sc.eax); break;
3291     case 1: pr=&(sc.ecx); break;
3292     case 2: pr=&(sc.edx); break;
3293     case 3: pr=&(sc.ebx); break;
3294     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3295     case 5: pr=(size>1)?
3296     (void*)(&(sc.ebp)):
3297     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3298     case 6: pr=(size>1)?
3299     (void*)(&(sc.esi)):
3300     (void*)(((uae_u8*)&(sc.edx))+1); break;
3301     case 7: pr=(size>1)?
3302     (void*)(&(sc.edi)):
3303     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3304     default: abort();
3305     }
3306     if (pr) {
3307     blockinfo* bi;
3308    
3309     if (currprefs.comp_oldsegv) {
3310     addr-=NATMEM_OFFSET;
3311    
3312     if ((addr>=0x10000000 && addr<0x40000000) ||
3313     (addr>=0x50000000)) {
3314     write_log("Suspicious address in %x SEGV handler.\n",addr);
3315     }
3316     if (dir==SIG_READ) {
3317     switch(size) {
3318     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3319     case 2: *((uae_u16*)pr)=get_word(addr); break;
3320     case 4: *((uae_u32*)pr)=get_long(addr); break;
3321     default: abort();
3322     }
3323     }
3324     else { /* write */
3325     switch(size) {
3326     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3327     case 2: put_word(addr,*((uae_u16*)pr)); break;
3328     case 4: put_long(addr,*((uae_u32*)pr)); break;
3329     default: abort();
3330     }
3331     }
3332     write_log("Handled one access!\n");
3333     fflush(stdout);
3334     segvcount++;
3335     sc.eip+=len;
3336     }
3337     else {
3338     void* tmp=target;
3339     int i;
3340     uae_u8 vecbuf[5];
3341    
3342     addr-=NATMEM_OFFSET;
3343    
3344     if ((addr>=0x10000000 && addr<0x40000000) ||
3345     (addr>=0x50000000)) {
3346     write_log("Suspicious address in %x SEGV handler.\n",addr);
3347     }
3348    
3349     target=(uae_u8*)sc.eip;
3350     for (i=0;i<5;i++)
3351     vecbuf[i]=target[i];
3352     emit_byte(0xe9);
3353     emit_long((uae_u32)veccode-(uae_u32)target-4);
3354     write_log("Create jump to %p\n",veccode);
3355    
3356     write_log("Handled one access!\n");
3357     fflush(stdout);
3358     segvcount++;
3359    
3360     target=veccode;
3361    
3362     if (dir==SIG_READ) {
3363     switch(size) {
3364     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3365     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3366     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3367     default: abort();
3368     }
3369     }
3370     else { /* write */
3371     switch(size) {
3372     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3373     case 2: put_word(addr,*((uae_u16*)pr)); break;
3374     case 4: put_long(addr,*((uae_u32*)pr)); break;
3375     default: abort();
3376     }
3377     }
3378     for (i=0;i<5;i++)
3379     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3380     raw_mov_l_mi((uae_u32)&in_handler,0);
3381     emit_byte(0xe9);
3382     emit_long(sc.eip+len-(uae_u32)target-4);
3383     in_handler=1;
3384     target=tmp;
3385     }
3386     bi=active;
3387     while (bi) {
3388     if (bi->handler &&
3389     (uae_u8*)bi->direct_handler<=i &&
3390     (uae_u8*)bi->nexthandler>i) {
3391     write_log("deleted trigger (%p<%p<%p) %p\n",
3392     bi->handler,
3393     i,
3394     bi->nexthandler,
3395     bi->pc_p);
3396     invalidate_block(bi);
3397     raise_in_cl_list(bi);
3398     set_special(0);
3399     return;
3400     }
3401     bi=bi->next;
3402     }
3403     /* Not found in the active list. Might be a rom routine that
3404     is in the dormant list */
3405     bi=dormant;
3406     while (bi) {
3407     if (bi->handler &&
3408     (uae_u8*)bi->direct_handler<=i &&
3409     (uae_u8*)bi->nexthandler>i) {
3410     write_log("deleted trigger (%p<%p<%p) %p\n",
3411     bi->handler,
3412     i,
3413     bi->nexthandler,
3414     bi->pc_p);
3415     invalidate_block(bi);
3416     raise_in_cl_list(bi);
3417     set_special(0);
3418     return;
3419     }
3420     bi=bi->next;
3421     }
3422     write_log("Huh? Could not find trigger!\n");
3423     return;
3424     }
3425     }
3426     write_log("Can't handle access!\n");
3427     for (j=0;j<10;j++) {
3428     write_log("instruction byte %2d is %02x\n",j,i[j]);
3429     }
3430     write_log("Please send the above info (starting at \"fault address\") to\n"
3431     "bmeyer@csse.monash.edu.au\n"
3432     "This shouldn't happen ;-)\n");
3433     fflush(stdout);
3434     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3435     }
3436     #endif
3437    
3438    
3439     /*************************************************************************
3440     * Checking for CPU features *
3441     *************************************************************************/
3442    
3443 gbeauche 1.3 struct cpuinfo_x86 {
3444     uae_u8 x86; // CPU family
3445     uae_u8 x86_vendor; // CPU vendor
3446     uae_u8 x86_processor; // CPU canonical processor type
3447     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3448     uae_u32 x86_hwcap;
3449     uae_u8 x86_model;
3450     uae_u8 x86_mask;
3451     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3452     char x86_vendor_id[16];
3453     };
3454     struct cpuinfo_x86 cpuinfo;
3455    
/* CPU vendors recognised by x86_get_cpu_vendor() */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff   /* vendor string not recognised */
};
3468    
/* Canonical processor types; the values index the per-processor tables,
   X86_PROCESSOR_max is the number of entries. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_max        = 7
};
3479    
3480     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3481     "80386",
3482     "80486",
3483     "Pentium",
3484     "PentiumPro",
3485     "K6",
3486     "Athlon",
3487     "Pentium4"
3488     };
3489    
3490     static struct ptt {
3491     const int align_loop;
3492     const int align_loop_max_skip;
3493     const int align_jump;
3494     const int align_jump_max_skip;
3495     const int align_func;
3496     }
3497     x86_alignments[X86_PROCESSOR_max] = {
3498     { 4, 3, 4, 3, 4 },
3499     { 16, 15, 16, 15, 16 },
3500     { 16, 7, 16, 7, 16 },
3501     { 16, 15, 16, 7, 16 },
3502     { 32, 7, 32, 7, 32 },
3503 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3504 gbeauche 1.3 { 0, 0, 0, 0, 0 }
3505     };
3506 gbeauche 1.1
3507 gbeauche 1.3 static void
3508     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3509 gbeauche 1.1 {
3510 gbeauche 1.3 char *v = c->x86_vendor_id;
3511    
3512     if (!strcmp(v, "GenuineIntel"))
3513     c->x86_vendor = X86_VENDOR_INTEL;
3514     else if (!strcmp(v, "AuthenticAMD"))
3515     c->x86_vendor = X86_VENDOR_AMD;
3516     else if (!strcmp(v, "CyrixInstead"))
3517     c->x86_vendor = X86_VENDOR_CYRIX;
3518     else if (!strcmp(v, "Geode by NSC"))
3519     c->x86_vendor = X86_VENDOR_NSC;
3520     else if (!strcmp(v, "UMC UMC UMC "))
3521     c->x86_vendor = X86_VENDOR_UMC;
3522     else if (!strcmp(v, "CentaurHauls"))
3523     c->x86_vendor = X86_VENDOR_CENTAUR;
3524     else if (!strcmp(v, "NexGenDriven"))
3525     c->x86_vendor = X86_VENDOR_NEXGEN;
3526     else if (!strcmp(v, "RiseRiseRise"))
3527     c->x86_vendor = X86_VENDOR_RISE;
3528     else if (!strcmp(v, "GenuineTMx86") ||
3529     !strcmp(v, "TransmetaCPU"))
3530     c->x86_vendor = X86_VENDOR_TRANSMETA;
3531     else
3532     c->x86_vendor = X86_VENDOR_UNKNOWN;
3533     }
3534 gbeauche 1.1
3535 gbeauche 1.3 static void
3536     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3537     {
3538     static uae_u8 cpuid_space[256];
3539     uae_u8* tmp=get_target();
3540 gbeauche 1.1
3541 gbeauche 1.3 set_target(cpuid_space);
3542     raw_push_l_r(0); /* eax */
3543     raw_push_l_r(1); /* ecx */
3544     raw_push_l_r(2); /* edx */
3545     raw_push_l_r(3); /* ebx */
3546     raw_mov_l_rm(0,(uae_u32)&op);
3547     raw_cpuid(0);
3548     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3549     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3550     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3551     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3552     raw_pop_l_r(3);
3553     raw_pop_l_r(2);
3554     raw_pop_l_r(1);
3555     raw_pop_l_r(0);
3556     raw_ret();
3557     set_target(tmp);
3558 gbeauche 1.1
3559 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3560 gbeauche 1.1 }
3561    
3562 gbeauche 1.3 static void
3563     raw_init_cpu(void)
3564 gbeauche 1.1 {
3565 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3566    
3567     /* Defaults */
3568     c->x86_vendor = X86_VENDOR_UNKNOWN;
3569     c->cpuid_level = -1; /* CPUID not detected */
3570     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3571     c->x86_vendor_id[0] = '\0'; /* Unset */
3572     c->x86_hwcap = 0;
3573    
3574     /* Get vendor name */
3575     c->x86_vendor_id[12] = '\0';
3576     cpuid(0x00000000,
3577     (uae_u32 *)&c->cpuid_level,
3578     (uae_u32 *)&c->x86_vendor_id[0],
3579     (uae_u32 *)&c->x86_vendor_id[8],
3580     (uae_u32 *)&c->x86_vendor_id[4]);
3581     x86_get_cpu_vendor(c);
3582    
3583     /* Intel-defined flags: level 0x00000001 */
3584     c->x86_brand_id = 0;
3585     if ( c->cpuid_level >= 0x00000001 ) {
3586     uae_u32 tfms, brand_id;
3587     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3588     c->x86 = (tfms >> 8) & 15;
3589     c->x86_model = (tfms >> 4) & 15;
3590     c->x86_brand_id = brand_id & 0xff;
3591     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3592     (c->x86 == 0xf)) {
3593     /* AMD Extended Family and Model Values */
3594     c->x86 += (tfms >> 20) & 0xff;
3595     c->x86_model += (tfms >> 12) & 0xf0;
3596     }
3597     c->x86_mask = tfms & 15;
3598     } else {
3599     /* Have CPUID level 0 only - unheard of */
3600     c->x86 = 4;
3601     }
3602    
3603     /* Canonicalize processor ID */
3604     c->x86_processor = X86_PROCESSOR_max;
3605     switch (c->x86) {
3606     case 3:
3607     c->x86_processor = X86_PROCESSOR_I386;
3608     break;
3609     case 4:
3610     c->x86_processor = X86_PROCESSOR_I486;
3611     break;
3612     case 5:
3613     if (c->x86_vendor == X86_VENDOR_AMD)
3614     c->x86_processor = X86_PROCESSOR_K6;
3615     else
3616     c->x86_processor = X86_PROCESSOR_PENTIUM;
3617     break;
3618     case 6:
3619     if (c->x86_vendor == X86_VENDOR_AMD)
3620     c->x86_processor = X86_PROCESSOR_ATHLON;
3621     else
3622     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3623     break;
3624     case 15:
3625     if (c->x86_vendor == X86_VENDOR_INTEL) {
3626     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
3627     if (c->x86_brand_id >= 8)
3628     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3629     }
3630     break;
3631     }
3632     if (c->x86_processor == X86_PROCESSOR_max) {
3633     fprintf(stderr, "Error: unknown processor type\n");
3634     fprintf(stderr, " Family : %d\n", c->x86);
3635     fprintf(stderr, " Model : %d\n", c->x86_model);
3636     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3637     if (c->x86_brand_id)
3638     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3639     abort();
3640     }
3641    
3642     /* Have CMOV support? */
3643     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3644    
3645     /* Can the host CPU suffer from partial register stalls? */
3646     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3647     #if 1
3648     /* It appears that partial register writes are a bad idea even on
3649 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3650     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3651 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3652     have_rat_stall = true;
3653 gbeauche 1.1 #endif
3654 gbeauche 1.3
3655     /* Alignments */
3656     if (tune_alignment) {
3657     align_loops = x86_alignments[c->x86_processor].align_loop;
3658     align_jumps = x86_alignments[c->x86_processor].align_jump;
3659     }
3660    
3661     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3662     c->cpuid_level, c->x86_vendor_id,
3663     x86_processor_string_table[c->x86_processor]);
3664 gbeauche 1.1 }
3665    
3666 gbeauche 1.10 static bool target_check_bsf(void)
3667     {
3668     bool mismatch = false;
3669     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3670     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3671     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3672     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3673     for (int value = -1; value <= 1; value++) {
3674     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3675     int tmp = value;
3676     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3677 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3678 gbeauche 1.10 int OF = (flags >> 11) & 1;
3679     int SF = (flags >> 7) & 1;
3680     int ZF = (flags >> 6) & 1;
3681     int CF = flags & 1;
3682     tmp = (value == 0);
3683     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3684     mismatch = true;
3685     }
3686     }}}}
3687     if (mismatch)
3688     write_log("Target CPU defines all flags on BSF instruction\n");
3689     return !mismatch;
3690     }
3691    
3692 gbeauche 1.1
3693     /*************************************************************************
3694     * FPU stuff *
3695     *************************************************************************/
3696    
3697    
3698     static __inline__ void raw_fp_init(void)
3699     {
3700     int i;
3701    
3702     for (i=0;i<N_FREGS;i++)
3703     live.spos[i]=-2;
3704     live.tos=-1; /* Stack is empty */
3705     }
3706    
3707     static __inline__ void raw_fp_cleanup_drop(void)
3708     {
3709     #if 0
3710     /* using FINIT instead of popping all the entries.
3711     Seems to have side effects --- there is display corruption in
3712     Quake when this is used */
3713     if (live.tos>1) {
3714     emit_byte(0x9b);
3715     emit_byte(0xdb);
3716     emit_byte(0xe3);
3717     live.tos=-1;
3718     }
3719     #endif
3720     while (live.tos>=1) {
3721     emit_byte(0xde);
3722     emit_byte(0xd9);
3723     live.tos-=2;
3724     }
3725     while (live.tos>=0) {
3726     emit_byte(0xdd);
3727     emit_byte(0xd8);
3728     live.tos--;
3729     }
3730     raw_fp_init();
3731     }
3732    
3733     static __inline__ void make_tos(int r)
3734     {
3735     int p,q;
3736    
3737     if (live.spos[r]<0) { /* Register not yet on stack */
3738     emit_byte(0xd9);
3739     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3740     live.tos++;
3741     live.spos[r]=live.tos;
3742     live.onstack[live.tos]=r;
3743     return;
3744     }
3745     /* Register is on stack */
3746     if (live.tos==live.spos[r])
3747     return;
3748     p=live.spos[r];
3749     q=live.onstack[live.tos];
3750    
3751     emit_byte(0xd9);
3752     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3753     live.onstack[live.tos]=r;
3754     live.spos[r]=live.tos;
3755     live.onstack[p]=q;
3756     live.spos[q]=p;
3757     }
3758    
3759     static __inline__ void make_tos2(int r, int r2)
3760     {
3761     int q;
3762    
3763     make_tos(r2); /* Put the reg that's supposed to end up in position2
3764     on top */
3765    
3766     if (live.spos[r]<0) { /* Register not yet on stack */
3767     make_tos(r); /* This will extend the stack */
3768     return;
3769     }
3770     /* Register is on stack */
3771     emit_byte(0xd9);
3772     emit_byte(0xc9); /* Move r2 into position 2 */
3773    
3774     q=live.onstack[live.tos-1];
3775     live.onstack[live.tos]=q;
3776     live.spos[q]=live.tos;
3777     live.onstack[live.tos-1]=r2;
3778     live.spos[r2]=live.tos-1;
3779    
3780     make_tos(r); /* And r into 1 */
3781     }
3782    
3783     static __inline__ int stackpos(int r)
3784     {
3785     if (live.spos[r]<0)
3786     abort();
3787     if (live.tos<live.spos[r]) {
3788     printf("Looking for spos for fnreg %d\n",r);
3789     abort();
3790     }
3791     return live.tos-live.spos[r];
3792     }
3793    
3794     static __inline__ void usereg(int r)
3795     {
3796     if (live.spos[r]<0)
3797     make_tos(r);
3798     }
3799    
3800     /* This is called with one FP value in a reg *above* tos, which it will
3801     pop off the stack if necessary */
3802     static __inline__ void tos_make(int r)
3803     {
3804     if (live.spos[r]<0) {
3805     live.tos++;
3806     live.spos[r]=live.tos;
3807     live.onstack[live.tos]=r;
3808     return;
3809     }
3810     emit_byte(0xdd);
3811     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3812     and pop it*/
3813     }
3814    
3815    
3816     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3817     {
3818     make_tos(r);
3819     emit_byte(0xdd);
3820     emit_byte(0x15);
3821     emit_long(m);
3822     }
3823     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3824    
3825     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
3826     {
3827     make_tos(r);
3828     emit_byte(0xdd);
3829     emit_byte(0x1d);
3830     emit_long(m);
3831     live.onstack[live.tos]=-1;
3832     live.tos--;
3833     live.spos[r]=-2;
3834     }
3835     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3836    
3837     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3838     {
3839     emit_byte(0xdd);
3840     emit_byte(0x05);
3841     emit_long(m);
3842     tos_make(r);
3843     }
3844     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3845    
3846     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3847     {
3848     emit_byte(0xdb);
3849     emit_byte(0x05);
3850     emit_long(m);
3851     tos_make(r);
3852     }
3853     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3854    
3855     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3856     {
3857     make_tos(r);
3858     emit_byte(0xdb);
3859     emit_byte(0x15);
3860     emit_long(m);
3861     }
3862     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3863    
3864     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3865     {
3866     emit_byte(0xd9);
3867     emit_byte(0x05);
3868     emit_long(m);
3869     tos_make(r);
3870     }
3871     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3872    
3873     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3874     {
3875     make_tos(r);
3876     emit_byte(0xd9);
3877     emit_byte(0x15);
3878     emit_long(m);
3879     }
3880     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3881    
3882     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3883     {
3884     int rs;
3885    
3886     /* Stupid x87 can't write a long double to mem without popping the
3887     stack! */
3888     usereg(r);
3889     rs=stackpos(r);
3890     emit_byte(0xd9); /* Get a copy to the top of stack */
3891     emit_byte(0xc0+rs);
3892    
3893     emit_byte(0xdb); /* store and pop it */
3894     emit_byte(0x3d);
3895     emit_long(m);
3896     }
3897     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3898    
3899     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
3900     {
3901     int rs;
3902    
3903     make_tos(r);
3904     emit_byte(0xdb); /* store and pop it */
3905     emit_byte(0x3d);
3906     emit_long(m);
3907     live.onstack[live.tos]=-1;
3908     live.tos--;
3909     live.spos[r]=-2;
3910     }
3911     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3912    
3913     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
3914     {
3915     emit_byte(0xdb);
3916     emit_byte(0x2d);
3917     emit_long(m);
3918     tos_make(r);
3919     }
3920     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
3921    
3922     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
3923     {
3924     emit_byte(0xd9);
3925     emit_byte(0xeb);
3926     tos_make(r);
3927     }
3928     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
3929    
3930     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
3931     {
3932     emit_byte(0xd9);
3933     emit_byte(0xec);
3934     tos_make(r);
3935     }
3936     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
3937    
3938     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
3939     {
3940     emit_byte(0xd9);
3941     emit_byte(0xea);
3942     tos_make(r);
3943     }
3944     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
3945    
3946     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
3947     {
3948     emit_byte(0xd9);
3949     emit_byte(0xed);
3950     tos_make(r);
3951     }
3952     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
3953    
3954     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
3955     {
3956     emit_byte(0xd9);
3957     emit_byte(0xe8);
3958     tos_make(r);
3959     }
3960     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
3961    
3962     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3963     {
3964     emit_byte(0xd9);
3965     emit_byte(0xee);
3966     tos_make(r);
3967     }
3968     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3969    
3970     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
3971     {
3972     int ds;
3973    
3974     usereg(s);
3975     ds=stackpos(s);
3976     if (ds==0 && live.spos[d]>=0) {
3977     /* source is on top of stack, and we already have the dest */
3978     int dd=stackpos(d);
3979     emit_byte(0xdd);
3980     emit_byte(0xd0+dd);
3981     }
3982     else {
3983     emit_byte(0xd9);
3984     emit_byte(0xc0+ds); /* duplicate source on tos */
3985     tos_make(d); /* store to destination, pop if necessary */
3986     }
3987     }
3988     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
3989    
3990     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3991     {
3992     emit_byte(0xd9);
3993     emit_byte(0xa8+index);
3994     emit_long(base);
3995     }
3996     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3997    
3998    
3999     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4000     {
4001     int ds;
4002    
4003     if (d!=s) {
4004     usereg(s);
4005     ds=stackpos(s);
4006     emit_byte(0xd9);
4007     emit_byte(0xc0+ds); /* duplicate source */
4008     emit_byte(0xd9);
4009     emit_byte(0xfa); /* take square root */
4010     tos_make(d); /* store to destination */
4011     }
4012     else {
4013     make_tos(d);
4014     emit_byte(0xd9);
4015     emit_byte(0xfa); /* take square root */
4016     }
4017     }
4018     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4019    
4020     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4021     {
4022     int ds;
4023    
4024     if (d!=s) {
4025     usereg(s);
4026     ds=stackpos(s);
4027     emit_byte(0xd9);
4028     emit_byte(0xc0+ds); /* duplicate source */
4029     emit_byte(0xd9);
4030     emit_byte(0xe1); /* take fabs */
4031     tos_make(d); /* store to destination */
4032     }
4033     else {
4034     make_tos(d);
4035     emit_byte(0xd9);
4036     emit_byte(0xe1); /* take fabs */
4037     }
4038     }
4039     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4040    
4041     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4042     {
4043     int ds;
4044    
4045     if (d!=s) {
4046     usereg(s);
4047     ds=stackpos(s);
4048     emit_byte(0xd9);
4049     emit_byte(0xc0+ds); /* duplicate source */
4050     emit_byte(0xd9);
4051     emit_byte(0xfc); /* take frndint */
4052     tos_make(d); /* store to destination */
4053     }
4054     else {
4055     make_tos(d);
4056     emit_byte(0xd9);
4057     emit_byte(0xfc); /* take frndint */
4058     }
4059     }
4060     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4061    
4062     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4063     {
4064     int ds;
4065    
4066     if (d!=s) {
4067     usereg(s);
4068     ds=stackpos(s);
4069     emit_byte(0xd9);
4070     emit_byte(0xc0+ds); /* duplicate source */
4071     emit_byte(0xd9);
4072     emit_byte(0xff); /* take cos */
4073     tos_make(d); /* store to destination */
4074     }
4075     else {
4076     make_tos(d);
4077     emit_byte(0xd9);
4078     emit_byte(0xff); /* take cos */
4079     }
4080     }
4081     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4082    
4083     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4084     {
4085     int ds;
4086    
4087     if (d!=s) {
4088     usereg(s);
4089     ds=stackpos(s);
4090     emit_byte(0xd9);
4091     emit_byte(0xc0+ds); /* duplicate source */
4092     emit_byte(0xd9);
4093     emit_byte(0xfe); /* take sin */
4094     tos_make(d); /* store to destination */
4095     }
4096     else {
4097     make_tos(d);
4098     emit_byte(0xd9);
4099     emit_byte(0xfe); /* take sin */
4100     }
4101     }
4102     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4103    
4104     double one=1;
4105     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4106     {
4107     int ds;
4108    
4109     usereg(s);
4110     ds=stackpos(s);
4111     emit_byte(0xd9);
4112     emit_byte(0xc0+ds); /* duplicate source */
4113    
4114     emit_byte(0xd9);
4115     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4116     emit_byte(0xd9);
4117     emit_byte(0xfc); /* rndint */
4118     emit_byte(0xd9);
4119     emit_byte(0xc9); /* swap top two elements */
4120     emit_byte(0xd8);
4121     emit_byte(0xe1); /* subtract rounded from original */
4122     emit_byte(0xd9);
4123     emit_byte(0xf0); /* f2xm1 */
4124     emit_byte(0xdc);
4125     emit_byte(0x05);
4126     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
4127     emit_byte(0xd9);
4128     emit_byte(0xfd); /* and scale it */
4129     emit_byte(0xdd);
4130     emit_byte(0xd9); /* take he rounded value off */
4131     tos_make(d); /* store to destination */
4132     }
4133     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4134    
4135     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4136     {
4137     int ds;
4138    
4139     usereg(s);
4140     ds=stackpos(s);
4141     emit_byte(0xd9);
4142     emit_byte(0xc0+ds); /* duplicate source */
4143     emit_byte(0xd9);
4144     emit_byte(0xea); /* fldl2e */
4145     emit_byte(0xde);
4146     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4147    
4148     emit_byte(0xd9);
4149     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4150     emit_byte(0xd9);
4151     emit_byte(0xfc); /* rndint */
4152     emit_byte(0xd9);
4153     emit_byte(0xc9); /* swap top two elements */
4154     emit_byte(0xd8);
4155     emit_byte(0xe1); /* subtract rounded from original */
4156     emit_byte(0xd9);
4157     emit_byte(0xf0); /* f2xm1 */
4158     emit_byte(0xdc);
4159     emit_byte(0x05);
4160     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
4161     emit_byte(0xd9);
4162     emit_byte(0xfd); /* and scale it */
4163     emit_byte(0xdd);
4164     emit_byte(0xd9); /* take he rounded value off */
4165     tos_make(d); /* store to destination */
4166     }
4167     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4168    
4169     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4170     {
4171     int ds;
4172    
4173     usereg(s);
4174     ds=stackpos(s);
4175     emit_byte(0xd9);
4176     emit_byte(0xc0+ds); /* duplicate source */
4177     emit_byte(0xd9);
4178     emit_byte(0xe8); /* push '1' */
4179     emit_byte(0xd9);
4180     emit_byte(0xc9); /* swap top two */
4181     emit_byte(0xd9);
4182     emit_byte(0xf1); /* take 1*log2(x) */
4183     tos_make(d); /* store to destination */
4184     }
4185     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4186    
4187    
4188     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4189     {
4190     int ds;
4191    
4192     if (d!=s) {
4193     usereg(s);
4194     ds=stackpos(s);
4195     emit_byte(0xd9);
4196     emit_byte(0xc0+ds); /* duplicate source */
4197     emit_byte(0xd9);
4198     emit_byte(0xe0); /* take fchs */
4199     tos_make(d); /* store to destination */
4200     }
4201     else {
4202     make_tos(d);
4203     emit_byte(0xd9);
4204     emit_byte(0xe0); /* take fchs */
4205     }
4206     }
4207     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4208    
4209     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4210     {
4211     int ds;
4212    
4213     usereg(s);
4214     usereg(d);
4215    
4216     if (live.spos[s]==live.tos) {
4217     /* Source is on top of stack */
4218     ds=stackpos(d);
4219     emit_byte(0xdc);
4220     emit_byte(0xc0+ds); /* add source to dest*/
4221     }
4222     else {
4223     make_tos(d);
4224     ds=stackpos(s);
4225    
4226     emit_byte(0xd8);
4227     emit_byte(0xc0+ds); /* add source to dest*/
4228     }
4229     }
4230     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4231    
4232     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4233     {
4234     int ds;
4235    
4236     usereg(s);
4237     usereg(d);
4238    
4239     if (live.spos[s]==live.tos) {
4240     /* Source is on top of stack */
4241     ds=stackpos(d);
4242     emit_byte(0xdc);
4243     emit_byte(0xe8+ds); /* sub source from dest*/
4244     }
4245     else {
4246     make_tos(d);
4247     ds=stackpos(s);
4248    
4249     emit_byte(0xd8);
4250     emit_byte(0xe0+ds); /* sub src from dest */
4251     }
4252     }
4253     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4254    
4255     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4256     {
4257     int ds;
4258    
4259     usereg(s);
4260     usereg(d);
4261    
4262     make_tos(d);
4263     ds=stackpos(s);
4264    
4265     emit_byte(0xdd);
4266     emit_byte(0xe0+ds); /* cmp dest with source*/
4267     }
4268     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4269    
4270     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4271     {
4272     int ds;
4273    
4274     usereg(s);
4275     usereg(d);
4276    
4277     if (live.spos[s]==live.tos) {
4278     /* Source is on top of stack */
4279     ds=stackpos(d);
4280     emit_byte(0xdc);
4281     emit_byte(0xc8+ds); /* mul dest by source*/
4282     }
4283     else {
4284     make_tos(d);
4285     ds=stackpos(s);
4286    
4287     emit_byte(0xd8);
4288     emit_byte(0xc8+ds); /* mul dest by source*/
4289     }
4290     }
4291     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4292    
4293     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4294     {
4295     int ds;
4296    
4297     usereg(s);
4298     usereg(d);
4299    
4300     if (live.spos[s]==live.tos) {
4301     /* Source is on top of stack */
4302     ds=stackpos(d);
4303     emit_byte(0xdc);
4304     emit_byte(0xf8+ds); /* div dest by source */
4305     }
4306     else {
4307     make_tos(d);
4308     ds=stackpos(s);
4309    
4310     emit_byte(0xd8);
4311     emit_byte(0xf0+ds); /* div dest by source*/
4312     }
4313     }
4314     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4315    
4316     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4317     {
4318     int ds;
4319    
4320     usereg(s);
4321     usereg(d);
4322    
4323     make_tos2(d,s);
4324     ds=stackpos(s);
4325    
4326     if (ds!=1) {
4327     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4328     abort();
4329     }
4330     emit_byte(0xd9);
4331     emit_byte(0xf8); /* take rem from dest by source */
4332     }
4333     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4334    
4335     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4336     {
4337     int ds;
4338    
4339     usereg(s);
4340     usereg(d);
4341    
4342     make_tos2(d,s);
4343     ds=stackpos(s);
4344    
4345     if (ds!=1) {
4346     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4347     abort();
4348     }
4349     emit_byte(0xd9);
4350     emit_byte(0xf5); /* take rem1 from dest by source */
4351     }
4352     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4353    
4354    
4355     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4356     {
4357     make_tos(r);
4358     emit_byte(0xd9); /* ftst */
4359     emit_byte(0xe4);
4360     }
4361     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4362    
/* %eax is clobbered when the target processor doesn't support fucomi
   (tested through have_cmov): raw_fflags_into_flags() then has to go
   through "fstsw %ax; sahf". The expansion is parenthesized so the
   macro can be used safely inside larger expressions. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
4366    
4367     static __inline__ void raw_fflags_into_flags(int r)
4368     {
4369     int p;
4370    
4371     usereg(r);
4372     p=stackpos(r);
4373    
4374     emit_byte(0xd9);
4375     emit_byte(0xee); /* Push 0 */
4376     emit_byte(0xd9);
4377     emit_byte(0xc9+p); /* swap top two around */
4378     if (have_cmov) {
4379     // gb-- fucomi is for P6 cores only, not K6-2 then...
4380     emit_byte(0xdb);
4381     emit_byte(0xe9+p); /* fucomi them */
4382     }
4383     else {
4384     emit_byte(0xdd);
4385     emit_byte(0xe1+p); /* fucom them */
4386     emit_byte(0x9b);
4387     emit_byte(0xdf);
4388     emit_byte(0xe0); /* fstsw ax */
4389     raw_sahf(0); /* sahf */
4390     }
4391     emit_byte(0xdd);
4392     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4393     }