ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.18
Committed: 2003-06-03T09:01:03Z (21 years, 3 months ago) by gbeauche
Branch: MAIN
Changes since 1.17: +2 -2 lines
Log Message:
Call correct PUSHF/POPF macro

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numeric indices of the eight IA-32 general-purpose registers (0 = EAX ... 7 = EDI).
   The REG_*, *_NREG constants and the per-register tables in this file use these values. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45    
46     /* The register in which subroutines return an integer return value (0, the same value as EAX_INDEX) */
47     #define REG_RESULT 0
48    
49     /* The registers subroutines take their first and second argument in.
   MSVC build without USE_NORMAL_CALLING_CONVENTION: 1 and 2 (ECX_INDEX, EDX_INDEX).
   Every other build: 0 and 2 (EAX_INDEX, EDX_INDEX). */
50     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
51     /* Handle the _fastcall parameters of ECX and EDX */
52     #define REG_PAR1 1
53     #define REG_PAR2 2
54     #else
55     #define REG_PAR1 0
56     #define REG_PAR2 2
57     #endif
58    
59     /* Three registers that are not used for any of the above: 6, 5 and 3 (ESI_INDEX, EBP_INDEX, EBX_INDEX) */
60     #define REG_NOPAR1 6
61     #define REG_NOPAR2 5
62     #define REG_NOPAR3 3
63    
/* NOTE(review): in the MSVC fastcall branch REG_PC_TMP is 0, i.e. the same value as
   REG_PC_PRE, although the comment on the other branch asks for a different register.
   Confirm this is intentional. */
64     #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     #define REG_PC_TMP 0
67     #else
68     #define REG_PC_TMP 1 /* Another register that is not the above */
69     #endif
70    
/* Fixed native registers: shift count in 1 (ECX_INDEX), 32x32 multiply result in 0 and 2. */
71     #define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount.
72     -1 if any reg will do */
73     #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
74     #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
75    
/* Register-index lists, each terminated by -1.
   always_used holds only 4 (ESP_INDEX); can_byte holds 0..3; can_word holds every index except 4.
   Presumably: registers never available for allocation, and registers usable for 8-bit and
   16-bit operands respectively -- TODO confirm against the register allocator. */
76     uae_s8 always_used[]={4,-1};
77     uae_s8 can_byte[]={0,1,2,3,-1};
78     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79    
/* call_saved: one entry per register index (0..7); presumably 1 means the register
   survives a call to a C handler -- TODO confirm against the users of this table. */
80 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
81     /* Make sure interpretive core does not use cpuopti */
82     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
83     #else
84 gbeauche 1.1 /* cpuopti mutates instruction handlers to assume registers are saved
85     by the caller, so only index 4 (ESP_INDEX) is marked here */
86     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
87 gbeauche 1.17 #endif
88 gbeauche 1.1
89     /* This *should* be the same as call_saved. But:
90     - We might not really know which registers are saved, and which aren't,
91     so we need to preserve some, but don't want to rely on everyone else
92     also saving those registers
93     - Special registers (such as the stack pointer) should not be "preserved"
94     by pushing, even though they are "saved" across function calls
   Every index is 1 except index 4 (ESP_INDEX).
95     */
96     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
97    
98     /* Whether classes of instructions do or don't clobber the native flags.
   A class that leaves the flags alone expands to nothing; a class that
   modifies them expands to a clobber_flags() call. */
99     #define CLOBBER_MOV
100     #define CLOBBER_LEA
101     #define CLOBBER_CMOV
102     #define CLOBBER_POP
103     #define CLOBBER_PUSH
104     #define CLOBBER_SUB clobber_flags()
105     #define CLOBBER_SBB clobber_flags()
106     #define CLOBBER_CMP clobber_flags()
107     #define CLOBBER_ADD clobber_flags()
108     #define CLOBBER_ADC clobber_flags()
109     #define CLOBBER_AND clobber_flags()
110     #define CLOBBER_OR clobber_flags()
111     #define CLOBBER_XOR clobber_flags()
112    
/* Flag-clobber markers for rotate/shift, test, clear/extend, byte-swap, setcc, multiply
   and bit-scan classes: empty when the class is declared not to touch the native flags,
   clobber_flags() otherwise. */
113     #define CLOBBER_ROL clobber_flags()
114     #define CLOBBER_ROR clobber_flags()
115     #define CLOBBER_SHLL clobber_flags()
116     #define CLOBBER_SHRL clobber_flags()
117     #define CLOBBER_SHRA clobber_flags()
118     #define CLOBBER_TEST clobber_flags()
119     #define CLOBBER_CL16
120     #define CLOBBER_CL8
121     #define CLOBBER_SE16
122     #define CLOBBER_SE8
123     #define CLOBBER_ZE16
124     #define CLOBBER_ZE8
125     #define CLOBBER_SW16 clobber_flags()
126     #define CLOBBER_SW32
127     #define CLOBBER_SETCC
128     #define CLOBBER_MUL clobber_flags()
129     #define CLOBBER_BT clobber_flags()
130     #define CLOBBER_BSF clobber_flags()
131    
132 gbeauche 1.13 /* FIXME: disabled until that's proofread.
   Everything after this "#if 0" is compiled out; the matching #else/#endif lies beyond this view. */
133     #if 0
134    
/* Configuration macros defined before including codegen_x86.h.
   X86_TARGET_64BIT is set only when the compiler defines __x86_64__.
   Presumably the header reads these switches -- TODO confirm in codegen_x86.h. */
135     #if defined(__x86_64__)
136     #define X86_TARGET_64BIT 1
137     #endif
138     #define X86_FLAT_REGISTERS 0
139 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
140     #define X86_OPTIMIZE_ROTSHI 1
141 gbeauche 1.13 #include "codegen_x86.h"
142    
/* Adapters from the x86_* hook names to this file's emit_byte/emit_word/emit_long/get_target.
   x86_emit_failure calls jit_fail with the message plus __FILE__, __LINE__ and __FUNCTION__
   of the expansion site. */
143     #define x86_emit_byte(B) emit_byte(B)
144     #define x86_emit_word(W) emit_word(W)
145     #define x86_emit_long(L) emit_long(L)
146     #define x86_get_target() get_target()
147     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
148    
/* jit_fail: report a code-generation failure and terminate.
   Writes one line to stderr naming the function, file, line and message, then calls abort().
   Does not return. */
149     static void jit_fail(const char *msg, const char *file, int line, const char *function)
150     {
151     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
152     function, file, line, msg);
153     abort();
154     }
155    
/* raw_push_l_r: wraps PUSHLr(r); presumably emits a 32-bit push of register r (macro defined outside this view -- confirm). */
156     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
157     {
158     PUSHLr(r);
159     }
160     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
161    
/* raw_pop_l_r: wraps POPLr(r); presumably emits a 32-bit pop into register r (macro defined outside this view -- confirm). */
162     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
163     {
164     POPLr(r);
165     }
166     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
167    
/* raw_bt_l_ri: wraps BTLir(i, r); presumably emits a 32-bit bit test of bit i in register r -- confirm against the macro. */
168     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
169     {
170     BTLir(i, r);
171     }
172     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
173    
/* raw_bt_l_rr: wraps BTLrr(b, r); presumably a 32-bit bit test of r with the bit number taken from register b -- confirm. */
174     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
175     {
176     BTLrr(b, r);
177     }
178     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
179    
/* raw_btc_l_ri: wraps BTCLir(i, r); presumably bit test-and-complement of bit i in 32-bit register r -- confirm. */
180     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
181     {
182     BTCLir(i, r);
183     }
184     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
185    
/* raw_btc_l_rr: wraps BTCLrr(b, r); presumably bit test-and-complement of r, bit number in register b -- confirm. */
186     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
187     {
188     BTCLrr(b, r);
189     }
190     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
191    
/* raw_btr_l_ri: wraps BTRLir(i, r); presumably bit test-and-reset of bit i in 32-bit register r -- confirm. */
192     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
193     {
194     BTRLir(i, r);
195     }
196     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
197    
/* raw_btr_l_rr: wraps BTRLrr(b, r); presumably bit test-and-reset of r, bit number in register b -- confirm. */
198     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
199     {
200     BTRLrr(b, r);
201     }
202     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
203    
/* raw_bts_l_ri: wraps BTSLir(i, r); presumably bit test-and-set of bit i in 32-bit register r -- confirm. */
204     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
205     {
206     BTSLir(i, r);
207     }
208     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
209    
/* raw_bts_l_rr: wraps BTSLrr(b, r); presumably bit test-and-set of r, bit number in register b -- confirm. */
210     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
211     {
212     BTSLrr(b, r);
213     }
214     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
215    
/* raw_sub_w_ri: wraps SUBWir(i, d); presumably subtracts immediate i from the 16-bit part of register d -- confirm. */
216     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
217     {
218     SUBWir(i, d);
219     }
220     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
221    
/* raw_mov_l_rm: wraps MOVLmr with s as the displacement and no base/index register (X86_NOREG);
   presumably a 32-bit load from absolute address s into d -- confirm. */
222     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
223     {
224     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
225     }
226     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
227    
/* raw_mov_l_mi: wraps MOVLim with d as the displacement and no base/index register;
   presumably stores 32-bit immediate s to absolute address d -- confirm. */
228     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
229     {
230     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
231     }
232     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
233    
/* raw_mov_w_mi: wraps MOVWim with d as the displacement and no base/index register;
   presumably stores 16-bit immediate s to absolute address d -- confirm. */
234     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
235     {
236     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
237     }
238     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
239    
/* raw_mov_b_mi: wraps MOVBim with d as the displacement and no base/index register;
   presumably stores 8-bit immediate s to absolute address d -- confirm. */
240     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
241     {
242     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
243     }
244     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
245    
/* raw_rol_b_mi: wraps ROLBim with d as the displacement and no base/index register;
   presumably rotates the byte at absolute address d left by i -- confirm. */
246     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
247     {
248     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
249     }
250     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
251    
/* raw_rol_b_ri: wraps ROLBir(i, r); presumably rotates the 8-bit part of r left by immediate i -- confirm. */
252     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
253     {
254     ROLBir(i, r);
255     }
256     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
257    
/* raw_rol_w_ri: wraps ROLWir(i, r); presumably rotates the 16-bit part of r left by immediate i -- confirm. */
258     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
259     {
260     ROLWir(i, r);
261     }
262     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
263    
/* raw_rol_l_ri: wraps ROLLir(i, r); presumably rotates 32-bit register r left by immediate i -- confirm. */
264     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
265     {
266     ROLLir(i, r);
267     }
268     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
269    
/* raw_rol_l_rr: wraps ROLLrr(r, d); presumably rotates 32-bit d left by the count held in register r -- confirm. */
270     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
271     {
272     ROLLrr(r, d);
273     }
274     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
275    
/* raw_rol_w_rr: wraps ROLWrr(r, d); presumably rotates the 16-bit part of d left by the count in register r -- confirm. */
276     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
277     {
278     ROLWrr(r, d);
279     }
280     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
281    
/* raw_rol_b_rr: wraps ROLBrr(r, d); presumably rotates the 8-bit part of d left by the count in register r -- confirm. */
282     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
283     {
284     ROLBrr(r, d);
285     }
286     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
287    
/* raw_shll_l_rr: wraps SHLLrr(r, d); presumably shifts 32-bit d left by the count in register r -- confirm. */
288     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
289     {
290     SHLLrr(r, d);
291     }
292     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
293    
/* raw_shll_w_rr: wraps SHLWrr(r, d); presumably shifts the 16-bit part of d left by the count in register r -- confirm. */
294     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
295     {
296     SHLWrr(r, d);
297     }
298     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
299    
/* raw_shll_b_rr: wraps SHLBrr(r, d); presumably shifts the 8-bit part of d left by the count in register r -- confirm. */
300     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
301     {
302     SHLBrr(r, d);
303     }
304     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
305    
/* raw_ror_b_ri: wraps RORBir(i, r); presumably rotates the 8-bit part of r right by immediate i -- confirm. */
306     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
307     {
308     RORBir(i, r);
309     }
310     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
311    
/* raw_ror_w_ri: wraps RORWir(i, r); presumably rotates the 16-bit part of r right by immediate i -- confirm. */
312     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
313     {
314     RORWir(i, r);
315     }
316     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
317    
/* raw_or_l_rm: wraps ORLmr with s as the displacement and no base/index register;
   presumably ORs the 32-bit value at absolute address s into d -- confirm. */
318     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
319     {
320     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
321     }
322     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
323    
/* raw_ror_l_ri: wraps RORLir(i, r); presumably rotates 32-bit register r right by immediate i -- confirm. */
324     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
325     {
326     RORLir(i, r);
327     }
328     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
329    
/* raw_ror_l_rr: wraps RORLrr(r, d); presumably rotates 32-bit d right by the count in register r -- confirm. */
330     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
331     {
332     RORLrr(r, d);
333     }
334     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
335    
/* raw_ror_w_rr: wraps RORWrr(r, d); presumably rotates the 16-bit part of d right by the count in register r -- confirm. */
336     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
337     {
338     RORWrr(r, d);
339     }
340     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
341    
/* raw_ror_b_rr: wraps RORBrr(r, d); presumably rotates the 8-bit part of d right by the count in register r -- confirm. */
342     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
343     {
344     RORBrr(r, d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
347    
/* raw_shrl_l_rr: wraps SHRLrr(r, d); presumably a logical right shift of 32-bit d by the count in register r -- confirm. */
348     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
349     {
350     SHRLrr(r, d);
351     }
352     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
353    
/* raw_shrl_w_rr: wraps SHRWrr(r, d); presumably a logical right shift of the 16-bit part of d by the count in r -- confirm. */
354     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
355     {
356     SHRWrr(r, d);
357     }
358     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
359    
/* raw_shrl_b_rr: wraps SHRBrr(r, d); presumably a logical right shift of the 8-bit part of d by the count in r -- confirm. */
360     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
361     {
362     SHRBrr(r, d);
363     }
364     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
365    
/* raw_shra_l_rr: wraps SARLrr(r, d); presumably an arithmetic right shift of 32-bit d by the count in register r -- confirm. */
366     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
367     {
368 gbeauche 1.14 SARLrr(r, d);
369 gbeauche 1.13 }
370     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
371    
/* raw_shra_w_rr: wraps SARWrr(r, d); presumably an arithmetic right shift of the 16-bit part of d by the count in r -- confirm. */
372     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
373     {
374 gbeauche 1.14 SARWrr(r, d);
375 gbeauche 1.13 }
376     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
377    
/* raw_shra_b_rr: wraps SARBrr(r, d); presumably an arithmetic right shift of the 8-bit part of d by the count in r -- confirm. */
378     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
379     {
380 gbeauche 1.14 SARBrr(r, d);
381 gbeauche 1.13 }
382     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
383    
/* raw_shll_l_ri: wraps SHLLir(i, r); presumably shifts 32-bit register r left by immediate i -- confirm. */
384     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
385     {
386     SHLLir(i, r);
387     }
388     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
389    
/* raw_shll_w_ri: wraps SHLWir(i, r); presumably shifts the 16-bit part of r left by immediate i -- confirm. */
390     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
391     {
392     SHLWir(i, r);
393     }
394     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
395    
/* raw_shll_b_ri: wraps SHLBir(i, r); presumably shifts the 8-bit part of r left by immediate i -- confirm. */
396     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
397     {
398     SHLBir(i, r);
399     }
400     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
401    
/* raw_shrl_l_ri: wraps SHRLir(i, r); presumably a logical right shift of 32-bit register r by immediate i -- confirm. */
402     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
403     {
404     SHRLir(i, r);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
407    
/* raw_shrl_w_ri: wraps SHRWir(i, r); presumably a logical right shift of the 16-bit part of r by immediate i -- confirm. */
408     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
409     {
410     SHRWir(i, r);
411     }
412     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
413    
/* raw_shrl_b_ri: wraps SHRBir(i, r); presumably a logical right shift of the 8-bit part of r by immediate i -- confirm. */
414     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
415     {
416     SHRBir(i, r);
417     }
418     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
419    
/* raw_shra_l_ri: wraps SARLir(i, r); presumably an arithmetic right shift of 32-bit register r by immediate i -- confirm. */
420     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
421     {
422 gbeauche 1.14 SARLir(i, r);
423 gbeauche 1.13 }
424     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
425    
/* raw_shra_w_ri: wraps SARWir(i, r); presumably an arithmetic right shift of the 16-bit part of r by immediate i -- confirm. */
426     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
427     {
428 gbeauche 1.14 SARWir(i, r);
429 gbeauche 1.13 }
430     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
431    
/* raw_shra_b_ri: wraps SARBir(i, r); presumably an arithmetic right shift of the 8-bit part of r by immediate i -- confirm. */
432     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
433     {
434 gbeauche 1.14 SARBir(i, r);
435 gbeauche 1.13 }
436     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
437    
/* raw_sahf: wraps SAHF(). The dummy_ah parameter is not used in the body; presumably it only
   tells the register allocator that AH is read -- TODO confirm. */
438     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
439     {
440     SAHF();
441     }
442     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
443    
/* raw_cpuid: wraps CPUID(). The dummy_eax parameter is not used in the body; presumably it only
   tells the register allocator that EAX is involved -- TODO confirm. */
444     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
445     {
446     CPUID();
447     }
448     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
449    
/* raw_lahf: wraps LAHF(). The dummy_ah parameter is not used in the body; presumably it only
   tells the register allocator that AH is written -- TODO confirm. */
450     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
451     {
452     LAHF();
453     }
454     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
455    
/* raw_setcc: wraps SETCCir(cc, d); presumably sets the 8-bit register d to 0/1 according to condition code cc -- confirm. */
456     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
457     {
458     SETCCir(cc, d);
459     }
460     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
461    
/* raw_setcc_m: wraps SETCCim with d as the displacement and no base/index register;
   presumably stores 0/1 for condition cc to the byte at absolute address d -- confirm. */
462     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
463     {
464     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
465     }
466     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
467    
/* raw_cmov_l_rr: conditional 32-bit register move of s into d under condition cc.
   When have_cmov is set, expands CMOVLrr(cc, s, d).
   Otherwise it expands JCCSii on the toggled condition (cc^1) with operand 2, then MOVLrr(s, d).
   Presumably the 2 is the byte length of that register move, so the jump skips it -- confirm.
   On __x86_64__ builds the fallback path logs a message and aborts before emitting anything. */
468     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
469     {
470 gbeauche 1.15 if (have_cmov)
471     CMOVLrr(cc, s, d);
472     else { /* replacement using branch and mov */
473     #if defined(__x86_64__)
474     write_log("x86-64 implementations are bound to have CMOV!\n");
475     abort();
476     #endif
477     JCCSii(cc^1, 2);
478     MOVLrr(s, d);
479     }
480 gbeauche 1.13 }
481     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
482    
/* raw_bsf_l_rr: wraps BSFLrr(s, d); presumably a 32-bit bit-scan-forward of s with the result in d -- confirm. */
483     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
484     {
485     BSFLrr(s, d);
486     }
487     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
488    
/* raw_sign_extend_16_rr: wraps MOVSWLrr(s, d); presumably sign-extends the 16-bit value in s to 32 bits in d -- confirm. */
489     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
490     {
491     MOVSWLrr(s, d);
492     }
493     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
494    
/* raw_sign_extend_8_rr: wraps MOVSBLrr(s, d); presumably sign-extends the 8-bit value in s to 32 bits in d -- confirm. */
495     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
496     {
497     MOVSBLrr(s, d);
498     }
499     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
500    
/* raw_zero_extend_16_rr: wraps MOVZWLrr(s, d); presumably zero-extends the 16-bit value in s to 32 bits in d -- confirm. */
501     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
502     {
503     MOVZWLrr(s, d);
504     }
505     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
506    
/* raw_zero_extend_8_rr: wraps MOVZBLrr(s, d); presumably zero-extends the 8-bit value in s to 32 bits in d -- confirm. */
507     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
508     {
509     MOVZBLrr(s, d);
510     }
511     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
512    
/* raw_imul_32_32: wraps IMULLrr(s, d); presumably a signed 32x32 multiply of d by s keeping the low 32 bits in d -- confirm. */
513     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
514     {
515 gbeauche 1.14 IMULLrr(s, d);
516 gbeauche 1.13 }
517     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
518    
/* raw_imul_64_32: signed multiply with a register-pair result.
   Requires d == MUL_NREG1 and s == MUL_NREG2 (0 and 2; documented at their definitions as %eax
   low half and %edx high half); any other pair is logged and the process aborts.
   Then expands IMULLr(s), presumably the one-operand signed multiply by s -- confirm. */
519     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
520     {
521 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
522     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
523 gbeauche 1.13 abort();
524 gbeauche 1.14 }
525     IMULLr(s);
526 gbeauche 1.13 }
527     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
528    
/* raw_mul_64_32: unsigned counterpart of raw_imul_64_32.
   Requires d == MUL_NREG1 and s == MUL_NREG2 (0 and 2); any other pair is logged and the
   process aborts. Then expands MULLr(s), presumably the one-operand unsigned multiply by s -- confirm. */
529     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
530     {
531 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
532     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
533 gbeauche 1.13 abort();
534 gbeauche 1.14 }
535     MULLr(s);
536 gbeauche 1.13 }
537     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
538    
/* raw_mul_32_32: not implemented -- the body unconditionally calls abort() and emits nothing. */
539     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
540     {
541 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
542 gbeauche 1.13 }
543     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
544    
/* raw_mov_b_rr: wraps MOVBrr(s, d); presumably an 8-bit register-to-register move from s to d -- confirm. */
545     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
546     {
547     MOVBrr(s, d);
548     }
549     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
550    
/* raw_mov_w_rr: wraps MOVWrr(s, d); presumably a 16-bit register-to-register move from s to d -- confirm. */
551     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
552     {
553     MOVWrr(s, d);
554     }
555     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
556    
/* raw_mov_l_rrm_indexed: wraps MOVLmr with displacement 0, base baser, index and factor;
   presumably a 32-bit load from [baser + index*factor] into d -- confirm. */
557     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
558     {
559     MOVLmr(0, baser, index, factor, d);
560     }
561     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
562    
/* raw_mov_w_rrm_indexed: wraps MOVWmr with displacement 0, base baser, index and factor;
   presumably a 16-bit load from [baser + index*factor] into d -- confirm. */
563     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
564     {
565     MOVWmr(0, baser, index, factor, d);
566     }
567     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
568    
/* raw_mov_b_rrm_indexed: wraps MOVBmr with displacement 0, base baser, index and factor;
   presumably an 8-bit load from [baser + index*factor] into d -- confirm. */
569     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
570     {
571     MOVBmr(0, baser, index, factor, d);
572     }
573     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
574    
/* raw_mov_l_mrr_indexed: wraps MOVLrm with displacement 0, base baser, index and factor;
   presumably a 32-bit store of s to [baser + index*factor] -- confirm. */
575     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
576     {
577     MOVLrm(s, 0, baser, index, factor);
578     }
579     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
580    
/* raw_mov_w_mrr_indexed: wraps MOVWrm with displacement 0, base baser, index and factor;
   presumably a 16-bit store of s to [baser + index*factor] -- confirm. */
581     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
582     {
583     MOVWrm(s, 0, baser, index, factor);
584     }
585     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
586    
/* raw_mov_b_mrr_indexed: wraps MOVBrm with displacement 0, base baser, index and factor;
   presumably an 8-bit store of s to [baser + index*factor] -- confirm. */
587     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
588     {
589     MOVBrm(s, 0, baser, index, factor);
590     }
591     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
592    
/* raw_mov_l_bmrr_indexed: wraps MOVLrm with displacement base, base register baser, index and factor;
   presumably a 32-bit store of s to [base + baser + index*factor] -- confirm. */
593     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
594     {
595     MOVLrm(s, base, baser, index, factor);
596     }
597     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
598    
/* raw_mov_w_bmrr_indexed: wraps MOVWrm with displacement base, base register baser, index and factor;
   presumably a 16-bit store of s to [base + baser + index*factor] -- confirm. */
599     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
600     {
601     MOVWrm(s, base, baser, index, factor);
602     }
603     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
604    
/* raw_mov_b_bmrr_indexed: wraps MOVBrm with displacement base, base register baser, index and factor;
   presumably an 8-bit store of s to [base + baser + index*factor] -- confirm. */
605     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
606     {
607     MOVBrm(s, base, baser, index, factor);
608     }
609     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
610    
/* raw_mov_l_brrm_indexed: wraps MOVLmr with displacement base, base register baser, index and factor;
   presumably a 32-bit load from [base + baser + index*factor] into d -- confirm. */
611     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
612     {
613     MOVLmr(base, baser, index, factor, d);
614     }
615     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
616    
/* raw_mov_w_brrm_indexed: wraps MOVWmr with displacement base, base register baser, index and factor;
   presumably a 16-bit load from [base + baser + index*factor] into d -- confirm. */
617     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
618     {
619     MOVWmr(base, baser, index, factor, d);
620     }
621     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
622    
/* raw_mov_b_brrm_indexed: wraps MOVBmr with displacement base, base register baser, index and factor;
   presumably an 8-bit load from [base + baser + index*factor] into d -- confirm. */
623     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
624     {
625     MOVBmr(base, baser, index, factor, d);
626     }
627     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
628    
/* raw_mov_l_rm_indexed: wraps MOVLmr with displacement base, no base register (X86_NOREG), index and factor;
   presumably a 32-bit load from [base + index*factor] into d -- confirm. */
629     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
630     {
631     MOVLmr(base, X86_NOREG, index, factor, d);
632     }
633     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
634    
/* raw_cmov_l_rm_indexed: conditional 32-bit load into d under condition cond, addressed by
   displacement base, no base register, index and factor.
   When have_cmov is set, expands CMOVLmr.
   Otherwise it expands JCCSii on the toggled condition (cond^1) with operand 7, then the plain MOVLmr.
   NOTE(review): the 7 is presumably the byte length of that load; it must match whatever
   encoding MOVLmr actually selects -- confirm.
   On __x86_64__ builds the fallback path logs a message and aborts before emitting anything. */
635     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
636     {
637 gbeauche 1.15 if (have_cmov)
638     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
639     else { /* replacement using branch and mov */
640     #if defined(__x86_64__)
641     write_log("x86-64 implementations are bound to have CMOV!\n");
642     abort();
643     #endif
644     JCCSii(cond^1, 7);
645     MOVLmr(base, X86_NOREG, index, factor, d);
646     }
647 gbeauche 1.13 }
648     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
649    
/* raw_cmov_l_rm: conditional 32-bit load into d from displacement mem (no base/index register)
   under condition cond.
   When have_cmov is set, expands CMOVLmr.
   Otherwise it expands JCCSii on the toggled condition (cond^1) with operand 6, then the plain MOVLmr.
   NOTE(review): the 6 is presumably the byte length of that load; it must match whatever
   encoding MOVLmr actually selects -- confirm.
   On __x86_64__ builds the fallback path logs a message and aborts before emitting anything. */
650     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
651     {
652 gbeauche 1.15 if (have_cmov)
653     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
654     else { /* replacement using branch and mov */
655     #if defined(__x86_64__)
656     write_log("x86-64 implementations are bound to have CMOV!\n");
657     abort();
658     #endif
659     JCCSii(cond^1, 6);
660     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
661     }
662 gbeauche 1.13 }
663     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
664    
/* raw_mov_l_rR: wraps MOVLmr with displacement offset and base register s, no index;
   presumably a 32-bit load from [s + offset] into d -- confirm. */
665     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
666     {
667     MOVLmr(offset, s, X86_NOREG, 1, d);
668     }
669     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
670    
/* raw_mov_w_rR: wraps MOVWmr with displacement offset and base register s, no index;
   presumably a 16-bit load from [s + offset] into d -- confirm. */
671     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
672     {
673     MOVWmr(offset, s, X86_NOREG, 1, d);
674     }
675     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
676    
/* raw_mov_b_rR: wraps MOVBmr with displacement offset and base register s, no index;
   presumably an 8-bit load from [s + offset] into d -- confirm. */
677     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
678     {
679     MOVBmr(offset, s, X86_NOREG, 1, d);
680     }
681     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
682    
/* raw_mov_l_brR: wraps MOVLmr with displacement offset and base register s, no index;
   presumably a 32-bit load from [s + offset] into d. The body is identical to raw_mov_l_rR here.
   NOTE(review): if callers rely on the "b" variant using a fixed-width displacement, confirm MOVLmr honours that. */
683     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
684     {
685     MOVLmr(offset, s, X86_NOREG, 1, d);
686     }
687     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
688    
/* raw_mov_w_brR: wraps MOVWmr with displacement offset and base register s, no index;
   presumably a 16-bit load from [s + offset] into d. The body is identical to raw_mov_w_rR here -- confirm intent. */
689     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
690     {
691     MOVWmr(offset, s, X86_NOREG, 1, d);
692     }
693     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
694    
/* raw_mov_b_brR: wraps MOVBmr with displacement offset and base register s, no index;
   presumably an 8-bit load from [s + offset] into d. The body is identical to raw_mov_b_rR here -- confirm intent. */
695     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
696     {
697     MOVBmr(offset, s, X86_NOREG, 1, d);
698     }
699     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
700    
/* raw_mov_l_Ri: wraps MOVLim with displacement offset and base register d, no index;
   presumably stores 32-bit immediate i to [d + offset] -- confirm. */
701     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
702     {
703     MOVLim(i, offset, d, X86_NOREG, 1);
704     }
705     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
706    
/* raw_mov_w_Ri: wraps MOVWim with displacement offset and base register d, no index;
   presumably stores 16-bit immediate i to [d + offset] -- confirm. */
707     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
708     {
709     MOVWim(i, offset, d, X86_NOREG, 1);
710     }
711     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
712    
/* raw_mov_b_Ri: wraps MOVBim with displacement offset and base register d, no index;
   presumably stores 8-bit immediate i to [d + offset] -- confirm. */
713     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
714     {
715     MOVBim(i, offset, d, X86_NOREG, 1);
716     }
717     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
718    
/* raw_mov_l_Rr: wraps MOVLrm with displacement offset and base register d, no index;
   presumably a 32-bit store of s to [d + offset] -- confirm. */
719     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
720     {
721     MOVLrm(s, offset, d, X86_NOREG, 1);
722     }
723     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
724    
/* raw_mov_w_Rr: wraps MOVWrm with displacement offset and base register d, no index;
   presumably a 16-bit store of s to [d + offset] -- confirm. */
725     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
726     {
727     MOVWrm(s, offset, d, X86_NOREG, 1);
728     }
729     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
730    
/* raw_mov_b_Rr: wraps MOVBrm with displacement offset and base register d, no index;
   presumably an 8-bit store of s to [d + offset] -- confirm. */
731     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
732     {
733     MOVBrm(s, offset, d, X86_NOREG, 1);
734     }
735     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
736    
/* raw_lea_l_brr: wraps LEALmr with displacement offset and base register s, no index;
   presumably computes s + offset into d without accessing memory -- confirm. */
737     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
738     {
739     LEALmr(offset, s, X86_NOREG, 1, d);
740     }
741     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
742    
/* raw_lea_l_brr_indexed: wraps LEALmr with displacement offset, base s, index and factor;
   presumably computes s + index*factor + offset into d -- confirm. */
743     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
744     {
745     LEALmr(offset, s, index, factor, d);
746     }
747     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
748    
/* raw_lea_l_rr_indexed: wraps LEALmr with displacement 0, base s, index and factor;
   presumably computes s + index*factor into d -- confirm. */
749     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
750     {
751     LEALmr(0, s, index, factor, d);
752     }
753     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
754    
/* raw_mov_l_bRr: wraps MOVLrm with displacement offset and base register d, no index;
   presumably a 32-bit store of s to [d + offset]. The body is identical to raw_mov_l_Rr here -- confirm intent. */
755     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
756     {
757     MOVLrm(s, offset, d, X86_NOREG, 1);
758     }
759     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
760    
/* raw_mov_w_bRr: wraps MOVWrm with displacement offset and base register d, no index;
   presumably a 16-bit store of s to [d + offset]. The body is identical to raw_mov_w_Rr here -- confirm intent. */
761     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
762     {
763     MOVWrm(s, offset, d, X86_NOREG, 1);
764     }
765     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
766    
/* raw_mov_b_bRr: wraps MOVBrm with displacement offset and base register d, no index;
   presumably an 8-bit store of s to [d + offset]. The body is identical to raw_mov_b_Rr here -- confirm intent. */
767     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
768     {
769     MOVBrm(s, offset, d, X86_NOREG, 1);
770     }
771     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
772    
/* raw_bswap_32: wraps BSWAPLr(r); presumably reverses the byte order of 32-bit register r -- confirm. */
773     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
774     {
775     BSWAPLr(r);
776     }
777     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
778    
/* raw_bswap_16: implemented as ROLWir(8, r). Rotating a 16-bit value by 8 exchanges its two bytes,
   assuming ROLWir is a 16-bit rotate-left by immediate -- confirm. */
779     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
780     {
781     ROLWir(8, r);
782     }
783     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
784    
/* raw_mov_l_rr: wraps MOVLrr(s, d); presumably a 32-bit register-to-register move from s to d -- confirm. */
785     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
786     {
787     MOVLrr(s, d);
788     }
789     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
790    
/* raw_mov_l_mr: wraps MOVLrm with d as the displacement and no base/index register;
   presumably a 32-bit store of s to absolute address d -- confirm. */
791     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
792     {
793     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
794     }
795     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
796    
/* raw_mov_w_mr: wraps MOVWrm with d as the displacement and no base/index register;
   presumably a 16-bit store of s to absolute address d -- confirm. */
797     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
798     {
799     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
800     }
801     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
802    
/* raw_mov_w_rm: wraps MOVWmr with s as the displacement and no base/index register;
   presumably a 16-bit load from absolute address s into d -- confirm. */
803     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
804     {
805     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
806     }
807     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
808    
/* raw_mov_b_mr: wraps MOVBrm with d as the displacement and no base/index register;
   presumably an 8-bit store of s to absolute address d -- confirm. */
809     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
810     {
811     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
812     }
813     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
814    
/* raw_mov_b_rm: wraps MOVBmr with s as the displacement and no base/index register;
   presumably an 8-bit load from absolute address s into d -- confirm. */
815     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
816     {
817     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
818     }
819     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
820    
/* raw_mov_l_ri: wraps MOVLir(s, d); presumably loads 32-bit immediate s into register d -- confirm. */
821     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
822     {
823     MOVLir(s, d);
824     }
825     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
826    
/* raw_mov_w_ri: wraps MOVWir(s, d); presumably loads 16-bit immediate s into the 16-bit part of d -- confirm. */
827     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
828     {
829     MOVWir(s, d);
830     }
831     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
832    
/* raw_mov_b_ri: wraps MOVBir(s, d); presumably loads 8-bit immediate s into the 8-bit part of d -- confirm. */
833     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
834     {
835     MOVBir(s, d);
836     }
837     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
838    
/* raw_adc_l_mi: wraps ADCLim with d as the displacement and no base/index register;
   presumably adds immediate s plus carry to the 32-bit value at absolute address d -- confirm. */
839     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
840     {
841     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
842     }
843     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
844    
/* raw_add_l_mi: wraps ADDLim with d as the displacement and no base/index register;
   presumably adds immediate s to the 32-bit value at absolute address d -- confirm. */
845     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
846     {
847     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
848     }
849     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
850    
/* raw_add_w_mi: wraps ADDWim with d as the displacement and no base/index register;
   presumably adds immediate s to the 16-bit value at absolute address d -- confirm. */
851     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
852     {
853     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
854     }
855     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
856    
/* raw_add_b_mi: wraps ADDBim with d as the displacement and no base/index register;
   presumably adds immediate s to the byte at absolute address d -- confirm. */
857     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
858     {
859     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
860     }
861     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
862    
/* raw_test_l_ri: wraps TESTLir(i, d); presumably a 32-bit TEST of register d against immediate i (flags only) -- confirm. */
863     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
864     {
865     TESTLir(i, d);
866     }
867     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
868    
/* raw_test_l_rr: wraps TESTLrr(s, d); presumably a 32-bit TEST of d against s (flags only) -- confirm. */
869     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
870     {
871     TESTLrr(s, d);
872     }
873     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
874    
/* raw_test_w_rr: wraps TESTWrr(s, d); presumably a 16-bit TEST of d against s (flags only) -- confirm. */
875     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
876     {
877     TESTWrr(s, d);
878     }
879     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
880    
/* raw_test_b_rr: wraps TESTBrr(s, d); presumably an 8-bit TEST of d against s (flags only) -- confirm. */
881     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
882     {
883     TESTBrr(s, d);
884     }
885     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
886    
/* raw_and_l_ri: wraps ANDLir(i, d); presumably ANDs immediate i into 32-bit register d -- confirm. */
887     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
888     {
889     ANDLir(i, d);
890     }
891     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
892    
/* raw_and_w_ri: wraps ANDWir(i, d); presumably ANDs immediate i into the 16-bit part of d -- confirm. */
893     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
894     {
895     ANDWir(i, d);
896     }
897     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
898    
/* raw_and_l: wraps ANDLrr(s, d); presumably ANDs 32-bit register s into d -- confirm. */
899     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
900     {
901     ANDLrr(s, d);
902     }
903     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
904    
/* raw_and_w: wraps ANDWrr(s, d); presumably ANDs the 16-bit part of s into d -- confirm. */
905     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
906     {
907     ANDWrr(s, d);
908     }
909     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
910    
/* raw_and_b: wraps ANDBrr(s, d); presumably ANDs the 8-bit part of s into d -- confirm. */
911     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
912     {
913     ANDBrr(s, d);
914     }
915     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
916    
/* raw_or_l_ri: wraps ORLir(i, d); presumably ORs immediate i into 32-bit register d -- confirm. */
917     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
918     {
919     ORLir(i, d);
920     }
921     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
922    
/* raw_or_l: wraps ORLrr(s, d); presumably ORs 32-bit register s into d -- confirm. */
923     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
924     {
925     ORLrr(s, d);
926     }
927     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
928    
/* raw_or_w: wraps ORWrr(s, d); presumably ORs the 16-bit part of s into d -- confirm. */
929     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
930     {
931     ORWrr(s, d);
932     }
933     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
934    
/* raw_or_b: wraps ORBrr(s, d); presumably ORs the 8-bit part of s into d -- confirm. */
935     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
936     {
937     ORBrr(s, d);
938     }
939     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
940    
/* raw_adc_l: wraps ADCLrr(s, d); presumably adds s plus the carry flag to 32-bit register d -- confirm. */
941     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
942     {
943     ADCLrr(s, d);
944     }
945     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
946    
/* raw_adc_w: wraps ADCWrr(s, d); presumably adds s plus the carry flag to the 16-bit part of d -- confirm. */
947     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
948     {
949     ADCWrr(s, d);
950     }
951     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
952    
/* raw_adc_b: wraps ADCBrr(s, d); presumably adds s plus the carry flag to the 8-bit part of d -- confirm. */
953     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
954     {
955     ADCBrr(s, d);
956     }
957     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
958    
/* raw_add_l: wraps ADDLrr(s, d); presumably adds 32-bit register s to d -- confirm. */
959     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
960     {
961     ADDLrr(s, d);
962     }
963     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
964    
/* Add the 16-bit register s into the 16-bit register d (emitted via ADDWrr). */
965     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
966     {
967     ADDWrr(s, d);
968     }
969     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
970    
/* Add the 8-bit register s into the 8-bit register d (emitted via ADDBrr). */
971     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
972     {
973     ADDBrr(s, d);
974     }
975     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
976    
/* Subtract the immediate i from the 32-bit register d (emitted via SUBLir). */
977     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
978     {
979     SUBLir(i, d);
980     }
981     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
982    
/* Subtract the immediate i from the 8-bit register d (emitted via SUBBir). */
983     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
984     {
985     SUBBir(i, d);
986     }
987     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
988    
/* Add the immediate i to the 32-bit register d (emitted via ADDLir). */
989     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
990     {
991     ADDLir(i, d);
992     }
993     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
994    
/* Add the immediate i to the 16-bit register d (emitted via ADDWir). */
995     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
996     {
997     ADDWir(i, d);
998     }
999     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1000    
/* Add the immediate i to the 8-bit register d (emitted via ADDBir). */
1001     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1002     {
1003     ADDBir(i, d);
1004     }
1005     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1006    
/* Subtract with borrow: 32-bit register s plus the carry flag is subtracted from d (SBBLrr). */
1007     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1008     {
1009     SBBLrr(s, d);
1010     }
1011     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1012    
/* Subtract with borrow: 16-bit register s plus the carry flag is subtracted from d (SBBWrr). */
1013     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1014     {
1015     SBBWrr(s, d);
1016     }
1017     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1018    
/* Subtract with borrow: 8-bit register s plus the carry flag is subtracted from d (SBBBrr). */
1019     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1020     {
1021     SBBBrr(s, d);
1022     }
1023     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1024    
/* Subtract the 32-bit register s from the 32-bit register d (emitted via SUBLrr). */
1025     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1026     {
1027     SUBLrr(s, d);
1028     }
1029     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1030    
/* Subtract the 16-bit register s from the 16-bit register d (emitted via SUBWrr). */
1031     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1032     {
1033     SUBWrr(s, d);
1034     }
1035     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1036    
/* Subtract the 8-bit register s from the 8-bit register d (emitted via SUBBrr). */
1037     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1038     {
1039     SUBBrr(s, d);
1040     }
1041     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1042    
/* Compare the 32-bit register d against s (CMPLrr); both operands are read-only. */
1043     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1044     {
1045     CMPLrr(s, d);
1046     }
1047     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1048    
/* Compare the 32-bit register r against the immediate i (CMPLir). */
1049     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1050     {
1051     CMPLir(i, r);
1052     }
1053     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1054    
/* Compare the 16-bit register d against s (CMPWrr); both operands are read-only. */
1055     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1056     {
1057     CMPWrr(s, d);
1058     }
1059     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1060    
1061     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1062     {
1063     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1064     }
1065     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1066    
/* Compare the 8-bit register d against the immediate i (CMPBir). */
1067     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1068     {
1069     CMPBir(i, d);
1070     }
1071     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1072    
/* Compare the 8-bit register d against s (CMPBrr); both operands are read-only. */
1073     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1074     {
1075     CMPBrr(s, d);
1076     }
1077     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1078    
/* Compare the 32-bit register d against a memory operand built from offset,
 * index and factor (CMPLmr). X86_NOREG is passed in the second slot, which
 * presumably means "no base register", i.e. [offset + index*factor]. */
1079     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1080     {
1081     CMPLmr(offset, X86_NOREG, index, factor, d);
1082     }
1083     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1084    
/* XOR the 32-bit register s into the 32-bit register d (emitted via XORLrr). */
1085     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1086     {
1087     XORLrr(s, d);
1088     }
1089     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1090    
/* XOR the 16-bit register s into the 16-bit register d (emitted via XORWrr). */
1091     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1092     {
1093     XORWrr(s, d);
1094     }
1095     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1096    
/* XOR the 8-bit register s into the 8-bit register d (emitted via XORBrr). */
1097     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1098     {
1099     XORBrr(s, d);
1100     }
1101     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1102    
/* Subtract the immediate s from the 32-bit value stored at address d (SUBLim).
 * X86_NOREG is passed for both register slots of the memory operand. */
1103     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1104     {
1105     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1106     }
1107     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1108    
/* Compare the 32-bit value stored at address d against the immediate s (CMPLim).
 * X86_NOREG is passed for both register slots of the memory operand. */
1109     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1110     {
1111     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1112     }
1113     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1114    
/* Exchange the contents of the 32-bit registers r1 and r2 (emitted via XCHGLrr). */
1115     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1116     {
1117     XCHGLrr(r2, r1);
1118     }
1119     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1120    
/* Push the flags register onto the stack (emitted via the PUSHF macro). */
1121     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1122     {
1123 gbeauche 1.18 PUSHF();
1124 gbeauche 1.13 }
1125     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1126    
/* Pop the flags register from the stack (emitted via the POPF macro). */
1127     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1128     {
1129 gbeauche 1.18 POPF();
1130 gbeauche 1.13 }
1131     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1132    
1133     #else
1134    
/* Encoder tuning switches for the hand-written byte emitters below. */
/* Use the shorter accumulator-only opcode forms when the operand is EAX (see isaccum). */
1135 gbeauche 1.2 const bool optimize_accum = true;
/* NOTE(review): not referenced in this part of the file; presumably enables sign-extended imm8 forms - confirm. */
1136 gbeauche 1.1 const bool optimize_imm8 = true;
/* Use the one-operand "shift/rotate by 1" opcodes (D0/D1) when the count is exactly 1. */
1137     const bool optimize_shift_once = true;
1138    
1139     /*************************************************************************
1140     * Actual encoding of the instructions on the target CPU *
1141     *************************************************************************/
1142    
1143 gbeauche 1.2 static __inline__ int isaccum(int r)
1144     {
1145     return (r == EAX_INDEX);
1146     }
1147    
1148 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1149     {
1150     return (x>=-128 && x<=127);
1151     }
1152    
1153     static __inline__ int isword(uae_s32 x)
1154     {
1155     return (x>=-32768 && x<=32767);
1156     }
1157    
/* PUSH r32: single-byte opcode 0x50 plus the register number r. */
1158     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1159     {
1160     emit_byte(0x50+r);
1161     }
1162     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1163    
/* POP r32: single-byte opcode 0x58 plus the register number r. */
1164     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1165     {
1166     emit_byte(0x58+r);
1167     }
1168     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1169    
/* BT r32, imm8 (0F BA /4 ib): test bit i of register r. */
1170     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1171     {
1172     emit_byte(0x0f);
1173     emit_byte(0xba);
1174     emit_byte(0xe0+r);
1175     emit_byte(i);
1176     }
1177     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1178    
/* BT r32, r32 (0F A3 /r): test the bit of register r selected by register b.
 * ModRM is register-direct with reg=b and rm=r. */
1179     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1180     {
1181     emit_byte(0x0f);
1182     emit_byte(0xa3);
1183     emit_byte(0xc0+8*b+r);
1184     }
1185     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1186    
/* BTC r32, imm8 (0F BA /7 ib): test and complement bit i of register r. */
1187     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1188     {
1189     emit_byte(0x0f);
1190     emit_byte(0xba);
1191     emit_byte(0xf8+r);
1192     emit_byte(i);
1193     }
1194     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1195    
/* BTC r32, r32 (0F BB /r): test and complement the bit of r selected by register b. */
1196     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1197     {
1198     emit_byte(0x0f);
1199     emit_byte(0xbb);
1200     emit_byte(0xc0+8*b+r);
1201     }
1202     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1203    
1204    
/* BTR r32, imm8 (0F BA /6 ib): test and reset (clear) bit i of register r. */
1205     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1206     {
1207     emit_byte(0x0f);
1208     emit_byte(0xba);
1209     emit_byte(0xf0+r);
1210     emit_byte(i);
1211     }
1212     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1213    
/* BTR r32, r32 (0F B3 /r): test and reset the bit of r selected by register b. */
1214     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1215     {
1216     emit_byte(0x0f);
1217     emit_byte(0xb3);
1218     emit_byte(0xc0+8*b+r);
1219     }
1220     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1221    
/* BTS r32, imm8 (0F BA /5 ib): test and set bit i of register r. */
1222     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1223     {
1224     emit_byte(0x0f);
1225     emit_byte(0xba);
1226     emit_byte(0xe8+r);
1227     emit_byte(i);
1228     }
1229     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1230    
/* BTS r32, r32 (0F AB /r): test and set the bit of r selected by register b. */
1231     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1232     {
1233     emit_byte(0x0f);
1234     emit_byte(0xab);
1235     emit_byte(0xc0+8*b+r);
1236     }
1237     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1238    
1239     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1240     {
1241     emit_byte(0x66);
1242     if (isbyte(i)) {
1243     emit_byte(0x83);
1244     emit_byte(0xe8+d);
1245     emit_byte(i);
1246     }
1247     else {
1248 gbeauche 1.2 if (optimize_accum && isaccum(d))
1249     emit_byte(0x2d);
1250     else {
1251 gbeauche 1.1 emit_byte(0x81);
1252     emit_byte(0xe8+d);
1253 gbeauche 1.2 }
1254 gbeauche 1.1 emit_word(i);
1255     }
1256     }
1257     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1258    
1259    
/* MOV r32, [disp32] (8B /r with ModRM mod=00 rm=101): load register d from
 * the 32-bit absolute address s. */
1260     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1261     {
1262     emit_byte(0x8b);
1263     emit_byte(0x05+8*d);
1264     emit_long(s);
1265     }
1266     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1267    
/* MOV dword [disp32], imm32 (C7 /0): store the 32-bit immediate s at the
 * absolute address d. */
1268     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1269     {
1270     emit_byte(0xc7);
1271     emit_byte(0x05);
1272     emit_long(d);
1273     emit_long(s);
1274     }
1275     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1276    
/* MOV word [disp32], imm16 (66 C7 /0): store the 16-bit immediate s at the
 * absolute address d. */
1277     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1278     {
1279     emit_byte(0x66);
1280     emit_byte(0xc7);
1281     emit_byte(0x05);
1282     emit_long(d);
1283     emit_word(s);
1284     }
1285     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1286    
/* MOV byte [disp32], imm8 (C6 /0): store the 8-bit immediate s at the
 * absolute address d. */
1287     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1288     {
1289     emit_byte(0xc6);
1290     emit_byte(0x05);
1291     emit_long(d);
1292     emit_byte(s);
1293     }
1294     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1295    
1296     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1297     {
1298     if (optimize_shift_once && (i == 1)) {
1299     emit_byte(0xd0);
1300     emit_byte(0x05);
1301     emit_long(d);
1302     }
1303     else {
1304     emit_byte(0xc0);
1305     emit_byte(0x05);
1306     emit_long(d);
1307     emit_byte(i);
1308     }
1309     }
1310     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1311    
1312     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1313     {
1314     if (optimize_shift_once && (i == 1)) {
1315     emit_byte(0xd0);
1316     emit_byte(0xc0+r);
1317     }
1318     else {
1319     emit_byte(0xc0);
1320     emit_byte(0xc0+r);
1321     emit_byte(i);
1322     }
1323     }
1324     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1325    
/* ROL r16, imm8 (66 C1 /0 ib): rotate the 16-bit register r left by i.
 * Always emits the count byte; no shift-by-one short form is used here. */
1326     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1327     {
1328     emit_byte(0x66);
1329     emit_byte(0xc1);
1330     emit_byte(0xc0+r);
1331     emit_byte(i);
1332     }
1333     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1334    
1335     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1336     {
1337     if (optimize_shift_once && (i == 1)) {
1338     emit_byte(0xd1);
1339     emit_byte(0xc0+r);
1340     }
1341     else {
1342     emit_byte(0xc1);
1343     emit_byte(0xc0+r);
1344     emit_byte(i);
1345     }
1346     }
1347     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1348    
/* ROL r32, CL (D3 /0): rotate the 32-bit register d left by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1349     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1350     {
1351     emit_byte(0xd3);
1352     emit_byte(0xc0+d);
1353     }
1354     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1355    
/* ROL r16, CL (66 D3 /0): rotate the 16-bit register d left by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1356     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1357     {
1358     emit_byte(0x66);
1359     emit_byte(0xd3);
1360     emit_byte(0xc0+d);
1361     }
1362     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1363    
/* ROL r8, CL (D2 /0): rotate the 8-bit register d left by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1364     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1365     {
1366     emit_byte(0xd2);
1367     emit_byte(0xc0+d);
1368     }
1369     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1370    
/* SHL r32, CL (D3 /4): shift the 32-bit register d left by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1371     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1372     {
1373     emit_byte(0xd3);
1374     emit_byte(0xe0+d);
1375     }
1376     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1377    
/* SHL r16, CL (66 D3 /4): shift the 16-bit register d left by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1378     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1379     {
1380     emit_byte(0x66);
1381     emit_byte(0xd3);
1382     emit_byte(0xe0+d);
1383     }
1384     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1385    
/* SHL r8, CL (D2 /4): shift the 8-bit register d left by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1386     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1387     {
1388     emit_byte(0xd2);
1389     emit_byte(0xe0+d);
1390     }
1391     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1392    
1393     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1394     {
1395     if (optimize_shift_once && (i == 1)) {
1396     emit_byte(0xd0);
1397     emit_byte(0xc8+r);
1398     }
1399     else {
1400     emit_byte(0xc0);
1401     emit_byte(0xc8+r);
1402     emit_byte(i);
1403     }
1404     }
1405     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1406    
/* ROR r16, imm8 (66 C1 /1 ib): rotate the 16-bit register r right by i.
 * Always emits the count byte; no shift-by-one short form is used here. */
1407     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1408     {
1409     emit_byte(0x66);
1410     emit_byte(0xc1);
1411     emit_byte(0xc8+r);
1412     emit_byte(i);
1413     }
1414     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1415    
/* OR r32, [disp32] (0B /r with ModRM mod=00 rm=101): OR the 32-bit value at
 * the absolute address s into register d. */
1416     // gb-- used for making an fpcr value in compemu_fpp.cpp
1417     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1418     {
1419     emit_byte(0x0b);
1420     emit_byte(0x05+8*d);
1421     emit_long(s);
1422     }
1423     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1424    
1425     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1426     {
1427     if (optimize_shift_once && (i == 1)) {
1428     emit_byte(0xd1);
1429     emit_byte(0xc8+r);
1430     }
1431     else {
1432     emit_byte(0xc1);
1433     emit_byte(0xc8+r);
1434     emit_byte(i);
1435     }
1436     }
1437     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1438    
/* ROR r32, CL (D3 /1): rotate the 32-bit register d right by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1439     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1440     {
1441     emit_byte(0xd3);
1442     emit_byte(0xc8+d);
1443     }
1444     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1445    
/* ROR r16, CL (66 D3 /1): rotate the 16-bit register d right by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1446     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1447     {
1448     emit_byte(0x66);
1449     emit_byte(0xd3);
1450     emit_byte(0xc8+d);
1451     }
1452     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1453    
/* ROR r8, CL (D2 /1): rotate the 8-bit register d right by the count in CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1454     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1455     {
1456     emit_byte(0xd2);
1457     emit_byte(0xc8+d);
1458     }
1459     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1460    
/* SHR r32, CL (D3 /5): logical right shift of the 32-bit register d by CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1461     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1462     {
1463     emit_byte(0xd3);
1464     emit_byte(0xe8+d);
1465     }
1466     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1467    
/* SHR r16, CL (66 D3 /5): logical right shift of the 16-bit register d by CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1468     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1469     {
1470     emit_byte(0x66);
1471     emit_byte(0xd3);
1472     emit_byte(0xe8+d);
1473     }
1474     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1475    
/* SHR r8, CL (D2 /5): logical right shift of the 8-bit register d by CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1476     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1477     {
1478     emit_byte(0xd2);
1479     emit_byte(0xe8+d);
1480     }
1481     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1482    
/* SAR r32, CL (D3 /7): arithmetic right shift of the 32-bit register d by CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1483     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1484     {
1485     emit_byte(0xd3);
1486     emit_byte(0xf8+d);
1487     }
1488     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1489    
/* SAR r16, CL (66 D3 /7): arithmetic right shift of the 16-bit register d by CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1490     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1491     {
1492     emit_byte(0x66);
1493     emit_byte(0xd3);
1494     emit_byte(0xf8+d);
1495     }
1496     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1497    
/* SAR r8, CL (D2 /7): arithmetic right shift of the 8-bit register d by CL.
 * The count register r is never encoded, since this opcode always uses CL.
 * NOTE(review): callers presumably guarantee r is ECX - confirm. */
1498     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1499     {
1500     emit_byte(0xd2);
1501     emit_byte(0xf8+d);
1502     }
1503     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1504    
1505     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1506     {
1507     if (optimize_shift_once && (i == 1)) {
1508     emit_byte(0xd1);
1509     emit_byte(0xe0+r);
1510     }
1511     else {
1512     emit_byte(0xc1);
1513     emit_byte(0xe0+r);
1514     emit_byte(i);
1515     }
1516     }
1517     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1518    
/* SHL r16, imm8 (66 C1 /4 ib): shift the 16-bit register r left by i.
 * Always emits the count byte; no shift-by-one short form is used here. */
1519     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1520     {
1521     emit_byte(0x66);
1522     emit_byte(0xc1);
1523     emit_byte(0xe0+r);
1524     emit_byte(i);
1525     }
1526     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1527    
1528     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1529     {
1530     if (optimize_shift_once && (i == 1)) {
1531     emit_byte(0xd0);
1532     emit_byte(0xe0+r);
1533     }
1534     else {
1535     emit_byte(0xc0);
1536     emit_byte(0xe0+r);
1537     emit_byte(i);
1538     }
1539     }
1540     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1541    
1542     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1543     {
1544     if (optimize_shift_once && (i == 1)) {
1545     emit_byte(0xd1);
1546     emit_byte(0xe8+r);
1547     }
1548     else {
1549     emit_byte(0xc1);
1550     emit_byte(0xe8+r);
1551     emit_byte(i);
1552     }
1553     }
1554     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1555    
/* SHR r16, imm8 (66 C1 /5 ib): logical right shift of the 16-bit register r by i.
 * Always emits the count byte; no shift-by-one short form is used here. */
1556     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1557     {
1558     emit_byte(0x66);
1559     emit_byte(0xc1);
1560     emit_byte(0xe8+r);
1561     emit_byte(i);
1562     }
1563     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1564    
1565     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1566     {
1567     if (optimize_shift_once && (i == 1)) {
1568     emit_byte(0xd0);
1569     emit_byte(0xe8+r);
1570     }
1571     else {
1572     emit_byte(0xc0);
1573     emit_byte(0xe8+r);
1574     emit_byte(i);
1575     }
1576     }
1577     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1578    
1579     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1580     {
1581     if (optimize_shift_once && (i == 1)) {
1582     emit_byte(0xd1);
1583     emit_byte(0xf8+r);
1584     }
1585     else {
1586     emit_byte(0xc1);
1587     emit_byte(0xf8+r);
1588     emit_byte(i);
1589     }
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1592    
/* SAR r16, imm8 (66 C1 /7 ib): arithmetic right shift of the 16-bit register r by i.
 * Always emits the count byte; no shift-by-one short form is used here. */
1593     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1594     {
1595     emit_byte(0x66);
1596     emit_byte(0xc1);
1597     emit_byte(0xf8+r);
1598     emit_byte(i);
1599     }
1600     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1601    
1602     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1603     {
1604     if (optimize_shift_once && (i == 1)) {
1605     emit_byte(0xd0);
1606     emit_byte(0xf8+r);
1607     }
1608     else {
1609     emit_byte(0xc0);
1610     emit_byte(0xf8+r);
1611     emit_byte(i);
1612     }
1613     }
1614     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1615    
/* SAHF (9E): store AH into the low byte of the flags register.
 * dummy_ah is not encoded; the instruction has an implicit AH operand. */
1616     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1617     {
1618     emit_byte(0x9e);
1619     }
1620     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1621    
/* CPUID (0F A2). dummy_eax is not encoded; the instruction takes its
 * operand implicitly. */
1622     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1623     {
1624     emit_byte(0x0f);
1625     emit_byte(0xa2);
1626     }
1627     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1628    
/* LAHF (9F): load the low byte of the flags register into AH.
 * dummy_ah is not encoded; the instruction has an implicit AH operand. */
1629     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1630     {
1631     emit_byte(0x9f);
1632     }
1633     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1634    
/* SETcc r8 (0F 90+cc, register-direct ModRM): set the 8-bit register d to
 * 0 or 1 according to x86 condition code cc. */
1635     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1636     {
1637     emit_byte(0x0f);
1638     emit_byte(0x90+cc);
1639     emit_byte(0xc0+d);
1640     }
1641     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1642    
/* SETcc byte [disp32] (0F 90+cc, ModRM mod=00 rm=101): set the byte at the
 * absolute address d to 0 or 1 according to x86 condition code cc. */
1643     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1644     {
1645     emit_byte(0x0f);
1646     emit_byte(0x90+cc);
1647     emit_byte(0x05);
1648     emit_long(d);
1649     }
1650     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1651    
1652     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1653     {
1654     if (have_cmov) {
1655     emit_byte(0x0f);
1656     emit_byte(0x40+cc);
1657     emit_byte(0xc0+8*d+s);
1658     }
1659     else { /* replacement using branch and mov */
1660     int uncc=(cc^1);
1661     emit_byte(0x70+uncc);
1662     emit_byte(2); /* skip next 2 bytes if not cc=true */
1663     emit_byte(0x89);
1664     emit_byte(0xc0+8*s+d);
1665     }
1666     }
1667     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1668    
/* BSF r32, r32 (0F BC /r): bit-scan-forward of register s, result in d. */
1669     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1670     {
1671     emit_byte(0x0f);
1672     emit_byte(0xbc);
1673     emit_byte(0xc0+8*d+s);
1674     }
1675     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1676    
/* MOVSX r32, r16 (0F BF /r): sign-extend the 16-bit register s into d. */
1677     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1678     {
1679     emit_byte(0x0f);
1680     emit_byte(0xbf);
1681     emit_byte(0xc0+8*d+s);
1682     }
1683     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1684    
/* MOVSX r32, r8 (0F BE /r): sign-extend the 8-bit register s into d. */
1685     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1686     {
1687     emit_byte(0x0f);
1688     emit_byte(0xbe);
1689     emit_byte(0xc0+8*d+s);
1690     }
1691     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1692    
/* MOVZX r32, r16 (0F B7 /r): zero-extend the 16-bit register s into d. */
1693     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1694     {
1695     emit_byte(0x0f);
1696     emit_byte(0xb7);
1697     emit_byte(0xc0+8*d+s);
1698     }
1699     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1700    
/* MOVZX r32, r8 (0F B6 /r): zero-extend the 8-bit register s into d. */
1701     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1702     {
1703     emit_byte(0x0f);
1704     emit_byte(0xb6);
1705     emit_byte(0xc0+8*d+s);
1706     }
1707     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1708    
/* IMUL r32, r32 (0F AF /r): two-operand signed multiply, d = d * s
 * truncated to 32 bits. */
1709     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1710     {
1711     emit_byte(0x0f);
1712     emit_byte(0xaf);
1713     emit_byte(0xc0+8*d+s);
1714     }
1715     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1716    
1717     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1718     {
1719     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1720     abort();
1721     emit_byte(0xf7);
1722     emit_byte(0xea);
1723     }
1724     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1725    
1726     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1727     {
1728     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1729     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1730     abort();
1731     }
1732     emit_byte(0xf7);
1733     emit_byte(0xe2);
1734     }
1735     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1736    
/* Deliberately unsupported: this always aborts as soon as it is called.
 * The three emit_byte() calls after abort() are never reached; they would
 * encode 0F AF /r, the same bytes raw_imul_32_32 emits. */
1737     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1738     {
1739     abort(); /* %^$&%^$%#^ x86! */
1740     emit_byte(0x0f);
1741     emit_byte(0xaf);
1742     emit_byte(0xc0+8*d+s);
1743     }
1744     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1745    
/* MOV r8, r8 (88 /r with reg=s, rm=d): copy the 8-bit register s into d. */
1746     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1747     {
1748     emit_byte(0x88);
1749     emit_byte(0xc0+8*s+d);
1750     }
1751     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1752    
/* MOV r16, r16 (66 89 /r with reg=s, rm=d): copy the 16-bit register s into d. */
1753     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1754     {
1755     emit_byte(0x66);
1756     emit_byte(0x89);
1757     emit_byte(0xc0+8*s+d);
1758     }
1759     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1760    
1761     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1762     {
1763     int isebp=(baser==5)?0x40:0;
1764     int fi;
1765    
1766     switch(factor) {
1767     case 1: fi=0; break;
1768     case 2: fi=1; break;
1769     case 4: fi=2; break;
1770     case 8: fi=3; break;
1771     default: abort();
1772     }
1773    
1774    
1775     emit_byte(0x8b);
1776     emit_byte(0x04+8*d+isebp);
1777     emit_byte(baser+8*index+0x40*fi);
1778     if (isebp)
1779     emit_byte(0x00);
1780     }
1781     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1782    
1783     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1784     {
1785     int fi;
1786     int isebp;
1787    
1788     switch(factor) {
1789     case 1: fi=0; break;
1790     case 2: fi=1; break;
1791     case 4: fi=2; break;
1792     case 8: fi=3; break;
1793     default: abort();
1794     }
1795     isebp=(baser==5)?0x40:0;
1796    
1797     emit_byte(0x66);
1798     emit_byte(0x8b);
1799     emit_byte(0x04+8*d+isebp);
1800     emit_byte(baser+8*index+0x40*fi);
1801     if (isebp)
1802     emit_byte(0x00);
1803     }
1804     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1805    
1806     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1807     {
1808     int fi;
1809     int isebp;
1810    
1811     switch(factor) {
1812     case 1: fi=0; break;
1813     case 2: fi=1; break;
1814     case 4: fi=2; break;
1815     case 8: fi=3; break;
1816     default: abort();
1817     }
1818     isebp=(baser==5)?0x40:0;
1819    
1820     emit_byte(0x8a);
1821     emit_byte(0x04+8*d+isebp);
1822     emit_byte(baser+8*index+0x40*fi);
1823     if (isebp)
1824     emit_byte(0x00);
1825     }
1826     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1827    
1828     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1829     {
1830     int fi;
1831     int isebp;
1832    
1833     switch(factor) {
1834     case 1: fi=0; break;
1835     case 2: fi=1; break;
1836     case 4: fi=2; break;
1837     case 8: fi=3; break;
1838     default: abort();
1839     }
1840    
1841    
1842     isebp=(baser==5)?0x40:0;
1843    
1844     emit_byte(0x89);
1845     emit_byte(0x04+8*s+isebp);
1846     emit_byte(baser+8*index+0x40*fi);
1847     if (isebp)
1848     emit_byte(0x00);
1849     }
1850     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1851    
1852     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1853     {
1854     int fi;
1855     int isebp;
1856    
1857     switch(factor) {
1858     case 1: fi=0; break;
1859     case 2: fi=1; break;
1860     case 4: fi=2; break;
1861     case 8: fi=3; break;
1862     default: abort();
1863     }
1864     isebp=(baser==5)?0x40:0;
1865    
1866     emit_byte(0x66);
1867     emit_byte(0x89);
1868     emit_byte(0x04+8*s+isebp);
1869     emit_byte(baser+8*index+0x40*fi);
1870     if (isebp)
1871     emit_byte(0x00);
1872     }
1873     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1874    
1875     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1876     {
1877     int fi;
1878     int isebp;
1879    
1880     switch(factor) {
1881     case 1: fi=0; break;
1882     case 2: fi=1; break;
1883     case 4: fi=2; break;
1884     case 8: fi=3; break;
1885     default: abort();
1886     }
1887     isebp=(baser==5)?0x40:0;
1888    
1889     emit_byte(0x88);
1890     emit_byte(0x04+8*s+isebp);
1891     emit_byte(baser+8*index+0x40*fi);
1892     if (isebp)
1893     emit_byte(0x00);
1894     }
1895     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1896    
1897     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1898     {
1899     int fi;
1900    
1901     switch(factor) {
1902     case 1: fi=0; break;
1903     case 2: fi=1; break;
1904     case 4: fi=2; break;
1905     case 8: fi=3; break;
1906     default: abort();
1907     }
1908    
1909     emit_byte(0x89);
1910     emit_byte(0x84+8*s);
1911     emit_byte(baser+8*index+0x40*fi);
1912     emit_long(base);
1913     }
1914     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1915    
1916     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1917     {
1918     int fi;
1919    
1920     switch(factor) {
1921     case 1: fi=0; break;
1922     case 2: fi=1; break;
1923     case 4: fi=2; break;
1924     case 8: fi=3; break;
1925     default: abort();
1926     }
1927    
1928     emit_byte(0x66);
1929     emit_byte(0x89);
1930     emit_byte(0x84+8*s);
1931     emit_byte(baser+8*index+0x40*fi);
1932     emit_long(base);
1933     }
1934     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1935    
1936     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1937     {
1938     int fi;
1939    
1940     switch(factor) {
1941     case 1: fi=0; break;
1942     case 2: fi=1; break;
1943     case 4: fi=2; break;
1944     case 8: fi=3; break;
1945     default: abort();
1946     }
1947    
1948     emit_byte(0x88);
1949     emit_byte(0x84+8*s);
1950     emit_byte(baser+8*index+0x40*fi);
1951     emit_long(base);
1952     }
1953     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1954    
1955     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1956     {
1957     int fi;
1958    
1959     switch(factor) {
1960     case 1: fi=0; break;
1961     case 2: fi=1; break;
1962     case 4: fi=2; break;
1963     case 8: fi=3; break;
1964     default: abort();
1965     }
1966    
1967     emit_byte(0x8b);
1968     emit_byte(0x84+8*d);
1969     emit_byte(baser+8*index+0x40*fi);
1970     emit_long(base);
1971     }
1972     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1973    
1974     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
1975     {
1976     int fi;
1977    
1978     switch(factor) {
1979     case 1: fi=0; break;
1980     case 2: fi=1; break;
1981     case 4: fi=2; break;
1982     case 8: fi=3; break;
1983     default: abort();
1984     }
1985    
1986     emit_byte(0x66);
1987     emit_byte(0x8b);
1988     emit_byte(0x84+8*d);
1989     emit_byte(baser+8*index+0x40*fi);
1990     emit_long(base);
1991     }
1992     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
1993    
1994     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1995     {
1996     int fi;
1997    
1998     switch(factor) {
1999     case 1: fi=0; break;
2000     case 2: fi=1; break;
2001     case 4: fi=2; break;
2002     case 8: fi=3; break;
2003     default: abort();
2004     }
2005    
2006     emit_byte(0x8a);
2007     emit_byte(0x84+8*d);
2008     emit_byte(baser+8*index+0x40*fi);
2009     emit_long(base);
2010     }
2011     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2012    
2013     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2014     {
2015     int fi;
2016     switch(factor) {
2017     case 1: fi=0; break;
2018     case 2: fi=1; break;
2019     case 4: fi=2; break;
2020     case 8: fi=3; break;
2021     default:
2022     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2023     abort();
2024     }
2025     emit_byte(0x8b);
2026     emit_byte(0x04+8*d);
2027     emit_byte(0x05+8*index+64*fi);
2028     emit_long(base);
2029     }
2030     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2031    
2032     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2033     {
2034     int fi;
2035     switch(factor) {
2036     case 1: fi=0; break;
2037     case 2: fi=1; break;
2038     case 4: fi=2; break;
2039     case 8: fi=3; break;
2040     default:
2041     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2042     abort();
2043     }
2044     if (have_cmov) {
2045     emit_byte(0x0f);
2046     emit_byte(0x40+cond);
2047     emit_byte(0x04+8*d);
2048     emit_byte(0x05+8*index+64*fi);
2049     emit_long(base);
2050     }
2051     else { /* replacement using branch and mov */
2052     int uncc=(cond^1);
2053     emit_byte(0x70+uncc);
2054     emit_byte(7); /* skip next 7 bytes if not cc=true */
2055     emit_byte(0x8b);
2056     emit_byte(0x04+8*d);
2057     emit_byte(0x05+8*index+64*fi);
2058     emit_long(base);
2059     }
2060     }
2061     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2062    
2063     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2064     {
2065     if (have_cmov) {
2066     emit_byte(0x0f);
2067     emit_byte(0x40+cond);
2068     emit_byte(0x05+8*d);
2069     emit_long(mem);
2070     }
2071     else { /* replacement using branch and mov */
2072     int uncc=(cond^1);
2073     emit_byte(0x70+uncc);
2074     emit_byte(6); /* skip next 6 bytes if not cc=true */
2075     emit_byte(0x8b);
2076     emit_byte(0x05+8*d);
2077     emit_long(mem);
2078     }
2079     }
2080     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2081    
2082     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2083     {
2084 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2085 gbeauche 1.1 emit_byte(0x8b);
2086     emit_byte(0x40+8*d+s);
2087     emit_byte(offset);
2088     }
2089     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2090    
2091     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2092     {
2093 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2094 gbeauche 1.1 emit_byte(0x66);
2095     emit_byte(0x8b);
2096     emit_byte(0x40+8*d+s);
2097     emit_byte(offset);
2098     }
2099     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2100    
2101     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2102     {
2103 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2104 gbeauche 1.1 emit_byte(0x8a);
2105     emit_byte(0x40+8*d+s);
2106     emit_byte(offset);
2107     }
2108     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2109    
2110     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2111     {
2112     emit_byte(0x8b);
2113     emit_byte(0x80+8*d+s);
2114     emit_long(offset);
2115     }
2116     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2117    
2118     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2119     {
2120     emit_byte(0x66);
2121     emit_byte(0x8b);
2122     emit_byte(0x80+8*d+s);
2123     emit_long(offset);
2124     }
2125     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2126    
2127     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2128     {
2129     emit_byte(0x8a);
2130     emit_byte(0x80+8*d+s);
2131     emit_long(offset);
2132     }
2133     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2134    
2135     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2136     {
2137 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2138 gbeauche 1.1 emit_byte(0xc7);
2139     emit_byte(0x40+d);
2140     emit_byte(offset);
2141     emit_long(i);
2142     }
2143     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2144    
2145     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2146     {
2147 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2148 gbeauche 1.1 emit_byte(0x66);
2149     emit_byte(0xc7);
2150     emit_byte(0x40+d);
2151     emit_byte(offset);
2152     emit_word(i);
2153     }
2154     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2155    
2156     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2157     {
2158 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2159 gbeauche 1.1 emit_byte(0xc6);
2160     emit_byte(0x40+d);
2161     emit_byte(offset);
2162     emit_byte(i);
2163     }
2164     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2165    
2166     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2167     {
2168 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2169 gbeauche 1.1 emit_byte(0x89);
2170     emit_byte(0x40+8*s+d);
2171     emit_byte(offset);
2172     }
2173     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2174    
2175     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2176     {
2177 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2178 gbeauche 1.1 emit_byte(0x66);
2179     emit_byte(0x89);
2180     emit_byte(0x40+8*s+d);
2181     emit_byte(offset);
2182     }
2183     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2184    
2185     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2186     {
2187 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2188 gbeauche 1.1 emit_byte(0x88);
2189     emit_byte(0x40+8*s+d);
2190     emit_byte(offset);
2191     }
2192     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2193    
2194     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2195     {
2196     if (optimize_imm8 && isbyte(offset)) {
2197     emit_byte(0x8d);
2198     emit_byte(0x40+8*d+s);
2199     emit_byte(offset);
2200     }
2201     else {
2202     emit_byte(0x8d);
2203     emit_byte(0x80+8*d+s);
2204     emit_long(offset);
2205     }
2206     }
2207     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2208    
2209     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2210     {
2211     int fi;
2212    
2213     switch(factor) {
2214     case 1: fi=0; break;
2215     case 2: fi=1; break;
2216     case 4: fi=2; break;
2217     case 8: fi=3; break;
2218     default: abort();
2219     }
2220    
2221     if (optimize_imm8 && isbyte(offset)) {
2222     emit_byte(0x8d);
2223     emit_byte(0x44+8*d);
2224     emit_byte(0x40*fi+8*index+s);
2225     emit_byte(offset);
2226     }
2227     else {
2228     emit_byte(0x8d);
2229     emit_byte(0x84+8*d);
2230     emit_byte(0x40*fi+8*index+s);
2231     emit_long(offset);
2232     }
2233     }
2234     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2235    
2236     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2237     {
2238     int isebp=(s==5)?0x40:0;
2239     int fi;
2240    
2241     switch(factor) {
2242     case 1: fi=0; break;
2243     case 2: fi=1; break;
2244     case 4: fi=2; break;
2245     case 8: fi=3; break;
2246     default: abort();
2247     }
2248    
2249     emit_byte(0x8d);
2250     emit_byte(0x04+8*d+isebp);
2251     emit_byte(0x40*fi+8*index+s);
2252     if (isebp)
2253     emit_byte(0);
2254     }
2255     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2256    
2257     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2258     {
2259     if (optimize_imm8 && isbyte(offset)) {
2260     emit_byte(0x89);
2261     emit_byte(0x40+8*s+d);
2262     emit_byte(offset);
2263     }
2264     else {
2265     emit_byte(0x89);
2266     emit_byte(0x80+8*s+d);
2267     emit_long(offset);
2268     }
2269     }
2270     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2271    
2272     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2273     {
2274     emit_byte(0x66);
2275     emit_byte(0x89);
2276     emit_byte(0x80+8*s+d);
2277     emit_long(offset);
2278     }
2279     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2280    
2281     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2282     {
2283     if (optimize_imm8 && isbyte(offset)) {
2284     emit_byte(0x88);
2285     emit_byte(0x40+8*s+d);
2286     emit_byte(offset);
2287     }
2288     else {
2289     emit_byte(0x88);
2290     emit_byte(0x80+8*s+d);
2291     emit_long(offset);
2292     }
2293     }
2294     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2295    
2296     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2297     {
2298     emit_byte(0x0f);
2299     emit_byte(0xc8+r);
2300     }
2301     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2302    
2303     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2304     {
2305     emit_byte(0x66);
2306     emit_byte(0xc1);
2307     emit_byte(0xc0+r);
2308     emit_byte(0x08);
2309     }
2310     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2311    
2312     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2313     {
2314     emit_byte(0x89);
2315     emit_byte(0xc0+8*s+d);
2316     }
2317     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2318    
2319     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2320     {
2321     emit_byte(0x89);
2322     emit_byte(0x05+8*s);
2323     emit_long(d);
2324     }
2325     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2326    
2327     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2328     {
2329     emit_byte(0x66);
2330     emit_byte(0x89);
2331     emit_byte(0x05+8*s);
2332     emit_long(d);
2333     }
2334     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2335    
2336     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2337     {
2338     emit_byte(0x66);
2339     emit_byte(0x8b);
2340     emit_byte(0x05+8*d);
2341     emit_long(s);
2342     }
2343     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2344    
2345     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2346     {
2347     emit_byte(0x88);
2348     emit_byte(0x05+8*s);
2349     emit_long(d);
2350     }
2351     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2352    
2353     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2354     {
2355     emit_byte(0x8a);
2356     emit_byte(0x05+8*d);
2357     emit_long(s);
2358     }
2359     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2360    
2361     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2362     {
2363     emit_byte(0xb8+d);
2364     emit_long(s);
2365     }
2366     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2367    
2368     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2369     {
2370     emit_byte(0x66);
2371     emit_byte(0xb8+d);
2372     emit_word(s);
2373     }
2374     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2375    
2376     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2377     {
2378     emit_byte(0xb0+d);
2379     emit_byte(s);
2380     }
2381     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2382    
2383     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2384     {
2385     emit_byte(0x81);
2386     emit_byte(0x15);
2387     emit_long(d);
2388     emit_long(s);
2389     }
2390     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2391    
2392     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2393     {
2394     if (optimize_imm8 && isbyte(s)) {
2395     emit_byte(0x83);
2396     emit_byte(0x05);
2397     emit_long(d);
2398     emit_byte(s);
2399     }
2400     else {
2401     emit_byte(0x81);
2402     emit_byte(0x05);
2403     emit_long(d);
2404     emit_long(s);
2405     }
2406     }
2407     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2408    
2409     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2410     {
2411     emit_byte(0x66);
2412     emit_byte(0x81);
2413     emit_byte(0x05);
2414     emit_long(d);
2415     emit_word(s);
2416     }
2417     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2418    
2419     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2420     {
2421     emit_byte(0x80);
2422     emit_byte(0x05);
2423     emit_long(d);
2424     emit_byte(s);
2425     }
2426     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2427    
2428     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2429     {
2430 gbeauche 1.2 if (optimize_accum && isaccum(d))
2431     emit_byte(0xa9);
2432     else {
2433 gbeauche 1.1 emit_byte(0xf7);
2434     emit_byte(0xc0+d);
2435 gbeauche 1.2 }
2436 gbeauche 1.1 emit_long(i);
2437     }
2438     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2439    
2440     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2441     {
2442     emit_byte(0x85);
2443     emit_byte(0xc0+8*s+d);
2444     }
2445     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2446    
2447     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2448     {
2449     emit_byte(0x66);
2450     emit_byte(0x85);
2451     emit_byte(0xc0+8*s+d);
2452     }
2453     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2454    
2455     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2456     {
2457     emit_byte(0x84);
2458     emit_byte(0xc0+8*s+d);
2459     }
2460     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2461    
2462     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2463     {
2464     if (optimize_imm8 && isbyte(i)) {
2465 gbeauche 1.2 emit_byte(0x83);
2466     emit_byte(0xe0+d);
2467     emit_byte(i);
2468 gbeauche 1.1 }
2469     else {
2470 gbeauche 1.2 if (optimize_accum && isaccum(d))
2471     emit_byte(0x25);
2472     else {
2473     emit_byte(0x81);
2474     emit_byte(0xe0+d);
2475     }
2476     emit_long(i);
2477 gbeauche 1.1 }
2478     }
2479     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2480    
2481     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2482     {
2483 gbeauche 1.2 emit_byte(0x66);
2484     if (optimize_imm8 && isbyte(i)) {
2485     emit_byte(0x83);
2486     emit_byte(0xe0+d);
2487     emit_byte(i);
2488     }
2489     else {
2490     if (optimize_accum && isaccum(d))
2491     emit_byte(0x25);
2492     else {
2493     emit_byte(0x81);
2494     emit_byte(0xe0+d);
2495     }
2496     emit_word(i);
2497     }
2498 gbeauche 1.1 }
2499     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2500    
2501     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2502     {
2503     emit_byte(0x21);
2504     emit_byte(0xc0+8*s+d);
2505     }
2506     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2507    
2508     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2509     {
2510     emit_byte(0x66);
2511     emit_byte(0x21);
2512     emit_byte(0xc0+8*s+d);
2513     }
2514     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2515    
2516     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2517     {
2518     emit_byte(0x20);
2519     emit_byte(0xc0+8*s+d);
2520     }
2521     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2522    
2523     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2524     {
2525     if (optimize_imm8 && isbyte(i)) {
2526     emit_byte(0x83);
2527     emit_byte(0xc8+d);
2528     emit_byte(i);
2529     }
2530     else {
2531 gbeauche 1.2 if (optimize_accum && isaccum(d))
2532     emit_byte(0x0d);
2533     else {
2534 gbeauche 1.1 emit_byte(0x81);
2535     emit_byte(0xc8+d);
2536 gbeauche 1.2 }
2537 gbeauche 1.1 emit_long(i);
2538     }
2539     }
2540     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2541    
2542     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2543     {
2544     emit_byte(0x09);
2545     emit_byte(0xc0+8*s+d);
2546     }
2547     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2548    
2549     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2550     {
2551     emit_byte(0x66);
2552     emit_byte(0x09);
2553     emit_byte(0xc0+8*s+d);
2554     }
2555     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2556    
2557     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2558     {
2559     emit_byte(0x08);
2560     emit_byte(0xc0+8*s+d);
2561     }
2562     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2563    
2564     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2565     {
2566     emit_byte(0x11);
2567     emit_byte(0xc0+8*s+d);
2568     }
2569     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2570    
2571     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2572     {
2573     emit_byte(0x66);
2574     emit_byte(0x11);
2575     emit_byte(0xc0+8*s+d);
2576     }
2577     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2578    
2579     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2580     {
2581     emit_byte(0x10);
2582     emit_byte(0xc0+8*s+d);
2583     }
2584     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2585    
2586     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2587     {
2588     emit_byte(0x01);
2589     emit_byte(0xc0+8*s+d);
2590     }
2591     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2592    
2593     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2594     {
2595     emit_byte(0x66);
2596     emit_byte(0x01);
2597     emit_byte(0xc0+8*s+d);
2598     }
2599     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2600    
2601     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2602     {
2603     emit_byte(0x00);
2604     emit_byte(0xc0+8*s+d);
2605     }
2606     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2607    
2608     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2609     {
2610     if (isbyte(i)) {
2611     emit_byte(0x83);
2612     emit_byte(0xe8+d);
2613     emit_byte(i);
2614     }
2615     else {
2616 gbeauche 1.2 if (optimize_accum && isaccum(d))
2617     emit_byte(0x2d);
2618     else {
2619 gbeauche 1.1 emit_byte(0x81);
2620     emit_byte(0xe8+d);
2621 gbeauche 1.2 }
2622 gbeauche 1.1 emit_long(i);
2623     }
2624     }
2625     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2626    
2627     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2628     {
2629 gbeauche 1.2 if (optimize_accum && isaccum(d))
2630     emit_byte(0x2c);
2631     else {
2632 gbeauche 1.1 emit_byte(0x80);
2633     emit_byte(0xe8+d);
2634 gbeauche 1.2 }
2635 gbeauche 1.1 emit_byte(i);
2636     }
2637     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2638    
2639     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2640     {
2641     if (isbyte(i)) {
2642     emit_byte(0x83);
2643     emit_byte(0xc0+d);
2644     emit_byte(i);
2645     }
2646     else {
2647 gbeauche 1.2 if (optimize_accum && isaccum(d))
2648     emit_byte(0x05);
2649     else {
2650 gbeauche 1.1 emit_byte(0x81);
2651     emit_byte(0xc0+d);
2652 gbeauche 1.2 }
2653 gbeauche 1.1 emit_long(i);
2654     }
2655     }
2656     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2657    
2658     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2659     {
2660 gbeauche 1.2 emit_byte(0x66);
2661 gbeauche 1.1 if (isbyte(i)) {
2662     emit_byte(0x83);
2663     emit_byte(0xc0+d);
2664     emit_byte(i);
2665     }
2666     else {
2667 gbeauche 1.2 if (optimize_accum && isaccum(d))
2668     emit_byte(0x05);
2669     else {
2670 gbeauche 1.1 emit_byte(0x81);
2671     emit_byte(0xc0+d);
2672 gbeauche 1.2 }
2673 gbeauche 1.1 emit_word(i);
2674     }
2675     }
2676     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2677    
2678     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2679     {
2680 gbeauche 1.2 if (optimize_accum && isaccum(d))
2681     emit_byte(0x04);
2682     else {
2683     emit_byte(0x80);
2684     emit_byte(0xc0+d);
2685     }
2686 gbeauche 1.1 emit_byte(i);
2687     }
2688     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2689    
2690     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2691     {
2692     emit_byte(0x19);
2693     emit_byte(0xc0+8*s+d);
2694     }
2695     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2696    
2697     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2698     {
2699     emit_byte(0x66);
2700     emit_byte(0x19);
2701     emit_byte(0xc0+8*s+d);
2702     }
2703     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2704    
2705     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2706     {
2707     emit_byte(0x18);
2708     emit_byte(0xc0+8*s+d);
2709     }
2710     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2711    
2712     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2713     {
2714     emit_byte(0x29);
2715     emit_byte(0xc0+8*s+d);
2716     }
2717     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2718    
2719     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2720     {
2721     emit_byte(0x66);
2722     emit_byte(0x29);
2723     emit_byte(0xc0+8*s+d);
2724     }
2725     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2726    
2727     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2728     {
2729     emit_byte(0x28);
2730     emit_byte(0xc0+8*s+d);
2731     }
2732     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2733    
2734     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2735     {
2736     emit_byte(0x39);
2737     emit_byte(0xc0+8*s+d);
2738     }
2739     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2740    
2741     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2742     {
2743     if (optimize_imm8 && isbyte(i)) {
2744     emit_byte(0x83);
2745     emit_byte(0xf8+r);
2746     emit_byte(i);
2747     }
2748     else {
2749 gbeauche 1.2 if (optimize_accum && isaccum(r))
2750     emit_byte(0x3d);
2751     else {
2752 gbeauche 1.1 emit_byte(0x81);
2753     emit_byte(0xf8+r);
2754 gbeauche 1.2 }
2755 gbeauche 1.1 emit_long(i);
2756     }
2757     }
2758     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2759    
2760     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2761     {
2762     emit_byte(0x66);
2763     emit_byte(0x39);
2764     emit_byte(0xc0+8*s+d);
2765     }
2766     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2767    
2768 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2769     {
2770     emit_byte(0x80);
2771     emit_byte(0x3d);
2772     emit_long(d);
2773     emit_byte(s);
2774     }
2775     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2776    
2777 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2778     {
2779 gbeauche 1.2 if (optimize_accum && isaccum(d))
2780     emit_byte(0x3c);
2781     else {
2782 gbeauche 1.1 emit_byte(0x80);
2783     emit_byte(0xf8+d);
2784 gbeauche 1.2 }
2785 gbeauche 1.1 emit_byte(i);
2786     }
2787     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2788    
2789     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2790     {
2791     emit_byte(0x38);
2792     emit_byte(0xc0+8*s+d);
2793     }
2794     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2795    
2796     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2797     {
2798     int fi;
2799    
2800     switch(factor) {
2801     case 1: fi=0; break;
2802     case 2: fi=1; break;
2803     case 4: fi=2; break;
2804     case 8: fi=3; break;
2805     default: abort();
2806     }
2807     emit_byte(0x39);
2808     emit_byte(0x04+8*d);
2809     emit_byte(5+8*index+0x40*fi);
2810     emit_long(offset);
2811     }
2812     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2813    
2814     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2815     {
2816     emit_byte(0x31);
2817     emit_byte(0xc0+8*s+d);
2818     }
2819     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2820    
2821     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2822     {
2823     emit_byte(0x66);
2824     emit_byte(0x31);
2825     emit_byte(0xc0+8*s+d);
2826     }
2827     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2828    
2829     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2830     {
2831     emit_byte(0x30);
2832     emit_byte(0xc0+8*s+d);
2833     }
2834     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2835    
2836     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2837     {
2838     if (optimize_imm8 && isbyte(s)) {
2839     emit_byte(0x83);
2840     emit_byte(0x2d);
2841     emit_long(d);
2842     emit_byte(s);
2843     }
2844     else {
2845     emit_byte(0x81);
2846     emit_byte(0x2d);
2847     emit_long(d);
2848     emit_long(s);
2849     }
2850     }
2851     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2852    
2853     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2854     {
2855     if (optimize_imm8 && isbyte(s)) {
2856     emit_byte(0x83);
2857     emit_byte(0x3d);
2858     emit_long(d);
2859     emit_byte(s);
2860     }
2861     else {
2862     emit_byte(0x81);
2863     emit_byte(0x3d);
2864     emit_long(d);
2865     emit_long(s);
2866     }
2867     }
2868     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2869    
2870     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2871     {
2872     emit_byte(0x87);
2873     emit_byte(0xc0+8*r1+r2);
2874     }
2875     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2876    
2877     /*************************************************************************
2878     * FIXME: mem access modes probably wrong *
2879     *************************************************************************/
2880    
2881     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2882     {
2883     emit_byte(0x9c);
2884     }
2885     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2886    
2887     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2888     {
2889     emit_byte(0x9d);
2890     }
2891     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2892 gbeauche 1.13
2893     #endif
2894 gbeauche 1.1
2895     /*************************************************************************
2896     * Unoptimizable stuff --- jump *
2897     *************************************************************************/
2898    
2899     static __inline__ void raw_call_r(R4 r)
2900     {
2901     emit_byte(0xff);
2902     emit_byte(0xd0+r);
2903 gbeauche 1.5 }
2904    
2905     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2906     {
2907     int mu;
2908     switch(m) {
2909     case 1: mu=0; break;
2910     case 2: mu=1; break;
2911     case 4: mu=2; break;
2912     case 8: mu=3; break;
2913     default: abort();
2914     }
2915     emit_byte(0xff);
2916     emit_byte(0x14);
2917     emit_byte(0x05+8*r+0x40*mu);
2918     emit_long(base);
2919 gbeauche 1.1 }
2920    
2921     static __inline__ void raw_jmp_r(R4 r)
2922     {
2923     emit_byte(0xff);
2924     emit_byte(0xe0+r);
2925     }
2926    
2927     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2928     {
2929     int mu;
2930     switch(m) {
2931     case 1: mu=0; break;
2932     case 2: mu=1; break;
2933     case 4: mu=2; break;
2934     case 8: mu=3; break;
2935     default: abort();
2936     }
2937     emit_byte(0xff);
2938     emit_byte(0x24);
2939     emit_byte(0x05+8*r+0x40*mu);
2940     emit_long(base);
2941     }
2942    
2943     static __inline__ void raw_jmp_m(uae_u32 base)
2944     {
2945     emit_byte(0xff);
2946     emit_byte(0x25);
2947     emit_long(base);
2948     }
2949    
2950    
2951     static __inline__ void raw_call(uae_u32 t)
2952     {
2953     emit_byte(0xe8);
2954     emit_long(t-(uae_u32)target-4);
2955     }
2956    
2957     static __inline__ void raw_jmp(uae_u32 t)
2958     {
2959     emit_byte(0xe9);
2960     emit_long(t-(uae_u32)target-4);
2961     }
2962    
2963     static __inline__ void raw_jl(uae_u32 t)
2964     {
2965     emit_byte(0x0f);
2966     emit_byte(0x8c);
2967     emit_long(t-(uae_u32)target-4);
2968     }
2969    
2970     static __inline__ void raw_jz(uae_u32 t)
2971     {
2972     emit_byte(0x0f);
2973     emit_byte(0x84);
2974     emit_long(t-(uae_u32)target-4);
2975     }
2976    
2977     static __inline__ void raw_jnz(uae_u32 t)
2978     {
2979     emit_byte(0x0f);
2980     emit_byte(0x85);
2981     emit_long(t-(uae_u32)target-4);
2982     }
2983    
/* Emit only the two opcode bytes of JNZ rel32 (0F 85); the caller supplies
   the 32-bit displacement. */
static __inline__ void raw_jnz_l_oponly(void)
{
    const int cc_ne = 0x85;

    emit_byte(0x0f);
    emit_byte(cc_ne);
}
2989    
/* Emit only the two opcode bytes of Jcc rel32 (0F 80+cc); the caller
   supplies the 32-bit displacement. */
static __inline__ void raw_jcc_l_oponly(int cc)
{
    const int opcode2 = 0x80 + cc;

    emit_byte(0x0f);
    emit_byte(opcode2);
}
2995    
/* Emit only the opcode byte of JNZ rel8 (75); the caller supplies the
   8-bit displacement. */
static __inline__ void raw_jnz_b_oponly(void)
{
    const int opcode_jnz_short = 0x75;

    emit_byte(opcode_jnz_short);
}
3000    
/* Emit only the opcode byte of JZ rel8 (74); the caller supplies the
   8-bit displacement. */
static __inline__ void raw_jz_b_oponly(void)
{
    const int opcode_jz_short = 0x74;

    emit_byte(opcode_jz_short);
}
3005    
/* Emit only the opcode byte of Jcc rel8 (70+cc); the caller supplies the
   8-bit displacement. */
static __inline__ void raw_jcc_b_oponly(int cc)
{
    const int opcode = 0x70 + cc;

    emit_byte(opcode);
}
3010    
/* Emit only the opcode byte of JMP rel32 (E9); the caller supplies the
   32-bit displacement. */
static __inline__ void raw_jmp_l_oponly(void)
{
    const int opcode_jmp_near = 0xe9;

    emit_byte(opcode_jmp_near);
}
3015    
/* Emit only the opcode byte of JMP rel8 (EB); the caller supplies the
   8-bit displacement. */
static __inline__ void raw_jmp_b_oponly(void)
{
    const int opcode_jmp_short = 0xeb;

    emit_byte(opcode_jmp_short);
}
3020    
/* RET (near return, opcode C3). */
static __inline__ void raw_ret(void)
{
    const int opcode_ret = 0xc3;

    emit_byte(opcode_ret);
}
3025    
/* NOP (single-byte no-op, opcode 90). */
static __inline__ void raw_nop(void)
{
    const int opcode_nop = 0x90;

    emit_byte(opcode_nop);
}
3030    
3031 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3032     {
3033     /* Source: GNU Binutils 2.12.90.0.15 */
3034     /* Various efficient no-op patterns for aligning code labels.
3035     Note: Don't try to assemble the instructions in the comments.
3036     0L and 0w are not legal. */
3037     static const uae_u8 f32_1[] =
3038     {0x90}; /* nop */
3039     static const uae_u8 f32_2[] =
3040     {0x89,0xf6}; /* movl %esi,%esi */
3041     static const uae_u8 f32_3[] =
3042     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3043     static const uae_u8 f32_4[] =
3044     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3045     static const uae_u8 f32_5[] =
3046     {0x90, /* nop */
3047     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3048     static const uae_u8 f32_6[] =
3049     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3050     static const uae_u8 f32_7[] =
3051     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3052     static const uae_u8 f32_8[] =
3053     {0x90, /* nop */
3054     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3055     static const uae_u8 f32_9[] =
3056     {0x89,0xf6, /* movl %esi,%esi */
3057     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3058     static const uae_u8 f32_10[] =
3059     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3060     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3061     static const uae_u8 f32_11[] =
3062     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3063     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3064     static const uae_u8 f32_12[] =
3065     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3066     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3067     static const uae_u8 f32_13[] =
3068     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3069     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3070     static const uae_u8 f32_14[] =
3071     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3072     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3073     static const uae_u8 f32_15[] =
3074     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3075     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3076     static const uae_u8 f32_16[] =
3077     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3078     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3079     static const uae_u8 *const f32_patt[] = {
3080     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3081     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3082     };
3083    
3084     int nloops = nbytes / 16;
3085     while (nloops-- > 0)
3086     emit_block(f32_16, sizeof(f32_16));
3087    
3088     nbytes %= 16;
3089     if (nbytes)
3090     emit_block(f32_patt[nbytes - 1], nbytes);
3091     }
3092    
3093 gbeauche 1.1
3094     /*************************************************************************
3095     * Flag handling, to and fro UAE flag register *
3096     *************************************************************************/
3097    
3098     #ifdef SAHF_SETO_PROFITABLE
3099    
3100     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3101    
3102     static __inline__ void raw_flags_to_reg(int r)
3103     {
3104     raw_lahf(0); /* Most flags in AH */
3105     //raw_setcc(r,0); /* V flag in AL */
3106     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
3107    
3108     #if 1 /* Let's avoid those nasty partial register stalls */
3109     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
3110     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
3111     //live.state[FLAGTMP].status=CLEAN;
3112     live.state[FLAGTMP].status=INMEM;
3113     live.state[FLAGTMP].realreg=-1;
3114     /* We just "evicted" FLAGTMP. */
3115     if (live.nat[r].nholds!=1) {
3116     /* Huh? */
3117     abort();
3118     }
3119     live.nat[r].nholds=0;
3120     #endif
3121     }
3122    
3123     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
/* Restore the native flags from register r (LAHF/SETcc variant): the byte
   compare against -127 recreates V from the low byte, then SAHF reloads the
   remaining flags. */
static __inline__ void raw_reg_to_flags(int r)
{
    const int v_probe = -127;

    raw_cmp_b_ri(r,v_probe);    /* set V */
    raw_sahf(0);
}
3129    
3130     #else
3131    
3132     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3133     static __inline__ void raw_flags_to_reg(int r)
3134     {
3135     raw_pushfl();
3136     raw_pop_l_r(r);
3137     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
3138     // live.state[FLAGTMP].status=CLEAN;
3139     live.state[FLAGTMP].status=INMEM;
3140     live.state[FLAGTMP].realreg=-1;
3141     /* We just "evicted" FLAGTMP. */
3142     if (live.nat[r].nholds!=1) {
3143     /* Huh? */
3144     abort();
3145     }
3146     live.nat[r].nholds=0;
3147     }
3148    
3149     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
/* Restore the native flags from register r (PUSHF variant): push r, then pop
   it into the flags register. */
static __inline__ void raw_reg_to_flags(int r)
{
    const int flags_src = r;

    raw_push_l_r(flags_src);
    raw_popfl();
}
3155    
3156     #endif
3157    
3158     /* Apparently, there are enough instructions between flag store and
3159     flag reload to avoid the partial memory stall */
3160     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3161     {
3162     #if 1
3163     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3164     #else
3165     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3166     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
3167     #endif
3168     }
3169    
3170     /* FLAGX is byte sized, and we *do* write it at that size */
3171     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3172     {
3173     if (live.nat[target].canbyte)
3174     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
3175     else if (live.nat[target].canword)
3176     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
3177     else
3178     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3179     }
3180    
3181 gbeauche 1.11 #define NATIVE_FLAG_Z 0x40
3182     static __inline__ void raw_flags_set_zero(int f, int r, int t)
3183     {
3184     // FIXME: this is really suboptimal
3185     raw_pushfl();
3186     raw_pop_l_r(f);
3187     raw_and_l_ri(f,~NATIVE_FLAG_Z);
3188     raw_test_l_rr(r,r);
3189     raw_mov_l_ri(r,0);
3190     raw_mov_l_ri(t,NATIVE_FLAG_Z);
3191     raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3192     raw_or_l(f,r);
3193     raw_push_l_r(f);
3194     raw_popfl();
3195     }
3196 gbeauche 1.1
3197     static __inline__ void raw_inc_sp(int off)
3198     {
3199 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3200 gbeauche 1.1 }
3201    
3202     /*************************************************************************
3203     * Handling mistaken direct memory access *
3204     *************************************************************************/
3205    
3206     // gb-- I don't need that part for JIT Basilisk II
3207     #if defined(NATMEM_OFFSET) && 0
3208     #include <asm/sigcontext.h>
3209     #include <signal.h>
3210    
3211     #define SIG_READ 1
3212     #define SIG_WRITE 2
3213    
3214     static int in_handler=0;
3215     static uae_u8 veccode[256];
3216    
3217     static void vec(int x, struct sigcontext sc)
3218     {
3219     uae_u8* i=(uae_u8*)sc.eip;
3220     uae_u32 addr=sc.cr2;
3221     int r=-1;
3222     int size=4;
3223     int dir=-1;
3224     int len=0;
3225     int j;
3226    
3227     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3228     if (!canbang)
3229     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3230     if (in_handler)
3231     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3232    
3233     if (canbang && i>=compiled_code && i<=current_compile_p) {
3234     if (*i==0x66) {
3235     i++;
3236     size=2;
3237     len++;
3238     }
3239    
3240     switch(i[0]) {
3241     case 0x8a:
3242     if ((i[1]&0xc0)==0x80) {
3243     r=(i[1]>>3)&7;
3244     dir=SIG_READ;
3245     size=1;
3246     len+=6;
3247     break;
3248     }
3249     break;
3250     case 0x88:
3251     if ((i[1]&0xc0)==0x80) {
3252     r=(i[1]>>3)&7;
3253     dir=SIG_WRITE;
3254     size=1;
3255     len+=6;
3256     break;
3257     }
3258     break;
3259     case 0x8b:
3260     if ((i[1]&0xc0)==0x80) {
3261     r=(i[1]>>3)&7;
3262     dir=SIG_READ;
3263     len+=6;
3264     break;
3265     }
3266     if ((i[1]&0xc0)==0x40) {
3267     r=(i[1]>>3)&7;
3268     dir=SIG_READ;
3269     len+=3;
3270     break;
3271     }
3272     break;
3273     case 0x89:
3274     if ((i[1]&0xc0)==0x80) {
3275     r=(i[1]>>3)&7;
3276     dir=SIG_WRITE;
3277     len+=6;
3278     break;
3279     }
3280     if ((i[1]&0xc0)==0x40) {
3281     r=(i[1]>>3)&7;
3282     dir=SIG_WRITE;
3283     len+=3;
3284     break;
3285     }
3286     break;
3287     }
3288     }
3289    
3290     if (r!=-1) {
3291     void* pr=NULL;
3292     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3293    
3294     switch(r) {
3295     case 0: pr=&(sc.eax); break;
3296     case 1: pr=&(sc.ecx); break;
3297     case 2: pr=&(sc.edx); break;
3298     case 3: pr=&(sc.ebx); break;
3299     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3300     case 5: pr=(size>1)?
3301     (void*)(&(sc.ebp)):
3302     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3303     case 6: pr=(size>1)?
3304     (void*)(&(sc.esi)):
3305     (void*)(((uae_u8*)&(sc.edx))+1); break;
3306     case 7: pr=(size>1)?
3307     (void*)(&(sc.edi)):
3308     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3309     default: abort();
3310     }
3311     if (pr) {
3312     blockinfo* bi;
3313    
3314     if (currprefs.comp_oldsegv) {
3315     addr-=NATMEM_OFFSET;
3316    
3317     if ((addr>=0x10000000 && addr<0x40000000) ||
3318     (addr>=0x50000000)) {
3319     write_log("Suspicious address in %x SEGV handler.\n",addr);
3320     }
3321     if (dir==SIG_READ) {
3322     switch(size) {
3323     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3324     case 2: *((uae_u16*)pr)=get_word(addr); break;
3325     case 4: *((uae_u32*)pr)=get_long(addr); break;
3326     default: abort();
3327     }
3328     }
3329     else { /* write */
3330     switch(size) {
3331     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3332     case 2: put_word(addr,*((uae_u16*)pr)); break;
3333     case 4: put_long(addr,*((uae_u32*)pr)); break;
3334     default: abort();
3335     }
3336     }
3337     write_log("Handled one access!\n");
3338     fflush(stdout);
3339     segvcount++;
3340     sc.eip+=len;
3341     }
3342     else {
3343     void* tmp=target;
3344     int i;
3345     uae_u8 vecbuf[5];
3346    
3347     addr-=NATMEM_OFFSET;
3348    
3349     if ((addr>=0x10000000 && addr<0x40000000) ||
3350     (addr>=0x50000000)) {
3351     write_log("Suspicious address in %x SEGV handler.\n",addr);
3352     }
3353    
3354     target=(uae_u8*)sc.eip;
3355     for (i=0;i<5;i++)
3356     vecbuf[i]=target[i];
3357     emit_byte(0xe9);
3358     emit_long((uae_u32)veccode-(uae_u32)target-4);
3359     write_log("Create jump to %p\n",veccode);
3360    
3361     write_log("Handled one access!\n");
3362     fflush(stdout);
3363     segvcount++;
3364    
3365     target=veccode;
3366    
3367     if (dir==SIG_READ) {
3368     switch(size) {
3369     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3370     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3371     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3372     default: abort();
3373     }
3374     }
3375     else { /* write */
3376     switch(size) {
3377     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3378     case 2: put_word(addr,*((uae_u16*)pr)); break;
3379     case 4: put_long(addr,*((uae_u32*)pr)); break;
3380     default: abort();
3381     }
3382     }
3383     for (i=0;i<5;i++)
3384     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3385     raw_mov_l_mi((uae_u32)&in_handler,0);
3386     emit_byte(0xe9);
3387     emit_long(sc.eip+len-(uae_u32)target-4);
3388     in_handler=1;
3389     target=tmp;
3390     }
3391     bi=active;
3392     while (bi) {
3393     if (bi->handler &&
3394     (uae_u8*)bi->direct_handler<=i &&
3395     (uae_u8*)bi->nexthandler>i) {
3396     write_log("deleted trigger (%p<%p<%p) %p\n",
3397     bi->handler,
3398     i,
3399     bi->nexthandler,
3400     bi->pc_p);
3401     invalidate_block(bi);
3402     raise_in_cl_list(bi);
3403     set_special(0);
3404     return;
3405     }
3406     bi=bi->next;
3407     }
3408     /* Not found in the active list. Might be a rom routine that
3409     is in the dormant list */
3410     bi=dormant;
3411     while (bi) {
3412     if (bi->handler &&
3413     (uae_u8*)bi->direct_handler<=i &&
3414     (uae_u8*)bi->nexthandler>i) {
3415     write_log("deleted trigger (%p<%p<%p) %p\n",
3416     bi->handler,
3417     i,
3418     bi->nexthandler,
3419     bi->pc_p);
3420     invalidate_block(bi);
3421     raise_in_cl_list(bi);
3422     set_special(0);
3423     return;
3424     }
3425     bi=bi->next;
3426     }
3427     write_log("Huh? Could not find trigger!\n");
3428     return;
3429     }
3430     }
3431     write_log("Can't handle access!\n");
3432     for (j=0;j<10;j++) {
3433     write_log("instruction byte %2d is %02x\n",j,i[j]);
3434     }
3435     write_log("Please send the above info (starting at \"fault address\") to\n"
3436     "bmeyer@csse.monash.edu.au\n"
3437     "This shouldn't happen ;-)\n");
3438     fflush(stdout);
3439     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3440     }
3441     #endif
3442    
3443    
3444     /*************************************************************************
3445     * Checking for CPU features *
3446     *************************************************************************/
3447    
3448 gbeauche 1.3 struct cpuinfo_x86 {
3449     uae_u8 x86; // CPU family
3450     uae_u8 x86_vendor; // CPU vendor
3451     uae_u8 x86_processor; // CPU canonical processor type
3452     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3453     uae_u32 x86_hwcap;
3454     uae_u8 x86_model;
3455     uae_u8 x86_mask;
3456     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3457     char x86_vendor_id[16];
3458     };
3459     struct cpuinfo_x86 cpuinfo;
3460    
/* Values for cpuinfo_x86::x86_vendor, see x86_get_cpu_vendor() */
enum {
    X86_VENDOR_INTEL        = 0,
    X86_VENDOR_CYRIX        = 1,
    X86_VENDOR_AMD          = 2,
    X86_VENDOR_UMC          = 3,
    X86_VENDOR_NEXGEN       = 4,
    X86_VENDOR_CENTAUR      = 5,
    X86_VENDOR_RISE         = 6,
    X86_VENDOR_TRANSMETA    = 7,
    X86_VENDOR_NSC          = 8,
    X86_VENDOR_UNKNOWN      = 0xff  /* vendor string not recognized */
};
3473    
/* Values for cpuinfo_x86::x86_processor.  They index the per-processor
   tables below, so the order must stay in sync with those tables. */
enum {
    X86_PROCESSOR_I386,         /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_K8,
    X86_PROCESSOR_max           /* number of entries; also "not detected" */
};
3485    
3486     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3487     "80386",
3488     "80486",
3489     "Pentium",
3490     "PentiumPro",
3491     "K6",
3492     "Athlon",
3493 gbeauche 1.16 "Pentium4",
3494     "K8"
3495 gbeauche 1.3 };
3496    
3497     static struct ptt {
3498     const int align_loop;
3499     const int align_loop_max_skip;
3500     const int align_jump;
3501     const int align_jump_max_skip;
3502     const int align_func;
3503     }
3504     x86_alignments[X86_PROCESSOR_max] = {
3505     { 4, 3, 4, 3, 4 },
3506     { 16, 15, 16, 15, 16 },
3507     { 16, 7, 16, 7, 16 },
3508     { 16, 15, 16, 7, 16 },
3509     { 32, 7, 32, 7, 32 },
3510 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3511 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3512     { 16, 7, 16, 7, 16 }
3513 gbeauche 1.3 };
3514 gbeauche 1.1
3515 gbeauche 1.3 static void
3516     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3517 gbeauche 1.1 {
3518 gbeauche 1.3 char *v = c->x86_vendor_id;
3519    
3520     if (!strcmp(v, "GenuineIntel"))
3521     c->x86_vendor = X86_VENDOR_INTEL;
3522     else if (!strcmp(v, "AuthenticAMD"))
3523     c->x86_vendor = X86_VENDOR_AMD;
3524     else if (!strcmp(v, "CyrixInstead"))
3525     c->x86_vendor = X86_VENDOR_CYRIX;
3526     else if (!strcmp(v, "Geode by NSC"))
3527     c->x86_vendor = X86_VENDOR_NSC;
3528     else if (!strcmp(v, "UMC UMC UMC "))
3529     c->x86_vendor = X86_VENDOR_UMC;
3530     else if (!strcmp(v, "CentaurHauls"))
3531     c->x86_vendor = X86_VENDOR_CENTAUR;
3532     else if (!strcmp(v, "NexGenDriven"))
3533     c->x86_vendor = X86_VENDOR_NEXGEN;
3534     else if (!strcmp(v, "RiseRiseRise"))
3535     c->x86_vendor = X86_VENDOR_RISE;
3536     else if (!strcmp(v, "GenuineTMx86") ||
3537     !strcmp(v, "TransmetaCPU"))
3538     c->x86_vendor = X86_VENDOR_TRANSMETA;
3539     else
3540     c->x86_vendor = X86_VENDOR_UNKNOWN;
3541     }
3542 gbeauche 1.1
3543 gbeauche 1.3 static void
3544     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3545     {
3546     static uae_u8 cpuid_space[256];
3547     uae_u8* tmp=get_target();
3548 gbeauche 1.1
3549 gbeauche 1.3 set_target(cpuid_space);
3550     raw_push_l_r(0); /* eax */
3551     raw_push_l_r(1); /* ecx */
3552     raw_push_l_r(2); /* edx */
3553     raw_push_l_r(3); /* ebx */
3554     raw_mov_l_rm(0,(uae_u32)&op);
3555     raw_cpuid(0);
3556     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
3557     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
3558     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
3559     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
3560     raw_pop_l_r(3);
3561     raw_pop_l_r(2);
3562     raw_pop_l_r(1);
3563     raw_pop_l_r(0);
3564     raw_ret();
3565     set_target(tmp);
3566 gbeauche 1.1
3567 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3568 gbeauche 1.1 }
3569    
3570 gbeauche 1.3 static void
3571     raw_init_cpu(void)
3572 gbeauche 1.1 {
3573 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3574    
3575     /* Defaults */
3576 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3577 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3578     c->cpuid_level = -1; /* CPUID not detected */
3579     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3580     c->x86_vendor_id[0] = '\0'; /* Unset */
3581     c->x86_hwcap = 0;
3582    
3583     /* Get vendor name */
3584     c->x86_vendor_id[12] = '\0';
3585     cpuid(0x00000000,
3586     (uae_u32 *)&c->cpuid_level,
3587     (uae_u32 *)&c->x86_vendor_id[0],
3588     (uae_u32 *)&c->x86_vendor_id[8],
3589     (uae_u32 *)&c->x86_vendor_id[4]);
3590     x86_get_cpu_vendor(c);
3591    
3592     /* Intel-defined flags: level 0x00000001 */
3593     c->x86_brand_id = 0;
3594     if ( c->cpuid_level >= 0x00000001 ) {
3595     uae_u32 tfms, brand_id;
3596     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3597     c->x86 = (tfms >> 8) & 15;
3598     c->x86_model = (tfms >> 4) & 15;
3599     c->x86_brand_id = brand_id & 0xff;
3600     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3601     (c->x86 == 0xf)) {
3602     /* AMD Extended Family and Model Values */
3603     c->x86 += (tfms >> 20) & 0xff;
3604     c->x86_model += (tfms >> 12) & 0xf0;
3605     }
3606     c->x86_mask = tfms & 15;
3607     } else {
3608     /* Have CPUID level 0 only - unheard of */
3609     c->x86 = 4;
3610     }
3611    
3612 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3613     uae_u32 xlvl;
3614     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3615     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3616     if ( xlvl >= 0x80000001 ) {
3617     uae_u32 features;
3618     cpuid(0x80000001, NULL, NULL, NULL, &features);
3619     if (features & (1 << 29)) {
3620     /* Assume x86-64 if long mode is supported */
3621     c->x86_processor = X86_PROCESSOR_K8;
3622     }
3623     }
3624     }
3625    
3626 gbeauche 1.3 /* Canonicalize processor ID */
3627     switch (c->x86) {
3628     case 3:
3629     c->x86_processor = X86_PROCESSOR_I386;
3630     break;
3631     case 4:
3632     c->x86_processor = X86_PROCESSOR_I486;
3633     break;
3634     case 5:
3635     if (c->x86_vendor == X86_VENDOR_AMD)
3636     c->x86_processor = X86_PROCESSOR_K6;
3637     else
3638     c->x86_processor = X86_PROCESSOR_PENTIUM;
3639     break;
3640     case 6:
3641     if (c->x86_vendor == X86_VENDOR_AMD)
3642     c->x86_processor = X86_PROCESSOR_ATHLON;
3643     else
3644     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3645     break;
3646     case 15:
3647     if (c->x86_vendor == X86_VENDOR_INTEL) {
3648 gbeauche 1.16 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3649 gbeauche 1.3 if (c->x86_brand_id >= 8)
3650     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3651     }
3652 gbeauche 1.16 if (c->x86_vendor == X86_VENDOR_AMD) {
3653     /* Assume an Athlon processor if family == 15 and it was not
3654     detected as an x86-64 so far */
3655     if (c->x86_processor == X86_PROCESSOR_max)
3656     c->x86_processor = X86_PROCESSOR_ATHLON;
3657     }
3658 gbeauche 1.3 break;
3659     }
3660     if (c->x86_processor == X86_PROCESSOR_max) {
3661     fprintf(stderr, "Error: unknown processor type\n");
3662     fprintf(stderr, " Family : %d\n", c->x86);
3663     fprintf(stderr, " Model : %d\n", c->x86_model);
3664     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3665 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3666 gbeauche 1.3 if (c->x86_brand_id)
3667     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3668     abort();
3669     }
3670    
3671     /* Have CMOV support? */
3672 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3673 gbeauche 1.3
3674     /* Can the host CPU suffer from partial register stalls? */
3675     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3676     #if 1
3677     /* It appears that partial register writes are a bad idea even on
3678 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3679     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3680 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3681     have_rat_stall = true;
3682 gbeauche 1.1 #endif
3683 gbeauche 1.3
3684     /* Alignments */
3685     if (tune_alignment) {
3686     align_loops = x86_alignments[c->x86_processor].align_loop;
3687     align_jumps = x86_alignments[c->x86_processor].align_jump;
3688     }
3689    
3690     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3691     c->cpuid_level, c->x86_vendor_id,
3692     x86_processor_string_table[c->x86_processor]);
3693 gbeauche 1.1 }
3694    
3695 gbeauche 1.10 static bool target_check_bsf(void)
3696     {
3697     bool mismatch = false;
3698     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3699     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3700     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3701     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3702     for (int value = -1; value <= 1; value++) {
3703     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3704     int tmp = value;
3705     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3706 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3707 gbeauche 1.10 int OF = (flags >> 11) & 1;
3708     int SF = (flags >> 7) & 1;
3709     int ZF = (flags >> 6) & 1;
3710     int CF = flags & 1;
3711     tmp = (value == 0);
3712     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3713     mismatch = true;
3714     }
3715     }}}}
3716     if (mismatch)
3717     write_log("Target CPU defines all flags on BSF instruction\n");
3718     return !mismatch;
3719     }
3720    
3721 gbeauche 1.1
3722     /*************************************************************************
3723     * FPU stuff *
3724     *************************************************************************/
3725    
3726    
3727     static __inline__ void raw_fp_init(void)
3728     {
3729     int i;
3730    
3731     for (i=0;i<N_FREGS;i++)
3732     live.spos[i]=-2;
3733     live.tos=-1; /* Stack is empty */
3734     }
3735    
3736     static __inline__ void raw_fp_cleanup_drop(void)
3737     {
3738     #if 0
3739     /* using FINIT instead of popping all the entries.
3740     Seems to have side effects --- there is display corruption in
3741     Quake when this is used */
3742     if (live.tos>1) {
3743     emit_byte(0x9b);
3744     emit_byte(0xdb);
3745     emit_byte(0xe3);
3746     live.tos=-1;
3747     }
3748     #endif
3749     while (live.tos>=1) {
3750     emit_byte(0xde);
3751     emit_byte(0xd9);
3752     live.tos-=2;
3753     }
3754     while (live.tos>=0) {
3755     emit_byte(0xdd);
3756     emit_byte(0xd8);
3757     live.tos--;
3758     }
3759     raw_fp_init();
3760     }
3761    
3762     static __inline__ void make_tos(int r)
3763     {
3764     int p,q;
3765    
3766     if (live.spos[r]<0) { /* Register not yet on stack */
3767     emit_byte(0xd9);
3768     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3769     live.tos++;
3770     live.spos[r]=live.tos;
3771     live.onstack[live.tos]=r;
3772     return;
3773     }
3774     /* Register is on stack */
3775     if (live.tos==live.spos[r])
3776     return;
3777     p=live.spos[r];
3778     q=live.onstack[live.tos];
3779    
3780     emit_byte(0xd9);
3781     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3782     live.onstack[live.tos]=r;
3783     live.spos[r]=live.tos;
3784     live.onstack[p]=q;
3785     live.spos[q]=p;
3786     }
3787    
3788     static __inline__ void make_tos2(int r, int r2)
3789     {
3790     int q;
3791    
3792     make_tos(r2); /* Put the reg that's supposed to end up in position2
3793     on top */
3794    
3795     if (live.spos[r]<0) { /* Register not yet on stack */
3796     make_tos(r); /* This will extend the stack */
3797     return;
3798     }
3799     /* Register is on stack */
3800     emit_byte(0xd9);
3801     emit_byte(0xc9); /* Move r2 into position 2 */
3802    
3803     q=live.onstack[live.tos-1];
3804     live.onstack[live.tos]=q;
3805     live.spos[q]=live.tos;
3806     live.onstack[live.tos-1]=r2;
3807     live.spos[r2]=live.tos-1;
3808    
3809     make_tos(r); /* And r into 1 */
3810     }
3811    
3812     static __inline__ int stackpos(int r)
3813     {
3814     if (live.spos[r]<0)
3815     abort();
3816     if (live.tos<live.spos[r]) {
3817     printf("Looking for spos for fnreg %d\n",r);
3818     abort();
3819     }
3820     return live.tos-live.spos[r];
3821     }
3822    
3823     static __inline__ void usereg(int r)
3824     {
3825     if (live.spos[r]<0)
3826     make_tos(r);
3827     }
3828    
3829     /* This is called with one FP value in a reg *above* tos, which it will
3830     pop off the stack if necessary */
3831     static __inline__ void tos_make(int r)
3832     {
3833     if (live.spos[r]<0) {
3834     live.tos++;
3835     live.spos[r]=live.tos;
3836     live.onstack[live.tos]=r;
3837     return;
3838     }
3839     emit_byte(0xdd);
3840     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3841     and pop it*/
3842     }
3843    
3844    
3845     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3846     {
3847     make_tos(r);
3848     emit_byte(0xdd);
3849     emit_byte(0x15);
3850     emit_long(m);
3851     }
3852     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3853    
3854     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
3855     {
3856     make_tos(r);
3857     emit_byte(0xdd);
3858     emit_byte(0x1d);
3859     emit_long(m);
3860     live.onstack[live.tos]=-1;
3861     live.tos--;
3862     live.spos[r]=-2;
3863     }
3864     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3865    
3866     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3867     {
3868     emit_byte(0xdd);
3869     emit_byte(0x05);
3870     emit_long(m);
3871     tos_make(r);
3872     }
3873     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3874    
3875     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3876     {
3877     emit_byte(0xdb);
3878     emit_byte(0x05);
3879     emit_long(m);
3880     tos_make(r);
3881     }
3882     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3883    
3884     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3885     {
3886     make_tos(r);
3887     emit_byte(0xdb);
3888     emit_byte(0x15);
3889     emit_long(m);
3890     }
3891     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3892    
3893     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3894     {
3895     emit_byte(0xd9);
3896     emit_byte(0x05);
3897     emit_long(m);
3898     tos_make(r);
3899     }
3900     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3901    
3902     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3903     {
3904     make_tos(r);
3905     emit_byte(0xd9);
3906     emit_byte(0x15);
3907     emit_long(m);
3908     }
3909     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3910    
3911     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3912     {
3913     int rs;
3914    
3915     /* Stupid x87 can't write a long double to mem without popping the
3916     stack! */
3917     usereg(r);
3918     rs=stackpos(r);
3919     emit_byte(0xd9); /* Get a copy to the top of stack */
3920     emit_byte(0xc0+rs);
3921    
3922     emit_byte(0xdb); /* store and pop it */
3923     emit_byte(0x3d);
3924     emit_long(m);
3925     }
3926     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3927    
3928     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
3929     {
3930     int rs;
3931    
3932     make_tos(r);
3933     emit_byte(0xdb); /* store and pop it */
3934     emit_byte(0x3d);
3935     emit_long(m);
3936     live.onstack[live.tos]=-1;
3937     live.tos--;
3938     live.spos[r]=-2;
3939     }
3940     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3941    
3942     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
3943     {
3944     emit_byte(0xdb);
3945     emit_byte(0x2d);
3946     emit_long(m);
3947     tos_make(r);
3948     }
3949     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
3950    
3951     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
3952     {
3953     emit_byte(0xd9);
3954     emit_byte(0xeb);
3955     tos_make(r);
3956     }
3957     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
3958    
3959     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
3960     {
3961     emit_byte(0xd9);
3962     emit_byte(0xec);
3963     tos_make(r);
3964     }
3965     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
3966    
3967     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
3968     {
3969     emit_byte(0xd9);
3970     emit_byte(0xea);
3971     tos_make(r);
3972     }
3973     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
3974    
3975     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
3976     {
3977     emit_byte(0xd9);
3978     emit_byte(0xed);
3979     tos_make(r);
3980     }
3981     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
3982    
3983     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
3984     {
3985     emit_byte(0xd9);
3986     emit_byte(0xe8);
3987     tos_make(r);
3988     }
3989     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
3990    
3991     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3992     {
3993     emit_byte(0xd9);
3994     emit_byte(0xee);
3995     tos_make(r);
3996     }
3997     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3998    
3999     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4000     {
4001     int ds;
4002    
4003     usereg(s);
4004     ds=stackpos(s);
4005     if (ds==0 && live.spos[d]>=0) {
4006     /* source is on top of stack, and we already have the dest */
4007     int dd=stackpos(d);
4008     emit_byte(0xdd);
4009     emit_byte(0xd0+dd);
4010     }
4011     else {
4012     emit_byte(0xd9);
4013     emit_byte(0xc0+ds); /* duplicate source on tos */
4014     tos_make(d); /* store to destination, pop if necessary */
4015     }
4016     }
4017     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4018    
4019     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4020     {
4021     emit_byte(0xd9);
4022     emit_byte(0xa8+index);
4023     emit_long(base);
4024     }
4025     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4026    
4027    
4028     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4029     {
4030     int ds;
4031    
4032     if (d!=s) {
4033     usereg(s);
4034     ds=stackpos(s);
4035     emit_byte(0xd9);
4036     emit_byte(0xc0+ds); /* duplicate source */
4037     emit_byte(0xd9);
4038     emit_byte(0xfa); /* take square root */
4039     tos_make(d); /* store to destination */
4040     }
4041     else {
4042     make_tos(d);
4043     emit_byte(0xd9);
4044     emit_byte(0xfa); /* take square root */
4045     }
4046     }
4047     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4048    
4049     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4050     {
4051     int ds;
4052    
4053     if (d!=s) {
4054     usereg(s);
4055     ds=stackpos(s);
4056     emit_byte(0xd9);
4057     emit_byte(0xc0+ds); /* duplicate source */
4058     emit_byte(0xd9);
4059     emit_byte(0xe1); /* take fabs */
4060     tos_make(d); /* store to destination */
4061     }
4062     else {
4063     make_tos(d);
4064     emit_byte(0xd9);
4065     emit_byte(0xe1); /* take fabs */
4066     }
4067     }
4068     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4069    
4070     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4071     {
4072     int ds;
4073    
4074     if (d!=s) {
4075     usereg(s);
4076     ds=stackpos(s);
4077     emit_byte(0xd9);
4078     emit_byte(0xc0+ds); /* duplicate source */
4079     emit_byte(0xd9);
4080     emit_byte(0xfc); /* take frndint */
4081     tos_make(d); /* store to destination */
4082     }
4083     else {
4084     make_tos(d);
4085     emit_byte(0xd9);
4086     emit_byte(0xfc); /* take frndint */
4087     }
4088     }
4089     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4090    
4091     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4092     {
4093     int ds;
4094    
4095     if (d!=s) {
4096     usereg(s);
4097     ds=stackpos(s);
4098     emit_byte(0xd9);
4099     emit_byte(0xc0+ds); /* duplicate source */
4100     emit_byte(0xd9);
4101     emit_byte(0xff); /* take cos */
4102     tos_make(d); /* store to destination */
4103     }
4104     else {
4105     make_tos(d);
4106     emit_byte(0xd9);
4107     emit_byte(0xff); /* take cos */
4108     }
4109     }
4110     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4111    
4112     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4113     {
4114     int ds;
4115    
4116     if (d!=s) {
4117     usereg(s);
4118     ds=stackpos(s);
4119     emit_byte(0xd9);
4120     emit_byte(0xc0+ds); /* duplicate source */
4121     emit_byte(0xd9);
4122     emit_byte(0xfe); /* take sin */
4123     tos_make(d); /* store to destination */
4124     }
4125     else {
4126     make_tos(d);
4127     emit_byte(0xd9);
4128     emit_byte(0xfe); /* take sin */
4129     }
4130     }
4131     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4132    
4133     double one=1;
4134     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4135     {
4136     int ds;
4137    
4138     usereg(s);
4139     ds=stackpos(s);
4140     emit_byte(0xd9);
4141     emit_byte(0xc0+ds); /* duplicate source */
4142    
4143     emit_byte(0xd9);
4144     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4145     emit_byte(0xd9);
4146     emit_byte(0xfc); /* rndint */
4147     emit_byte(0xd9);
4148     emit_byte(0xc9); /* swap top two elements */
4149     emit_byte(0xd8);
4150     emit_byte(0xe1); /* subtract rounded from original */
4151     emit_byte(0xd9);
4152     emit_byte(0xf0); /* f2xm1 */
4153     emit_byte(0xdc);
4154     emit_byte(0x05);
4155     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
4156     emit_byte(0xd9);
4157     emit_byte(0xfd); /* and scale it */
4158     emit_byte(0xdd);
4159     emit_byte(0xd9); /* take he rounded value off */
4160     tos_make(d); /* store to destination */
4161     }
4162     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4163    
4164     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4165     {
4166     int ds;
4167    
4168     usereg(s);
4169     ds=stackpos(s);
4170     emit_byte(0xd9);
4171     emit_byte(0xc0+ds); /* duplicate source */
4172     emit_byte(0xd9);
4173     emit_byte(0xea); /* fldl2e */
4174     emit_byte(0xde);
4175     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4176    
4177     emit_byte(0xd9);
4178     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4179     emit_byte(0xd9);
4180     emit_byte(0xfc); /* rndint */
4181     emit_byte(0xd9);
4182     emit_byte(0xc9); /* swap top two elements */
4183     emit_byte(0xd8);
4184     emit_byte(0xe1); /* subtract rounded from original */
4185     emit_byte(0xd9);
4186     emit_byte(0xf0); /* f2xm1 */
4187     emit_byte(0xdc);
4188     emit_byte(0x05);
4189     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
4190     emit_byte(0xd9);
4191     emit_byte(0xfd); /* and scale it */
4192     emit_byte(0xdd);
4193     emit_byte(0xd9); /* take he rounded value off */
4194     tos_make(d); /* store to destination */
4195     }
4196     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4197    
4198     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4199     {
4200     int ds;
4201    
4202     usereg(s);
4203     ds=stackpos(s);
4204     emit_byte(0xd9);
4205     emit_byte(0xc0+ds); /* duplicate source */
4206     emit_byte(0xd9);
4207     emit_byte(0xe8); /* push '1' */
4208     emit_byte(0xd9);
4209     emit_byte(0xc9); /* swap top two */
4210     emit_byte(0xd9);
4211     emit_byte(0xf1); /* take 1*log2(x) */
4212     tos_make(d); /* store to destination */
4213     }
4214     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4215    
4216    
4217     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4218     {
4219     int ds;
4220    
4221     if (d!=s) {
4222     usereg(s);
4223     ds=stackpos(s);
4224     emit_byte(0xd9);
4225     emit_byte(0xc0+ds); /* duplicate source */
4226     emit_byte(0xd9);
4227     emit_byte(0xe0); /* take fchs */
4228     tos_make(d); /* store to destination */
4229     }
4230     else {
4231     make_tos(d);
4232     emit_byte(0xd9);
4233     emit_byte(0xe0); /* take fchs */
4234     }
4235     }
4236     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4237    
4238     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4239     {
4240     int ds;
4241    
4242     usereg(s);
4243     usereg(d);
4244    
4245     if (live.spos[s]==live.tos) {
4246     /* Source is on top of stack */
4247     ds=stackpos(d);
4248     emit_byte(0xdc);
4249     emit_byte(0xc0+ds); /* add source to dest*/
4250     }
4251     else {
4252     make_tos(d);
4253     ds=stackpos(s);
4254    
4255     emit_byte(0xd8);
4256     emit_byte(0xc0+ds); /* add source to dest*/
4257     }
4258     }
4259     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4260    
4261     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4262     {
4263     int ds;
4264    
4265     usereg(s);
4266     usereg(d);
4267    
4268     if (live.spos[s]==live.tos) {
4269     /* Source is on top of stack */
4270     ds=stackpos(d);
4271     emit_byte(0xdc);
4272     emit_byte(0xe8+ds); /* sub source from dest*/
4273     }
4274     else {
4275     make_tos(d);
4276     ds=stackpos(s);
4277    
4278     emit_byte(0xd8);
4279     emit_byte(0xe0+ds); /* sub src from dest */
4280     }
4281     }
4282     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4283    
4284     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4285     {
4286     int ds;
4287    
4288     usereg(s);
4289     usereg(d);
4290    
4291     make_tos(d);
4292     ds=stackpos(s);
4293    
4294     emit_byte(0xdd);
4295     emit_byte(0xe0+ds); /* cmp dest with source*/
4296     }
4297     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4298    
4299     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4300     {
4301     int ds;
4302    
4303     usereg(s);
4304     usereg(d);
4305    
4306     if (live.spos[s]==live.tos) {
4307     /* Source is on top of stack */
4308     ds=stackpos(d);
4309     emit_byte(0xdc);
4310     emit_byte(0xc8+ds); /* mul dest by source*/
4311     }
4312     else {
4313     make_tos(d);
4314     ds=stackpos(s);
4315    
4316     emit_byte(0xd8);
4317     emit_byte(0xc8+ds); /* mul dest by source*/
4318     }
4319     }
4320     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4321    
4322     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4323     {
4324     int ds;
4325    
4326     usereg(s);
4327     usereg(d);
4328    
4329     if (live.spos[s]==live.tos) {
4330     /* Source is on top of stack */
4331     ds=stackpos(d);
4332     emit_byte(0xdc);
4333     emit_byte(0xf8+ds); /* div dest by source */
4334     }
4335     else {
4336     make_tos(d);
4337     ds=stackpos(s);
4338    
4339     emit_byte(0xd8);
4340     emit_byte(0xf0+ds); /* div dest by source*/
4341     }
4342     }
4343     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4344    
4345     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4346     {
4347     int ds;
4348    
4349     usereg(s);
4350     usereg(d);
4351    
4352     make_tos2(d,s);
4353     ds=stackpos(s);
4354    
4355     if (ds!=1) {
4356     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4357     abort();
4358     }
4359     emit_byte(0xd9);
4360     emit_byte(0xf8); /* take rem from dest by source */
4361     }
4362     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4363    
4364     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4365     {
4366     int ds;
4367    
4368     usereg(s);
4369     usereg(d);
4370    
4371     make_tos2(d,s);
4372     ds=stackpos(s);
4373    
4374     if (ds!=1) {
4375     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4376     abort();
4377     }
4378     emit_byte(0xd9);
4379     emit_byte(0xf5); /* take rem1 from dest by source */
4380     }
4381     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4382    
4383    
4384     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4385     {
4386     make_tos(r);
4387     emit_byte(0xd9); /* ftst */
4388     emit_byte(0xe4);
4389     }
4390     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4391    
/* %eax register is clobbered if target processor doesn't support fucomi.
 * raw_fflags_into_flags() picks fucomi when have_cmov is set; otherwise it
 * falls back to fucom + fstsw ax + sahf, which overwrites %eax.
 * The expansion is parenthesized so it stays a single operand wherever
 * the macro is used.
 */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
4395    
4396     static __inline__ void raw_fflags_into_flags(int r)
4397     {
4398     int p;
4399    
4400     usereg(r);
4401     p=stackpos(r);
4402    
4403     emit_byte(0xd9);
4404     emit_byte(0xee); /* Push 0 */
4405     emit_byte(0xd9);
4406     emit_byte(0xc9+p); /* swap top two around */
4407     if (have_cmov) {
4408     // gb-- fucomi is for P6 cores only, not K6-2 then...
4409     emit_byte(0xdb);
4410     emit_byte(0xe9+p); /* fucomi them */
4411     }
4412     else {
4413     emit_byte(0xdd);
4414     emit_byte(0xe1+p); /* fucom them */
4415     emit_byte(0x9b);
4416     emit_byte(0xdf);
4417     emit_byte(0xe0); /* fstsw ax */
4418     raw_sahf(0); /* sahf */
4419     }
4420     emit_byte(0xdd);
4421     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4422     }