ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.20
Committed: 2004-11-01T16:01:51Z (19 years, 10 months ago) by gbeauche
Branch: MAIN
Changes since 1.19: +107 -42 lines
Log Message:
revive and fix almost two-year old port to x86_64

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 cebix 1.19 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 gbeauche 1.6 * Gwenole Beauchesne
8     *
9 cebix 1.19 * Basilisk II (C) 1997-2004 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numeric indices naming the native general-purpose registers.  The
   REG_* / *_NREG role macros further down are defined in terms of these.
   R8_INDEX..R15_INDEX are only defined when building for x86-64. */
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45 gbeauche 1.20 #if defined(__x86_64__)
46     #define R8_INDEX 8
47     #define R9_INDEX 9
48     #define R10_INDEX 10
49     #define R11_INDEX 11
50     #define R12_INDEX 12
51     #define R13_INDEX 13
52     #define R14_INDEX 14
53     #define R15_INDEX 15
54     #endif
55 gbeauche 1.1
56     /* The register in which subroutines return an integer return value */
57 gbeauche 1.20 #define REG_RESULT EAX_INDEX
58 gbeauche 1.1
59     /* The registers subroutines take their first and second argument in */
60     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61     /* Handle the _fastcall parameters of ECX and EDX */
62 gbeauche 1.20 #define REG_PAR1 ECX_INDEX
63     #define REG_PAR2 EDX_INDEX
64     #elif defined(__x86_64__)
/* x86-64: first two integer arguments are taken in EDI_INDEX / ESI_INDEX
   (matches the System V AMD64 convention of %rdi, %rsi) */
65     #define REG_PAR1 EDI_INDEX
66     #define REG_PAR2 ESI_INDEX
67 gbeauche 1.1 #else
/* Default IA-32 case: arguments in EAX_INDEX / EDX_INDEX.
   NOTE(review): presumably relies on a regparm-style calling convention
   being selected elsewhere in the build -- confirm. */
68 gbeauche 1.20 #define REG_PAR1 EAX_INDEX
69     #define REG_PAR2 EDX_INDEX
70 gbeauche 1.1 #endif
71    
72 gbeauche 1.20 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 gbeauche 1.1 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): in this configuration REG_PC_TMP is EAX_INDEX, i.e. the
   same register as REG_PC_PRE, unlike the #else branch which requires a
   different register -- confirm this is intended. */
74 gbeauche 1.20 #define REG_PC_TMP EAX_INDEX
75 gbeauche 1.1 #else
76 gbeauche 1.20 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 gbeauche 1.1 #endif
78    
79 gbeauche 1.20 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 gbeauche 1.1 -1 if any reg will do */
81 gbeauche 1.20 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82     #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83 gbeauche 1.1
/* Register tables.  The uae_s8 lists below are terminated by -1 and hold
   register indices (see the *_INDEX macros); entry 4 is ESP_INDEX.
   NOTE(review): judging by their names, can_byte / can_word list the
   registers usable for 8-bit and 16-bit operations -- confirm with users. */
84     uae_s8 always_used[]={4,-1};
85 gbeauche 1.20 #if defined(__x86_64__)
86     uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87     uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88     #else
89 gbeauche 1.1 uae_s8 can_byte[]={0,1,2,3,-1};
90     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 gbeauche 1.20 #endif
92 gbeauche 1.1
93 gbeauche 1.17 #if USE_OPTIMIZED_CALLS
94     /* Make sure interpretive core does not use cpuopti */
/* NOTE(review): this variant has only 8 entries whereas the #else variant
   and need_to_preserve have 16; the #error below currently prevents this
   branch from being built at all. */
95     uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 gbeauche 1.20 #error FIXME: code not ready
97 gbeauche 1.17 #else
98 gbeauche 1.1 /* cpuopti mutate instruction handlers to assume registers are saved
99     by the caller */
/* Indexed by register number; only entry 4 (ESP_INDEX) is marked saved. */
100 gbeauche 1.20 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 gbeauche 1.17 #endif
102 gbeauche 1.1
103     /* This *should* be the same as call_saved. But:
104     - We might not really know which registers are saved, and which aren't,
105     so we need to preserve some, but don't want to rely on everyone else
106     also saving those registers
107     - Special registers (such as the stack pointer) should not be "preserved"
108     by pushing, even though they are "saved" across function calls
109     */
/* Indexed by register number; every entry except 4 (ESP_INDEX) is set. */
110 gbeauche 1.20 uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1};
111 gbeauche 1.1
112     /* Whether classes of instructions do or don't clobber the native flags */
/* A CLOBBER_xxx macro that expands to nothing marks an instruction class
   treated as leaving the flags untouched; the others expand to a call to
   clobber_flags() (defined outside this section). */
113     #define CLOBBER_MOV
114     #define CLOBBER_LEA
115     #define CLOBBER_CMOV
116     #define CLOBBER_POP
117     #define CLOBBER_PUSH
118     #define CLOBBER_SUB clobber_flags()
119     #define CLOBBER_SBB clobber_flags()
120     #define CLOBBER_CMP clobber_flags()
121     #define CLOBBER_ADD clobber_flags()
122     #define CLOBBER_ADC clobber_flags()
123     #define CLOBBER_AND clobber_flags()
124     #define CLOBBER_OR clobber_flags()
125     #define CLOBBER_XOR clobber_flags()
126    
127     #define CLOBBER_ROL clobber_flags()
128     #define CLOBBER_ROR clobber_flags()
129     #define CLOBBER_SHLL clobber_flags()
130     #define CLOBBER_SHRL clobber_flags()
131     #define CLOBBER_SHRA clobber_flags()
132     #define CLOBBER_TEST clobber_flags()
133     #define CLOBBER_CL16
134     #define CLOBBER_CL8
135 gbeauche 1.20 #define CLOBBER_SE32
136 gbeauche 1.1 #define CLOBBER_SE16
137     #define CLOBBER_SE8
138 gbeauche 1.20 #define CLOBBER_ZE32
139 gbeauche 1.1 #define CLOBBER_ZE16
140     #define CLOBBER_ZE8
/* 16-bit swap is marked as clobbering; raw_bswap_16 is built on ROLWir */
141     #define CLOBBER_SW16 clobber_flags()
142     #define CLOBBER_SW32
143     #define CLOBBER_SETCC
144     #define CLOBBER_MUL clobber_flags()
145     #define CLOBBER_BT clobber_flags()
146     #define CLOBBER_BSF clobber_flags()
147    
148 gbeauche 1.13 /* FIXME: the new runtime assembler (USE_NEW_RTASM) is only forced on for x86-64 here; elsewhere it stays off until that's proofread. */
149 gbeauche 1.20 #if defined(__x86_64__)
150     #define USE_NEW_RTASM 1
151     #endif
152    
153     #if USE_NEW_RTASM
154 gbeauche 1.13
/* Configuration knobs set before including codegen_x86.h.
   NOTE(review): their exact meaning is defined by that header -- confirm. */
155     #if defined(__x86_64__)
156     #define X86_TARGET_64BIT 1
157     #endif
158     #define X86_FLAT_REGISTERS 0
159 gbeauche 1.14 #define X86_OPTIMIZE_ALU 1
160     #define X86_OPTIMIZE_ROTSHI 1
161 gbeauche 1.13 #include "codegen_x86.h"
162    
/* Map the x86_* hook names onto this compiler's emit_* / get_target helpers;
   an emit failure is routed to jit_fail() together with the call site. */
163     #define x86_emit_byte(B) emit_byte(B)
164     #define x86_emit_word(W) emit_word(W)
165     #define x86_emit_long(L) emit_long(L)
166 gbeauche 1.20 #define x86_emit_quad(Q) emit_quad(Q)
167 gbeauche 1.13 #define x86_get_target() get_target()
168     #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
169    
/* Fatal-error handler behind x86_emit_failure(): prints the function, file,
   line and message to stderr, then terminates the process with abort().
   Does not return. */
170     static void jit_fail(const char *msg, const char *file, int line, const char *function)
171     {
172     fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
173     function, file, line, msg);
174     abort();
175     }
176    
/* Emit a push of register r: PUSHQr when built for x86-64, PUSHLr otherwise. */
177     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
178     {
179 gbeauche 1.20 #if defined(__x86_64__)
180     PUSHQr(r);
181     #else
182 gbeauche 1.13 PUSHLr(r);
183 gbeauche 1.20 #endif
184 gbeauche 1.13 }
185     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
186    
/* Emit a pop into register r: POPQr when built for x86-64, POPLr otherwise. */
187     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
188     {
189 gbeauche 1.20 #if defined(__x86_64__)
190     POPQr(r);
191     #else
192 gbeauche 1.13 POPLr(r);
193 gbeauche 1.20 #endif
194 gbeauche 1.13 }
195     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
196    
/* Emit BTLir: 32-bit bit test of bit number i (immediate) in register r. */
197     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
198     {
199     BTLir(i, r);
200     }
201     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
202    
/* Emit BTLrr: 32-bit bit test of register r, bit number taken from register b. */
203     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
204     {
205     BTLrr(b, r);
206     }
207     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
208    
/* Emit BTCLir: bit test-and-complement of bit number i (immediate) in 32-bit register r. */
209     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
210     {
211     BTCLir(i, r);
212     }
213     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
214    
/* Emit BTCLrr: bit test-and-complement in 32-bit register r, bit number from register b. */
215     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
216     {
217     BTCLrr(b, r);
218     }
219     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
220    
/* Emit BTRLir: bit test-and-reset of bit number i (immediate) in 32-bit register r. */
221     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
222     {
223     BTRLir(i, r);
224     }
225     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
226    
/* Emit BTRLrr: bit test-and-reset in 32-bit register r, bit number from register b. */
227     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
228     {
229     BTRLrr(b, r);
230     }
231     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
232    
/* Emit BTSLir: bit test-and-set of bit number i (immediate) in 32-bit register r. */
233     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
234     {
235     BTSLir(i, r);
236     }
237     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
238    
/* Emit BTSLrr: bit test-and-set in 32-bit register r, bit number from register b. */
239     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
240     {
241     BTSLrr(b, r);
242     }
243     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
244    
/* Emit SUBWir: subtract immediate i from the 16-bit register d. */
245     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
246     {
247     SUBWir(i, d);
248     }
249     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
250    
/* Emit MOVLmr: 32-bit load into register d from absolute address s
   (no base register, no index register). */
251     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
252     {
253     MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
254     }
255     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
256    
/* Emit MOVLim: store the 32-bit immediate s to absolute address d. */
257     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
258     {
259     MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
260     }
261     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
262    
/* Emit MOVWim: store the 16-bit immediate s to absolute address d. */
263     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
264     {
265     MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
266     }
267     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
268    
/* Emit MOVBim: store the 8-bit immediate s to absolute address d. */
269     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
270     {
271     MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
272     }
273     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
274    
/* Emit ROLBim: rotate the byte at absolute address d left by immediate i. */
275     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
276     {
277     ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
278     }
279     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
280    
/* Emit ROLBir: rotate the 8-bit register r left by immediate i. */
281     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
282     {
283     ROLBir(i, r);
284     }
285     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
286    
/* Emit ROLWir: rotate the 16-bit register r left by immediate i. */
287     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
288     {
289     ROLWir(i, r);
290     }
291     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
292    
/* Emit ROLLir: rotate the 32-bit register r left by immediate i. */
293     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
294     {
295     ROLLir(i, r);
296     }
297     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
298    
/* Emit ROLLrr: rotate the 32-bit register d left by the count in byte register r. */
299     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
300     {
301     ROLLrr(r, d);
302     }
303     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
304    
/* Emit ROLWrr: rotate the 16-bit register d left by the count in byte register r. */
305     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
306     {
307     ROLWrr(r, d);
308     }
309     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
310    
/* Emit ROLBrr: rotate the 8-bit register d left by the count in byte register r. */
311     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
312     {
313     ROLBrr(r, d);
314     }
315     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
316    
/* Emit SHLLrr: shift the 32-bit register d left by the count in byte register r. */
317     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
318     {
319     SHLLrr(r, d);
320     }
321     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
322    
/* Emit SHLWrr: shift the 16-bit register d left by the count in byte register r. */
323     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
324     {
325     SHLWrr(r, d);
326     }
327     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
328    
/* Emit SHLBrr: shift the 8-bit register d left by the count in byte register r. */
329     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
330     {
331     SHLBrr(r, d);
332     }
333     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
334    
/* Emit RORBir: rotate the 8-bit register r right by immediate i. */
335     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
336     {
337     RORBir(i, r);
338     }
339     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
340    
/* Emit RORWir: rotate the 16-bit register r right by immediate i. */
341     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
342     {
343     RORWir(i, r);
344     }
345     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
346    
/* Emit ORLmr: OR the 32-bit value at absolute address s into register d. */
347     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
348     {
349     ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
350     }
351     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
352    
/* Emit RORLir: rotate the 32-bit register r right by immediate i. */
353     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
354     {
355     RORLir(i, r);
356     }
357     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
358    
/* Emit RORLrr: rotate the 32-bit register d right by the count in byte register r. */
359     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
360     {
361     RORLrr(r, d);
362     }
363     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
364    
/* Emit RORWrr: rotate the 16-bit register d right by the count in byte register r. */
365     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
366     {
367     RORWrr(r, d);
368     }
369     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
370    
/* Emit RORBrr: rotate the 8-bit register d right by the count in byte register r. */
371     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
372     {
373     RORBrr(r, d);
374     }
375     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
376    
/* Emit SHRLrr: logical right shift of the 32-bit register d by the count in byte register r. */
377     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
378     {
379     SHRLrr(r, d);
380     }
381     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
382    
/* Emit SHRWrr: logical right shift of the 16-bit register d by the count in byte register r. */
383     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
384     {
385     SHRWrr(r, d);
386     }
387     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
388    
/* Emit SHRBrr: logical right shift of the 8-bit register d by the count in byte register r. */
389     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
390     {
391     SHRBrr(r, d);
392     }
393     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
394    
/* Emit SARLrr: arithmetic right shift of the 32-bit register d by the count in byte register r. */
395     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
396     {
397 gbeauche 1.14 SARLrr(r, d);
398 gbeauche 1.13 }
399     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
400    
/* Emit SARWrr: arithmetic right shift of the 16-bit register d by the count in byte register r. */
401     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
402     {
403 gbeauche 1.14 SARWrr(r, d);
404 gbeauche 1.13 }
405     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
406    
/* Emit SARBrr: arithmetic right shift of the 8-bit register d by the count in byte register r. */
407     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
408     {
409 gbeauche 1.14 SARBrr(r, d);
410 gbeauche 1.13 }
411     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
412    
/* Emit SHLLir: shift the 32-bit register r left by immediate i. */
413     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
414     {
415     SHLLir(i, r);
416     }
417     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
418    
/* Emit SHLWir: shift the 16-bit register r left by immediate i. */
419     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
420     {
421     SHLWir(i, r);
422     }
423     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
424    
/* Emit SHLBir: shift the 8-bit register r left by immediate i. */
425     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
426     {
427     SHLBir(i, r);
428     }
429     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
430    
/* Emit SHRLir: logical right shift of the 32-bit register r by immediate i. */
431     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
432     {
433     SHRLir(i, r);
434     }
435     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
436    
/* Emit SHRWir: logical right shift of the 16-bit register r by immediate i. */
437     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
438     {
439     SHRWir(i, r);
440     }
441     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
442    
/* Emit SHRBir: logical right shift of the 8-bit register r by immediate i. */
443     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
444     {
445     SHRBir(i, r);
446     }
447     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
448    
/* Emit SARLir: arithmetic right shift of the 32-bit register r by immediate i. */
449     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
450     {
451 gbeauche 1.14 SARLir(i, r);
452 gbeauche 1.13 }
453     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
454    
/* Emit SARWir: arithmetic right shift of the 16-bit register r by immediate i. */
455     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
456     {
457 gbeauche 1.14 SARWir(i, r);
458 gbeauche 1.13 }
459     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
460    
/* Emit SARBir: arithmetic right shift of the 8-bit register r by immediate i. */
461     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
462     {
463 gbeauche 1.14 SARBir(i, r);
464 gbeauche 1.13 }
465     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
466    
/* Emit SAHF.  The dummy_ah parameter is not referenced in the body.
   NOTE(review): it presumably exists so callers account for AH being read
   by the instruction -- confirm. */
467     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
468     {
469     SAHF();
470     }
471     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
472    
/* Emit CPUID.  The dummy_eax parameter is not referenced in the body.
   NOTE(review): it presumably stands for the EAX input of the instruction --
   confirm. */
473     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
474     {
475     CPUID();
476     }
477     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
478    
/* Emit LAHF.  The dummy_ah parameter is not referenced in the body.
   NOTE(review): it presumably exists so callers account for AH being
   written by the instruction -- confirm. */
479     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
480     {
481     LAHF();
482     }
483     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
484    
/* Emit SETCCir: set the byte register d according to condition code cc. */
485     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
486     {
487     SETCCir(cc, d);
488     }
489     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
490    
/* Emit SETCCim: set the byte at absolute address d according to condition code cc. */
491     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
492     {
493     SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
494     }
495     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
496    
/* Conditionally copy the 32-bit register s into d when condition cc holds.
   With have_cmov set this is a single CMOVLrr.  Otherwise a conditional jump
   on cc^1 (low bit flipped, presumably the inverse condition) skips over a
   plain MOVLrr.  In x86-64 builds the fallback logs a message and aborts
   before emitting anything. */
497     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
498     {
499 gbeauche 1.15 if (have_cmov)
500     CMOVLrr(cc, s, d);
501     else { /* replacement using branch and mov */
502     #if defined(__x86_64__)
503     write_log("x86-64 implementations are bound to have CMOV!\n");
504     abort();
505     #endif
/* NOTE(review): the skip distance 2 presumably equals the encoded size of
   the MOVLrr below -- confirm against codegen_x86.h. */
506     JCCSii(cc^1, 2);
507     MOVLrr(s, d);
508     }
509 gbeauche 1.13 }
510     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
511    
/* Emit BSFLrr: bit-scan-forward of the 32-bit register s, result into d. */
512     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
513     {
514     BSFLrr(s, d);
515     }
516     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
517    
/* Emit MOVSLQrr: sign-extend the 32-bit register s into d.
   NOTE(review): the "Q" destination suggests a 64-bit result, yet this
   wrapper is not guarded by __x86_64__ -- confirm it is only used on
   x86-64 targets. */
518 gbeauche 1.20 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
519     {
520     MOVSLQrr(s, d);
521     }
522     LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
523    
/* Emit MOVSWLrr: sign-extend the 16-bit register s into the 32-bit register d. */
524 gbeauche 1.13 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
525     {
526     MOVSWLrr(s, d);
527     }
528     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
529    
/* Emit MOVSBLrr: sign-extend the 8-bit register s into the 32-bit register d. */
530     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
531     {
532     MOVSBLrr(s, d);
533     }
534     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
535    
/* Emit MOVZWLrr: zero-extend the 16-bit register s into the 32-bit register d. */
536     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
537     {
538     MOVZWLrr(s, d);
539     }
540     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
541    
/* Emit MOVZBLrr: zero-extend the 8-bit register s into the 32-bit register d. */
542     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
543     {
544     MOVZBLrr(s, d);
545     }
546     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
547    
/* Emit IMULLrr: signed 32-bit multiply of register d by register s, result in d.
   NOTE(review): the flags annotation is NONE although CLOBBER_MUL is
   defined as clobber_flags() -- confirm the annotation is intended. */
548     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
549     {
550 gbeauche 1.14 IMULLrr(s, d);
551 gbeauche 1.13 }
552     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
553    
/* Signed widening multiply via the one-operand IMULLr(s).
   Only accepts d == MUL_NREG1 (EAX_INDEX) and s == MUL_NREG2 (EDX_INDEX);
   any other pairing is logged through write_log() and aborts.  Per the
   MUL_NREG1/MUL_NREG2 comments, the low 32 bits of the product end up in
   %eax and the high 32 bits in %edx. */
554     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
555     {
556 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
557     write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
558 gbeauche 1.13 abort();
559 gbeauche 1.14 }
560     IMULLr(s);
561 gbeauche 1.13 }
562     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
563    
/* Unsigned widening multiply via the one-operand MULLr(s).
   Only accepts d == MUL_NREG1 (EAX_INDEX) and s == MUL_NREG2 (EDX_INDEX);
   any other pairing is logged through write_log() and aborts.  Per the
   MUL_NREG1/MUL_NREG2 comments, the low 32 bits of the product end up in
   %eax and the high 32 bits in %edx. */
564     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
565     {
566 gbeauche 1.14 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
567     write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
568 gbeauche 1.13 abort();
569 gbeauche 1.14 }
570     MULLr(s);
571 gbeauche 1.13 }
572     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
573    
/* Not implemented on x86: calling this unconditionally aborts the process.
   Neither d nor s is used. */
574     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
575     {
576 gbeauche 1.14 abort(); /* %^$&%^$%#^ x86! */
577 gbeauche 1.13 }
578     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
579    
/* Emit MOVBrr: copy the 8-bit register s into d. */
580     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
581     {
582     MOVBrr(s, d);
583     }
584     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
585    
/* Emit MOVWrr: copy the 16-bit register s into d. */
586     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
587     {
588     MOVWrr(s, d);
589     }
590     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
591    
/* Emit MOVLmr: 32-bit load into d from [baser + index*factor], displacement 0. */
592     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
593     {
594     MOVLmr(0, baser, index, factor, d);
595     }
596     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
597    
/* Emit MOVWmr: 16-bit load into d from [baser + index*factor], displacement 0. */
598     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
599     {
600     MOVWmr(0, baser, index, factor, d);
601     }
602     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
603    
/* Emit MOVBmr: 8-bit load into d from [baser + index*factor], displacement 0. */
604     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
605     {
606     MOVBmr(0, baser, index, factor, d);
607     }
608     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
609    
/* Emit MOVLrm: 32-bit store of register s to [baser + index*factor], displacement 0. */
610     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
611     {
612     MOVLrm(s, 0, baser, index, factor);
613     }
614     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
615    
/* Emit MOVWrm: 16-bit store of register s to [baser + index*factor], displacement 0. */
616     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
617     {
618     MOVWrm(s, 0, baser, index, factor);
619     }
620     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
621    
/* Emit MOVBrm: 8-bit store of register s to [baser + index*factor], displacement 0. */
622     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
623     {
624     MOVBrm(s, 0, baser, index, factor);
625     }
626     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
627    
/* Emit MOVLrm: 32-bit store of register s to [base + baser + index*factor],
   where base is an immediate displacement. */
628     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
629     {
630     MOVLrm(s, base, baser, index, factor);
631     }
632     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
633    
/* Emit MOVWrm: 16-bit store of register s to [base + baser + index*factor],
   where base is an immediate displacement. */
634     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
635     {
636     MOVWrm(s, base, baser, index, factor);
637     }
638     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
639    
/* Emit MOVBrm: 8-bit store of register s to [base + baser + index*factor],
   where base is an immediate displacement. */
640     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
641     {
642     MOVBrm(s, base, baser, index, factor);
643     }
644     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
645    
/* Emit MOVLmr: 32-bit load into d from [base + baser + index*factor],
   where base is an immediate displacement. */
646     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
647     {
648     MOVLmr(base, baser, index, factor, d);
649     }
650     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
651    
/* Emit MOVWmr: 16-bit load into d from [base + baser + index*factor],
   where base is an immediate displacement. */
652     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
653     {
654     MOVWmr(base, baser, index, factor, d);
655     }
656     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
657    
/* Emit MOVBmr: 8-bit load into d from [base + baser + index*factor],
   where base is an immediate displacement. */
658     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
659     {
660     MOVBmr(base, baser, index, factor, d);
661     }
662     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
663    
/* Emit MOVLmr: 32-bit load into d from [base + index*factor]; base is an
   immediate displacement and no base register is used. */
664     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
665     {
666     MOVLmr(base, X86_NOREG, index, factor, d);
667     }
668     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
669    
/* Conditionally load 32 bits into d from [base + index*factor] when
   condition cond holds.  With have_cmov set this is a single CMOVLmr.
   Otherwise a conditional jump on cond^1 (low bit flipped, presumably the
   inverse condition) skips over a plain MOVLmr.  In x86-64 builds the
   fallback logs a message and aborts before emitting anything.
   NOTE(review): the flags annotation here is NONE whereas raw_cmov_l_rr
   uses READ, although both select on a condition code -- confirm. */
670     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
671     {
672 gbeauche 1.15 if (have_cmov)
673     CMOVLmr(cond, base, X86_NOREG, index, factor, d);
674     else { /* replacement using branch and mov */
675     #if defined(__x86_64__)
676     write_log("x86-64 implementations are bound to have CMOV!\n");
677     abort();
678     #endif
/* NOTE(review): the skip distance 7 presumably equals the encoded size of
   the MOVLmr below (opcode, ModRM, SIB, 32-bit displacement) -- confirm
   that the assembler never picks a shorter or longer encoding here. */
679     JCCSii(cond^1, 7);
680     MOVLmr(base, X86_NOREG, index, factor, d);
681     }
682 gbeauche 1.13 }
683     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
684    
/* Conditionally load 32 bits into d from absolute address mem when
   condition cond holds.  With have_cmov set this is a single CMOVLmr.
   Otherwise a conditional jump on cond^1 (low bit flipped, presumably the
   inverse condition) skips over a plain MOVLmr.  In x86-64 builds the
   fallback logs a message and aborts before emitting anything.
   NOTE(review): the flags annotation here is NONE whereas raw_cmov_l_rr
   uses READ, although both select on a condition code -- confirm. */
685     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
686     {
687 gbeauche 1.15 if (have_cmov)
688     CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
689     else { /* replacement using branch and mov */
690     #if defined(__x86_64__)
691     write_log("x86-64 implementations are bound to have CMOV!\n");
692     abort();
693     #endif
/* NOTE(review): the skip distance 6 presumably equals the encoded size of
   the MOVLmr below (opcode, ModRM, 32-bit displacement) -- confirm that no
   shorter form is chosen, e.g. when d is EAX. */
694     JCCSii(cond^1, 6);
695     MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
696     }
697 gbeauche 1.13 }
698     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
699    
/* Emit MOVLmr: 32-bit load into d from [s + offset]. */
700     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
701     {
702     MOVLmr(offset, s, X86_NOREG, 1, d);
703     }
704     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
705    
/* Emit MOVWmr: 16-bit load into d from [s + offset]. */
706     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
707     {
708     MOVWmr(offset, s, X86_NOREG, 1, d);
709     }
710     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
711    
/* Emit MOVBmr: 8-bit load into d from [s + offset]. */
712     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
713     {
714     MOVBmr(offset, s, X86_NOREG, 1, d);
715     }
716     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
717    
/* Emit MOVLmr: 32-bit load into d from [s + offset].
   The body is identical to raw_mov_l_rR. */
718     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
719     {
720     MOVLmr(offset, s, X86_NOREG, 1, d);
721     }
722     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
723    
/* Emit MOVWmr: 16-bit load into d from [s + offset].
   The body is identical to raw_mov_w_rR. */
724     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
725     {
726     MOVWmr(offset, s, X86_NOREG, 1, d);
727     }
728     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
729    
/* Emit MOVBmr: 8-bit load into d from [s + offset].
   The body is identical to raw_mov_b_rR. */
730     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
731     {
732     MOVBmr(offset, s, X86_NOREG, 1, d);
733     }
734     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
735    
/* Emit MOVLim: store the 32-bit immediate i to [d + offset]. */
736     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
737     {
738     MOVLim(i, offset, d, X86_NOREG, 1);
739     }
740     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
741    
/* Emit MOVWim: store the 16-bit immediate i to [d + offset]. */
742     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
743     {
744     MOVWim(i, offset, d, X86_NOREG, 1);
745     }
746     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
747    
/* Emit MOVBim: store the 8-bit immediate i to [d + offset]. */
748     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
749     {
750     MOVBim(i, offset, d, X86_NOREG, 1);
751     }
752     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
753    
/* Emit MOVLrm: 32-bit store of register s to [d + offset]. */
754     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
755     {
756     MOVLrm(s, offset, d, X86_NOREG, 1);
757     }
758     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
759    
/* Emit MOVWrm: 16-bit store of register s to [d + offset]. */
760     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
761     {
762     MOVWrm(s, offset, d, X86_NOREG, 1);
763     }
764     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
765    
/* Emit MOVBrm: 8-bit store of register s to [d + offset]. */
766     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
767     {
768     MOVBrm(s, offset, d, X86_NOREG, 1);
769     }
770     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
771    
/* Emit LEALmr: load the effective address s + offset into d (no index register). */
772     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
773     {
774     LEALmr(offset, s, X86_NOREG, 1, d);
775     }
776     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
777    
/* Emit LEALmr: load the effective address s + index*factor + offset into d. */
778     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
779     {
780     LEALmr(offset, s, index, factor, d);
781     }
782     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
783    
/* Emit LEALmr: load the effective address s + index*factor into d (displacement 0). */
784     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
785     {
786     LEALmr(0, s, index, factor, d);
787     }
788     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
789    
/* Emit MOVLrm: 32-bit store of register s to [d + offset].
   The body is identical to raw_mov_l_Rr. */
790     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
791     {
792     MOVLrm(s, offset, d, X86_NOREG, 1);
793     }
794     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
795    
/* Emit MOVWrm: 16-bit store of register s to [d + offset].
   The body is identical to raw_mov_w_Rr. */
796     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
797     {
798     MOVWrm(s, offset, d, X86_NOREG, 1);
799     }
800     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
801    
/* Emit MOVBrm: 8-bit store of register s to [d + offset].
   The body is identical to raw_mov_b_Rr. */
802     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
803     {
804     MOVBrm(s, offset, d, X86_NOREG, 1);
805     }
806     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
807    
/* Emit BSWAPLr: reverse the byte order of the 32-bit register r. */
808     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
809     {
810     BSWAPLr(r);
811     }
812     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
813    
/* Swap the two bytes of the 16-bit register r by emitting ROLWir with a
   rotate count of 8. */
814     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
815     {
816     ROLWir(8, r);
817     }
818     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
819    
/* Emit MOVLrr: copy the 32-bit register s into d. */
820     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
821     {
822     MOVLrr(s, d);
823     }
824     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
825    
/* Emit MOVLrm: 32-bit store of register s to absolute address d. */
826     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
827     {
828     MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
829     }
830     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
831    
/* Emit MOVWrm: 16-bit store of register s to absolute address d. */
832     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
833     {
834     MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
835     }
836     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
837    
/* Emit MOVWmr: 16-bit load into register d from absolute address s. */
838     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
839     {
840     MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
841     }
842     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
843    
/* Emit MOVBrm: 8-bit store of register s to absolute address d. */
844     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
845     {
846     MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
847     }
848     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
849    
/* Emit MOVBmr: 8-bit load into register d from absolute address s. */
850     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
851     {
852     MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
853     }
854     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
855    
/* Emit MOVLir: load the 32-bit immediate s into register d. */
856     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
857     {
858     MOVLir(s, d);
859     }
860     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
861    
/* Emit MOVWir: load the 16-bit immediate s into register d. */
862     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
863     {
864     MOVWir(s, d);
865     }
866     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
867    
/* Emit MOVBir: load the 8-bit immediate s into register d. */
868     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
869     {
870     MOVBir(s, d);
871     }
872     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
873    
/* Emit ADCLim: add-with-carry of immediate s to the 32-bit value at absolute address d. */
874     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
875     {
876     ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
877     }
878     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
879    
/* Emit ADDLim: add immediate s to the 32-bit value at absolute address d. */
880     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
881     {
882     ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
883     }
884     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
885    
/* Emit ADDWim: add immediate s to the 16-bit value at absolute address d. */
886     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
887     {
888     ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
889     }
890     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
891    
/* Emit ADDBim: add immediate s to the 8-bit value at absolute address d. */
892     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
893     {
894     ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
895     }
896     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
897    
/* Emit TESTLir: TEST the 32-bit register d against immediate i (register unchanged). */
898     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
899     {
900     TESTLir(i, d);
901     }
902     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
903    
/* Emit TESTLrr: TEST the 32-bit registers d and s against each other. */
904     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
905     {
906     TESTLrr(s, d);
907     }
908     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
909    
/* Emit TESTWrr: TEST the 16-bit registers d and s against each other. */
910     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
911     {
912     TESTWrr(s, d);
913     }
914     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
915    
/* Emit TESTBrr: TEST the 8-bit registers d and s against each other. */
916     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
917     {
918     TESTBrr(s, d);
919     }
920     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
921    
/* Emit ANDLir: AND immediate i into the 32-bit register d. */
922     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
923     {
924     ANDLir(i, d);
925     }
926     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
927    
/* Emit ANDWir: AND immediate i into the 16-bit register d. */
928     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
929     {
930     ANDWir(i, d);
931     }
932     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
933    
/* Emit ANDLrr: AND the 32-bit register s into d. */
934     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
935     {
936     ANDLrr(s, d);
937     }
938     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
939    
940     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
941     {
942     ANDWrr(s, d);
943     }
944     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
945    
946     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
947     {
948     ANDBrr(s, d);
949     }
950     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
951    
952     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
953     {
954     ORLir(i, d);
955     }
956     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
957    
958     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
959     {
960     ORLrr(s, d);
961     }
962     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
963    
964     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
965     {
966     ORWrr(s, d);
967     }
968     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
969    
970     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
971     {
972     ORBrr(s, d);
973     }
974     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
975    
976     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
977     {
978     ADCLrr(s, d);
979     }
980     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
981    
982     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
983     {
984     ADCWrr(s, d);
985     }
986     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
987    
988     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
989     {
990     ADCBrr(s, d);
991     }
992     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
993    
994     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
995     {
996     ADDLrr(s, d);
997     }
998     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
999    
1000     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1001     {
1002     ADDWrr(s, d);
1003     }
1004     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1005    
1006     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1007     {
1008     ADDBrr(s, d);
1009     }
1010     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1011    
1012     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1013     {
1014     SUBLir(i, d);
1015     }
1016     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1017    
1018     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1019     {
1020     SUBBir(i, d);
1021     }
1022     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1023    
1024     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1025     {
1026     ADDLir(i, d);
1027     }
1028     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1029    
1030     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1031     {
1032     ADDWir(i, d);
1033     }
1034     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1035    
1036     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1037     {
1038     ADDBir(i, d);
1039     }
1040     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1041    
1042     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1043     {
1044     SBBLrr(s, d);
1045     }
1046     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1047    
1048     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1049     {
1050     SBBWrr(s, d);
1051     }
1052     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1053    
1054     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1055     {
1056     SBBBrr(s, d);
1057     }
1058     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1059    
1060     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1061     {
1062     SUBLrr(s, d);
1063     }
1064     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1065    
1066     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1067     {
1068     SUBWrr(s, d);
1069     }
1070     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1071    
1072     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1073     {
1074     SUBBrr(s, d);
1075     }
1076     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1077    
1078     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1079     {
1080     CMPLrr(s, d);
1081     }
1082     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1083    
1084     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1085     {
1086     CMPLir(i, r);
1087     }
1088     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1089    
1090     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1091     {
1092     CMPWrr(s, d);
1093     }
1094     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1095    
1096     LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1097     {
1098     CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1099     }
1100     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1101    
1102     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1103     {
1104     CMPBir(i, d);
1105     }
1106     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1107    
1108     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1109     {
1110     CMPBrr(s, d);
1111     }
1112     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1113    
1114     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1115     {
1116     CMPLmr(offset, X86_NOREG, index, factor, d);
1117     }
1118     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1119    
1120     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1121     {
1122     XORLrr(s, d);
1123     }
1124     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1125    
1126     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1127     {
1128     XORWrr(s, d);
1129     }
1130     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1131    
1132     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1133     {
1134     XORBrr(s, d);
1135     }
1136     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1137    
1138     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1139     {
1140     SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1141     }
1142     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1143    
1144     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1145     {
1146     CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1147     }
1148     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1149    
1150     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1151     {
1152     XCHGLrr(r2, r1);
1153     }
1154     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1155    
1156     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1157     {
1158 gbeauche 1.18 PUSHF();
1159 gbeauche 1.13 }
1160     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1161    
1162     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1163     {
1164 gbeauche 1.18 POPF();
1165 gbeauche 1.13 }
1166     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1167    
1168     #else
1169    
/* Encoder tuning switches for the hand-written emitters below. */
const bool optimize_accum		= true;	/* allow the short accumulator-only opcode forms (see isaccum) */
const bool optimize_imm8		= true;	/* NOTE(review): not referenced in this part of the file; presumably gates imm8 encodings */
const bool optimize_shift_once	= true;	/* use the shift/rotate-by-one opcodes (D0/D1) when the count is 1 */
1173    
1174     /*************************************************************************
1175     * Actual encoding of the instructions on the target CPU *
1176     *************************************************************************/
1177    
1178 gbeauche 1.2 static __inline__ int isaccum(int r)
1179     {
1180     return (r == EAX_INDEX);
1181     }
1182    
1183 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
1184     {
1185     return (x>=-128 && x<=127);
1186     }
1187    
1188     static __inline__ int isword(uae_s32 x)
1189     {
1190     return (x>=-32768 && x<=32767);
1191     }
1192    
1193     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1194     {
1195     emit_byte(0x50+r);
1196     }
1197     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1198    
1199     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1200     {
1201     emit_byte(0x58+r);
1202     }
1203     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1204    
1205     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1206     {
1207     emit_byte(0x0f);
1208     emit_byte(0xba);
1209     emit_byte(0xe0+r);
1210     emit_byte(i);
1211     }
1212     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1213    
1214     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1215     {
1216     emit_byte(0x0f);
1217     emit_byte(0xa3);
1218     emit_byte(0xc0+8*b+r);
1219     }
1220     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1221    
1222     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1223     {
1224     emit_byte(0x0f);
1225     emit_byte(0xba);
1226     emit_byte(0xf8+r);
1227     emit_byte(i);
1228     }
1229     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1230    
1231     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1232     {
1233     emit_byte(0x0f);
1234     emit_byte(0xbb);
1235     emit_byte(0xc0+8*b+r);
1236     }
1237     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1238    
1239    
1240     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1241     {
1242     emit_byte(0x0f);
1243     emit_byte(0xba);
1244     emit_byte(0xf0+r);
1245     emit_byte(i);
1246     }
1247     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1248    
1249     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1250     {
1251     emit_byte(0x0f);
1252     emit_byte(0xb3);
1253     emit_byte(0xc0+8*b+r);
1254     }
1255     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1256    
1257     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1258     {
1259     emit_byte(0x0f);
1260     emit_byte(0xba);
1261     emit_byte(0xe8+r);
1262     emit_byte(i);
1263     }
1264     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1265    
1266     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1267     {
1268     emit_byte(0x0f);
1269     emit_byte(0xab);
1270     emit_byte(0xc0+8*b+r);
1271     }
1272     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1273    
1274     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1275     {
1276     emit_byte(0x66);
1277     if (isbyte(i)) {
1278     emit_byte(0x83);
1279     emit_byte(0xe8+d);
1280     emit_byte(i);
1281     }
1282     else {
1283 gbeauche 1.2 if (optimize_accum && isaccum(d))
1284     emit_byte(0x2d);
1285     else {
1286 gbeauche 1.1 emit_byte(0x81);
1287     emit_byte(0xe8+d);
1288 gbeauche 1.2 }
1289 gbeauche 1.1 emit_word(i);
1290     }
1291     }
1292     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1293    
1294    
1295     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1296     {
1297     emit_byte(0x8b);
1298     emit_byte(0x05+8*d);
1299     emit_long(s);
1300     }
1301     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1302    
1303     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1304     {
1305     emit_byte(0xc7);
1306     emit_byte(0x05);
1307     emit_long(d);
1308     emit_long(s);
1309     }
1310     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1311    
1312     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1313     {
1314     emit_byte(0x66);
1315     emit_byte(0xc7);
1316     emit_byte(0x05);
1317     emit_long(d);
1318     emit_word(s);
1319     }
1320     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1321    
1322     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1323     {
1324     emit_byte(0xc6);
1325     emit_byte(0x05);
1326     emit_long(d);
1327     emit_byte(s);
1328     }
1329     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1330    
1331     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1332     {
1333     if (optimize_shift_once && (i == 1)) {
1334     emit_byte(0xd0);
1335     emit_byte(0x05);
1336     emit_long(d);
1337     }
1338     else {
1339     emit_byte(0xc0);
1340     emit_byte(0x05);
1341     emit_long(d);
1342     emit_byte(i);
1343     }
1344     }
1345     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1346    
1347     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1348     {
1349     if (optimize_shift_once && (i == 1)) {
1350     emit_byte(0xd0);
1351     emit_byte(0xc0+r);
1352     }
1353     else {
1354     emit_byte(0xc0);
1355     emit_byte(0xc0+r);
1356     emit_byte(i);
1357     }
1358     }
1359     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1360    
1361     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1362     {
1363     emit_byte(0x66);
1364     emit_byte(0xc1);
1365     emit_byte(0xc0+r);
1366     emit_byte(i);
1367     }
1368     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1369    
1370     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1371     {
1372     if (optimize_shift_once && (i == 1)) {
1373     emit_byte(0xd1);
1374     emit_byte(0xc0+r);
1375     }
1376     else {
1377     emit_byte(0xc1);
1378     emit_byte(0xc0+r);
1379     emit_byte(i);
1380     }
1381     }
1382     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1383    
1384     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1385     {
1386     emit_byte(0xd3);
1387     emit_byte(0xc0+d);
1388     }
1389     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1390    
1391     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1392     {
1393     emit_byte(0x66);
1394     emit_byte(0xd3);
1395     emit_byte(0xc0+d);
1396     }
1397     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1398    
1399     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1400     {
1401     emit_byte(0xd2);
1402     emit_byte(0xc0+d);
1403     }
1404     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1405    
1406     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1407     {
1408     emit_byte(0xd3);
1409     emit_byte(0xe0+d);
1410     }
1411     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1412    
1413     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1414     {
1415     emit_byte(0x66);
1416     emit_byte(0xd3);
1417     emit_byte(0xe0+d);
1418     }
1419     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1420    
1421     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1422     {
1423     emit_byte(0xd2);
1424     emit_byte(0xe0+d);
1425     }
1426     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1427    
1428     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1429     {
1430     if (optimize_shift_once && (i == 1)) {
1431     emit_byte(0xd0);
1432     emit_byte(0xc8+r);
1433     }
1434     else {
1435     emit_byte(0xc0);
1436     emit_byte(0xc8+r);
1437     emit_byte(i);
1438     }
1439     }
1440     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1441    
1442     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1443     {
1444     emit_byte(0x66);
1445     emit_byte(0xc1);
1446     emit_byte(0xc8+r);
1447     emit_byte(i);
1448     }
1449     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1450    
1451     // gb-- used for making an fpcr value in compemu_fpp.cpp
1452     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1453     {
1454     emit_byte(0x0b);
1455     emit_byte(0x05+8*d);
1456     emit_long(s);
1457     }
1458     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1459    
1460     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1461     {
1462     if (optimize_shift_once && (i == 1)) {
1463     emit_byte(0xd1);
1464     emit_byte(0xc8+r);
1465     }
1466     else {
1467     emit_byte(0xc1);
1468     emit_byte(0xc8+r);
1469     emit_byte(i);
1470     }
1471     }
1472     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1473    
1474     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1475     {
1476     emit_byte(0xd3);
1477     emit_byte(0xc8+d);
1478     }
1479     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1480    
1481     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1482     {
1483     emit_byte(0x66);
1484     emit_byte(0xd3);
1485     emit_byte(0xc8+d);
1486     }
1487     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1488    
1489     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1490     {
1491     emit_byte(0xd2);
1492     emit_byte(0xc8+d);
1493     }
1494     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1495    
1496     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1497     {
1498     emit_byte(0xd3);
1499     emit_byte(0xe8+d);
1500     }
1501     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1502    
1503     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1504     {
1505     emit_byte(0x66);
1506     emit_byte(0xd3);
1507     emit_byte(0xe8+d);
1508     }
1509     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1510    
1511     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1512     {
1513     emit_byte(0xd2);
1514     emit_byte(0xe8+d);
1515     }
1516     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1517    
1518     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1519     {
1520     emit_byte(0xd3);
1521     emit_byte(0xf8+d);
1522     }
1523     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1524    
1525     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1526     {
1527     emit_byte(0x66);
1528     emit_byte(0xd3);
1529     emit_byte(0xf8+d);
1530     }
1531     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1532    
1533     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1534     {
1535     emit_byte(0xd2);
1536     emit_byte(0xf8+d);
1537     }
1538     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1539    
1540     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1541     {
1542     if (optimize_shift_once && (i == 1)) {
1543     emit_byte(0xd1);
1544     emit_byte(0xe0+r);
1545     }
1546     else {
1547     emit_byte(0xc1);
1548     emit_byte(0xe0+r);
1549     emit_byte(i);
1550     }
1551     }
1552     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1553    
1554     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1555     {
1556     emit_byte(0x66);
1557     emit_byte(0xc1);
1558     emit_byte(0xe0+r);
1559     emit_byte(i);
1560     }
1561     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1562    
1563     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1564     {
1565     if (optimize_shift_once && (i == 1)) {
1566     emit_byte(0xd0);
1567     emit_byte(0xe0+r);
1568     }
1569     else {
1570     emit_byte(0xc0);
1571     emit_byte(0xe0+r);
1572     emit_byte(i);
1573     }
1574     }
1575     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1576    
1577     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1578     {
1579     if (optimize_shift_once && (i == 1)) {
1580     emit_byte(0xd1);
1581     emit_byte(0xe8+r);
1582     }
1583     else {
1584     emit_byte(0xc1);
1585     emit_byte(0xe8+r);
1586     emit_byte(i);
1587     }
1588     }
1589     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1590    
1591     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1592     {
1593     emit_byte(0x66);
1594     emit_byte(0xc1);
1595     emit_byte(0xe8+r);
1596     emit_byte(i);
1597     }
1598     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1599    
1600     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1601     {
1602     if (optimize_shift_once && (i == 1)) {
1603     emit_byte(0xd0);
1604     emit_byte(0xe8+r);
1605     }
1606     else {
1607     emit_byte(0xc0);
1608     emit_byte(0xe8+r);
1609     emit_byte(i);
1610     }
1611     }
1612     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1613    
1614     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1615     {
1616     if (optimize_shift_once && (i == 1)) {
1617     emit_byte(0xd1);
1618     emit_byte(0xf8+r);
1619     }
1620     else {
1621     emit_byte(0xc1);
1622     emit_byte(0xf8+r);
1623     emit_byte(i);
1624     }
1625     }
1626     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1627    
1628     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1629     {
1630     emit_byte(0x66);
1631     emit_byte(0xc1);
1632     emit_byte(0xf8+r);
1633     emit_byte(i);
1634     }
1635     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1636    
1637     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1638     {
1639     if (optimize_shift_once && (i == 1)) {
1640     emit_byte(0xd0);
1641     emit_byte(0xf8+r);
1642     }
1643     else {
1644     emit_byte(0xc0);
1645     emit_byte(0xf8+r);
1646     emit_byte(i);
1647     }
1648     }
1649     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1650    
1651     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1652     {
1653     emit_byte(0x9e);
1654     }
1655     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1656    
1657     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1658     {
1659     emit_byte(0x0f);
1660     emit_byte(0xa2);
1661     }
1662     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1663    
1664     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1665     {
1666     emit_byte(0x9f);
1667     }
1668     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1669    
1670     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1671     {
1672     emit_byte(0x0f);
1673     emit_byte(0x90+cc);
1674     emit_byte(0xc0+d);
1675     }
1676     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1677    
1678     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1679     {
1680     emit_byte(0x0f);
1681     emit_byte(0x90+cc);
1682     emit_byte(0x05);
1683     emit_long(d);
1684     }
1685     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1686    
1687     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1688     {
1689     if (have_cmov) {
1690     emit_byte(0x0f);
1691     emit_byte(0x40+cc);
1692     emit_byte(0xc0+8*d+s);
1693     }
1694     else { /* replacement using branch and mov */
1695     int uncc=(cc^1);
1696     emit_byte(0x70+uncc);
1697     emit_byte(2); /* skip next 2 bytes if not cc=true */
1698     emit_byte(0x89);
1699     emit_byte(0xc0+8*s+d);
1700     }
1701     }
1702     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1703    
1704     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1705     {
1706     emit_byte(0x0f);
1707     emit_byte(0xbc);
1708     emit_byte(0xc0+8*d+s);
1709     }
1710     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1711    
1712     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1713     {
1714     emit_byte(0x0f);
1715     emit_byte(0xbf);
1716     emit_byte(0xc0+8*d+s);
1717     }
1718     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1719    
1720     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1721     {
1722     emit_byte(0x0f);
1723     emit_byte(0xbe);
1724     emit_byte(0xc0+8*d+s);
1725     }
1726     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1727    
1728     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1729     {
1730     emit_byte(0x0f);
1731     emit_byte(0xb7);
1732     emit_byte(0xc0+8*d+s);
1733     }
1734     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1735    
1736     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1737     {
1738     emit_byte(0x0f);
1739     emit_byte(0xb6);
1740     emit_byte(0xc0+8*d+s);
1741     }
1742     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1743    
1744     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1745     {
1746     emit_byte(0x0f);
1747     emit_byte(0xaf);
1748     emit_byte(0xc0+8*d+s);
1749     }
1750     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1751    
1752     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1753     {
1754     if (d!=MUL_NREG1 || s!=MUL_NREG2)
1755     abort();
1756     emit_byte(0xf7);
1757     emit_byte(0xea);
1758     }
1759     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1760    
1761     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1762     {
1763     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1764     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1765     abort();
1766     }
1767     emit_byte(0xf7);
1768     emit_byte(0xe2);
1769     }
1770     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1771    
1772     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1773     {
1774     abort(); /* %^$&%^$%#^ x86! */
1775     emit_byte(0x0f);
1776     emit_byte(0xaf);
1777     emit_byte(0xc0+8*d+s);
1778     }
1779     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1780    
1781     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1782     {
1783     emit_byte(0x88);
1784     emit_byte(0xc0+8*s+d);
1785     }
1786     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1787    
1788     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1789     {
1790     emit_byte(0x66);
1791     emit_byte(0x89);
1792     emit_byte(0xc0+8*s+d);
1793     }
1794     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1795    
1796     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1797     {
1798     int isebp=(baser==5)?0x40:0;
1799     int fi;
1800    
1801     switch(factor) {
1802     case 1: fi=0; break;
1803     case 2: fi=1; break;
1804     case 4: fi=2; break;
1805     case 8: fi=3; break;
1806     default: abort();
1807     }
1808    
1809    
1810     emit_byte(0x8b);
1811     emit_byte(0x04+8*d+isebp);
1812     emit_byte(baser+8*index+0x40*fi);
1813     if (isebp)
1814     emit_byte(0x00);
1815     }
1816     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1817    
1818     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1819     {
1820     int fi;
1821     int isebp;
1822    
1823     switch(factor) {
1824     case 1: fi=0; break;
1825     case 2: fi=1; break;
1826     case 4: fi=2; break;
1827     case 8: fi=3; break;
1828     default: abort();
1829     }
1830     isebp=(baser==5)?0x40:0;
1831    
1832     emit_byte(0x66);
1833     emit_byte(0x8b);
1834     emit_byte(0x04+8*d+isebp);
1835     emit_byte(baser+8*index+0x40*fi);
1836     if (isebp)
1837     emit_byte(0x00);
1838     }
1839     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1840    
1841     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1842     {
1843     int fi;
1844     int isebp;
1845    
1846     switch(factor) {
1847     case 1: fi=0; break;
1848     case 2: fi=1; break;
1849     case 4: fi=2; break;
1850     case 8: fi=3; break;
1851     default: abort();
1852     }
1853     isebp=(baser==5)?0x40:0;
1854    
1855     emit_byte(0x8a);
1856     emit_byte(0x04+8*d+isebp);
1857     emit_byte(baser+8*index+0x40*fi);
1858     if (isebp)
1859     emit_byte(0x00);
1860     }
1861     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1862    
1863     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1864     {
1865     int fi;
1866     int isebp;
1867    
1868     switch(factor) {
1869     case 1: fi=0; break;
1870     case 2: fi=1; break;
1871     case 4: fi=2; break;
1872     case 8: fi=3; break;
1873     default: abort();
1874     }
1875    
1876    
1877     isebp=(baser==5)?0x40:0;
1878    
1879     emit_byte(0x89);
1880     emit_byte(0x04+8*s+isebp);
1881     emit_byte(baser+8*index+0x40*fi);
1882     if (isebp)
1883     emit_byte(0x00);
1884     }
1885     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1886    
1887     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1888     {
1889     int fi;
1890     int isebp;
1891    
1892     switch(factor) {
1893     case 1: fi=0; break;
1894     case 2: fi=1; break;
1895     case 4: fi=2; break;
1896     case 8: fi=3; break;
1897     default: abort();
1898     }
1899     isebp=(baser==5)?0x40:0;
1900    
1901     emit_byte(0x66);
1902     emit_byte(0x89);
1903     emit_byte(0x04+8*s+isebp);
1904     emit_byte(baser+8*index+0x40*fi);
1905     if (isebp)
1906     emit_byte(0x00);
1907     }
1908     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1909    
1910     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1911     {
1912     int fi;
1913     int isebp;
1914    
1915     switch(factor) {
1916     case 1: fi=0; break;
1917     case 2: fi=1; break;
1918     case 4: fi=2; break;
1919     case 8: fi=3; break;
1920     default: abort();
1921     }
1922     isebp=(baser==5)?0x40:0;
1923    
1924     emit_byte(0x88);
1925     emit_byte(0x04+8*s+isebp);
1926     emit_byte(baser+8*index+0x40*fi);
1927     if (isebp)
1928     emit_byte(0x00);
1929     }
1930     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1931    
1932     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1933     {
1934     int fi;
1935    
1936     switch(factor) {
1937     case 1: fi=0; break;
1938     case 2: fi=1; break;
1939     case 4: fi=2; break;
1940     case 8: fi=3; break;
1941     default: abort();
1942     }
1943    
1944     emit_byte(0x89);
1945     emit_byte(0x84+8*s);
1946     emit_byte(baser+8*index+0x40*fi);
1947     emit_long(base);
1948     }
1949     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1950    
1951     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1952     {
1953     int fi;
1954    
1955     switch(factor) {
1956     case 1: fi=0; break;
1957     case 2: fi=1; break;
1958     case 4: fi=2; break;
1959     case 8: fi=3; break;
1960     default: abort();
1961     }
1962    
1963     emit_byte(0x66);
1964     emit_byte(0x89);
1965     emit_byte(0x84+8*s);
1966     emit_byte(baser+8*index+0x40*fi);
1967     emit_long(base);
1968     }
1969     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1970    
1971     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1972     {
1973     int fi;
1974    
1975     switch(factor) {
1976     case 1: fi=0; break;
1977     case 2: fi=1; break;
1978     case 4: fi=2; break;
1979     case 8: fi=3; break;
1980     default: abort();
1981     }
1982    
1983     emit_byte(0x88);
1984     emit_byte(0x84+8*s);
1985     emit_byte(baser+8*index+0x40*fi);
1986     emit_long(base);
1987     }
1988     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
1989    
1990     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
1991     {
1992     int fi;
1993    
1994     switch(factor) {
1995     case 1: fi=0; break;
1996     case 2: fi=1; break;
1997     case 4: fi=2; break;
1998     case 8: fi=3; break;
1999     default: abort();
2000     }
2001    
2002     emit_byte(0x8b);
2003     emit_byte(0x84+8*d);
2004     emit_byte(baser+8*index+0x40*fi);
2005     emit_long(base);
2006     }
2007     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2008    
2009     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2010     {
2011     int fi;
2012    
2013     switch(factor) {
2014     case 1: fi=0; break;
2015     case 2: fi=1; break;
2016     case 4: fi=2; break;
2017     case 8: fi=3; break;
2018     default: abort();
2019     }
2020    
2021     emit_byte(0x66);
2022     emit_byte(0x8b);
2023     emit_byte(0x84+8*d);
2024     emit_byte(baser+8*index+0x40*fi);
2025     emit_long(base);
2026     }
2027     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2028    
2029     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2030     {
2031     int fi;
2032    
2033     switch(factor) {
2034     case 1: fi=0; break;
2035     case 2: fi=1; break;
2036     case 4: fi=2; break;
2037     case 8: fi=3; break;
2038     default: abort();
2039     }
2040    
2041     emit_byte(0x8a);
2042     emit_byte(0x84+8*d);
2043     emit_byte(baser+8*index+0x40*fi);
2044     emit_long(base);
2045     }
2046     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2047    
2048     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2049     {
2050     int fi;
2051     switch(factor) {
2052     case 1: fi=0; break;
2053     case 2: fi=1; break;
2054     case 4: fi=2; break;
2055     case 8: fi=3; break;
2056     default:
2057     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2058     abort();
2059     }
2060     emit_byte(0x8b);
2061     emit_byte(0x04+8*d);
2062     emit_byte(0x05+8*index+64*fi);
2063     emit_long(base);
2064     }
2065     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2066    
2067     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2068     {
2069     int fi;
2070     switch(factor) {
2071     case 1: fi=0; break;
2072     case 2: fi=1; break;
2073     case 4: fi=2; break;
2074     case 8: fi=3; break;
2075     default:
2076     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2077     abort();
2078     }
2079     if (have_cmov) {
2080     emit_byte(0x0f);
2081     emit_byte(0x40+cond);
2082     emit_byte(0x04+8*d);
2083     emit_byte(0x05+8*index+64*fi);
2084     emit_long(base);
2085     }
2086     else { /* replacement using branch and mov */
2087     int uncc=(cond^1);
2088     emit_byte(0x70+uncc);
2089     emit_byte(7); /* skip next 7 bytes if not cc=true */
2090     emit_byte(0x8b);
2091     emit_byte(0x04+8*d);
2092     emit_byte(0x05+8*index+64*fi);
2093     emit_long(base);
2094     }
2095     }
2096     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2097    
2098     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2099     {
2100     if (have_cmov) {
2101     emit_byte(0x0f);
2102     emit_byte(0x40+cond);
2103     emit_byte(0x05+8*d);
2104     emit_long(mem);
2105     }
2106     else { /* replacement using branch and mov */
2107     int uncc=(cond^1);
2108     emit_byte(0x70+uncc);
2109     emit_byte(6); /* skip next 6 bytes if not cc=true */
2110     emit_byte(0x8b);
2111     emit_byte(0x05+8*d);
2112     emit_long(mem);
2113     }
2114     }
2115     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2116    
2117     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2118     {
2119 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2120 gbeauche 1.1 emit_byte(0x8b);
2121     emit_byte(0x40+8*d+s);
2122     emit_byte(offset);
2123     }
2124     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2125    
2126     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2127     {
2128 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2129 gbeauche 1.1 emit_byte(0x66);
2130     emit_byte(0x8b);
2131     emit_byte(0x40+8*d+s);
2132     emit_byte(offset);
2133     }
2134     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2135    
2136     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2137     {
2138 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2139 gbeauche 1.1 emit_byte(0x8a);
2140     emit_byte(0x40+8*d+s);
2141     emit_byte(offset);
2142     }
2143     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2144    
2145     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2146     {
2147     emit_byte(0x8b);
2148     emit_byte(0x80+8*d+s);
2149     emit_long(offset);
2150     }
2151     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2152    
2153     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2154     {
2155     emit_byte(0x66);
2156     emit_byte(0x8b);
2157     emit_byte(0x80+8*d+s);
2158     emit_long(offset);
2159     }
2160     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2161    
2162     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2163     {
2164     emit_byte(0x8a);
2165     emit_byte(0x80+8*d+s);
2166     emit_long(offset);
2167     }
2168     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2169    
2170     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2171     {
2172 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2173 gbeauche 1.1 emit_byte(0xc7);
2174     emit_byte(0x40+d);
2175     emit_byte(offset);
2176     emit_long(i);
2177     }
2178     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2179    
2180     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2181     {
2182 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2183 gbeauche 1.1 emit_byte(0x66);
2184     emit_byte(0xc7);
2185     emit_byte(0x40+d);
2186     emit_byte(offset);
2187     emit_word(i);
2188     }
2189     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2190    
2191     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2192     {
2193 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2194 gbeauche 1.1 emit_byte(0xc6);
2195     emit_byte(0x40+d);
2196     emit_byte(offset);
2197     emit_byte(i);
2198     }
2199     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2200    
2201     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2202     {
2203 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2204 gbeauche 1.1 emit_byte(0x89);
2205     emit_byte(0x40+8*s+d);
2206     emit_byte(offset);
2207     }
2208     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2209    
2210     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2211     {
2212 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2213 gbeauche 1.1 emit_byte(0x66);
2214     emit_byte(0x89);
2215     emit_byte(0x40+8*s+d);
2216     emit_byte(offset);
2217     }
2218     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2219    
2220     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2221     {
2222 gbeauche 1.9 Dif(!isbyte(offset)) abort();
2223 gbeauche 1.1 emit_byte(0x88);
2224     emit_byte(0x40+8*s+d);
2225     emit_byte(offset);
2226     }
2227     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2228    
2229     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2230     {
2231     if (optimize_imm8 && isbyte(offset)) {
2232     emit_byte(0x8d);
2233     emit_byte(0x40+8*d+s);
2234     emit_byte(offset);
2235     }
2236     else {
2237     emit_byte(0x8d);
2238     emit_byte(0x80+8*d+s);
2239     emit_long(offset);
2240     }
2241     }
2242     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2243    
2244     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2245     {
2246     int fi;
2247    
2248     switch(factor) {
2249     case 1: fi=0; break;
2250     case 2: fi=1; break;
2251     case 4: fi=2; break;
2252     case 8: fi=3; break;
2253     default: abort();
2254     }
2255    
2256     if (optimize_imm8 && isbyte(offset)) {
2257     emit_byte(0x8d);
2258     emit_byte(0x44+8*d);
2259     emit_byte(0x40*fi+8*index+s);
2260     emit_byte(offset);
2261     }
2262     else {
2263     emit_byte(0x8d);
2264     emit_byte(0x84+8*d);
2265     emit_byte(0x40*fi+8*index+s);
2266     emit_long(offset);
2267     }
2268     }
2269     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2270    
2271     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2272     {
2273     int isebp=(s==5)?0x40:0;
2274     int fi;
2275    
2276     switch(factor) {
2277     case 1: fi=0; break;
2278     case 2: fi=1; break;
2279     case 4: fi=2; break;
2280     case 8: fi=3; break;
2281     default: abort();
2282     }
2283    
2284     emit_byte(0x8d);
2285     emit_byte(0x04+8*d+isebp);
2286     emit_byte(0x40*fi+8*index+s);
2287     if (isebp)
2288     emit_byte(0);
2289     }
2290     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2291    
2292     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2293     {
2294     if (optimize_imm8 && isbyte(offset)) {
2295     emit_byte(0x89);
2296     emit_byte(0x40+8*s+d);
2297     emit_byte(offset);
2298     }
2299     else {
2300     emit_byte(0x89);
2301     emit_byte(0x80+8*s+d);
2302     emit_long(offset);
2303     }
2304     }
2305     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2306    
2307     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2308     {
2309     emit_byte(0x66);
2310     emit_byte(0x89);
2311     emit_byte(0x80+8*s+d);
2312     emit_long(offset);
2313     }
2314     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2315    
2316     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2317     {
2318     if (optimize_imm8 && isbyte(offset)) {
2319     emit_byte(0x88);
2320     emit_byte(0x40+8*s+d);
2321     emit_byte(offset);
2322     }
2323     else {
2324     emit_byte(0x88);
2325     emit_byte(0x80+8*s+d);
2326     emit_long(offset);
2327     }
2328     }
2329     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2330    
2331     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2332     {
2333     emit_byte(0x0f);
2334     emit_byte(0xc8+r);
2335     }
2336     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2337    
2338     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2339     {
2340     emit_byte(0x66);
2341     emit_byte(0xc1);
2342     emit_byte(0xc0+r);
2343     emit_byte(0x08);
2344     }
2345     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2346    
2347     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2348     {
2349     emit_byte(0x89);
2350     emit_byte(0xc0+8*s+d);
2351     }
2352     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2353    
2354     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2355     {
2356     emit_byte(0x89);
2357     emit_byte(0x05+8*s);
2358     emit_long(d);
2359     }
2360     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2361    
2362     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2363     {
2364     emit_byte(0x66);
2365     emit_byte(0x89);
2366     emit_byte(0x05+8*s);
2367     emit_long(d);
2368     }
2369     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2370    
2371     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2372     {
2373     emit_byte(0x66);
2374     emit_byte(0x8b);
2375     emit_byte(0x05+8*d);
2376     emit_long(s);
2377     }
2378     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2379    
2380     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2381     {
2382     emit_byte(0x88);
2383     emit_byte(0x05+8*s);
2384     emit_long(d);
2385     }
2386     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2387    
2388     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2389     {
2390     emit_byte(0x8a);
2391     emit_byte(0x05+8*d);
2392     emit_long(s);
2393     }
2394     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2395    
2396     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2397     {
2398     emit_byte(0xb8+d);
2399     emit_long(s);
2400     }
2401     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2402    
2403     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2404     {
2405     emit_byte(0x66);
2406     emit_byte(0xb8+d);
2407     emit_word(s);
2408     }
2409     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2410    
2411     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2412     {
2413     emit_byte(0xb0+d);
2414     emit_byte(s);
2415     }
2416     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2417    
2418     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2419     {
2420     emit_byte(0x81);
2421     emit_byte(0x15);
2422     emit_long(d);
2423     emit_long(s);
2424     }
2425     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2426    
2427     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2428     {
2429     if (optimize_imm8 && isbyte(s)) {
2430     emit_byte(0x83);
2431     emit_byte(0x05);
2432     emit_long(d);
2433     emit_byte(s);
2434     }
2435     else {
2436     emit_byte(0x81);
2437     emit_byte(0x05);
2438     emit_long(d);
2439     emit_long(s);
2440     }
2441     }
2442     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2443    
2444     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2445     {
2446     emit_byte(0x66);
2447     emit_byte(0x81);
2448     emit_byte(0x05);
2449     emit_long(d);
2450     emit_word(s);
2451     }
2452     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2453    
2454     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2455     {
2456     emit_byte(0x80);
2457     emit_byte(0x05);
2458     emit_long(d);
2459     emit_byte(s);
2460     }
2461     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2462    
2463     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2464     {
2465 gbeauche 1.2 if (optimize_accum && isaccum(d))
2466     emit_byte(0xa9);
2467     else {
2468 gbeauche 1.1 emit_byte(0xf7);
2469     emit_byte(0xc0+d);
2470 gbeauche 1.2 }
2471 gbeauche 1.1 emit_long(i);
2472     }
2473     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2474    
2475     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2476     {
2477     emit_byte(0x85);
2478     emit_byte(0xc0+8*s+d);
2479     }
2480     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2481    
2482     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2483     {
2484     emit_byte(0x66);
2485     emit_byte(0x85);
2486     emit_byte(0xc0+8*s+d);
2487     }
2488     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2489    
2490     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2491     {
2492     emit_byte(0x84);
2493     emit_byte(0xc0+8*s+d);
2494     }
2495     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2496    
2497     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2498     {
2499     if (optimize_imm8 && isbyte(i)) {
2500 gbeauche 1.2 emit_byte(0x83);
2501     emit_byte(0xe0+d);
2502     emit_byte(i);
2503 gbeauche 1.1 }
2504     else {
2505 gbeauche 1.2 if (optimize_accum && isaccum(d))
2506     emit_byte(0x25);
2507     else {
2508     emit_byte(0x81);
2509     emit_byte(0xe0+d);
2510     }
2511     emit_long(i);
2512 gbeauche 1.1 }
2513     }
2514     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2515    
2516     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2517     {
2518 gbeauche 1.2 emit_byte(0x66);
2519     if (optimize_imm8 && isbyte(i)) {
2520     emit_byte(0x83);
2521     emit_byte(0xe0+d);
2522     emit_byte(i);
2523     }
2524     else {
2525     if (optimize_accum && isaccum(d))
2526     emit_byte(0x25);
2527     else {
2528     emit_byte(0x81);
2529     emit_byte(0xe0+d);
2530     }
2531     emit_word(i);
2532     }
2533 gbeauche 1.1 }
2534     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2535    
2536     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2537     {
2538     emit_byte(0x21);
2539     emit_byte(0xc0+8*s+d);
2540     }
2541     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2542    
2543     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2544     {
2545     emit_byte(0x66);
2546     emit_byte(0x21);
2547     emit_byte(0xc0+8*s+d);
2548     }
2549     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2550    
2551     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2552     {
2553     emit_byte(0x20);
2554     emit_byte(0xc0+8*s+d);
2555     }
2556     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2557    
2558     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2559     {
2560     if (optimize_imm8 && isbyte(i)) {
2561     emit_byte(0x83);
2562     emit_byte(0xc8+d);
2563     emit_byte(i);
2564     }
2565     else {
2566 gbeauche 1.2 if (optimize_accum && isaccum(d))
2567     emit_byte(0x0d);
2568     else {
2569 gbeauche 1.1 emit_byte(0x81);
2570     emit_byte(0xc8+d);
2571 gbeauche 1.2 }
2572 gbeauche 1.1 emit_long(i);
2573     }
2574     }
2575     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2576    
2577     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2578     {
2579     emit_byte(0x09);
2580     emit_byte(0xc0+8*s+d);
2581     }
2582     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2583    
2584     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2585     {
2586     emit_byte(0x66);
2587     emit_byte(0x09);
2588     emit_byte(0xc0+8*s+d);
2589     }
2590     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2591    
2592     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2593     {
2594     emit_byte(0x08);
2595     emit_byte(0xc0+8*s+d);
2596     }
2597     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2598    
2599     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2600     {
2601     emit_byte(0x11);
2602     emit_byte(0xc0+8*s+d);
2603     }
2604     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2605    
2606     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2607     {
2608     emit_byte(0x66);
2609     emit_byte(0x11);
2610     emit_byte(0xc0+8*s+d);
2611     }
2612     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2613    
2614     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2615     {
2616     emit_byte(0x10);
2617     emit_byte(0xc0+8*s+d);
2618     }
2619     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2620    
2621     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2622     {
2623     emit_byte(0x01);
2624     emit_byte(0xc0+8*s+d);
2625     }
2626     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2627    
2628     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2629     {
2630     emit_byte(0x66);
2631     emit_byte(0x01);
2632     emit_byte(0xc0+8*s+d);
2633     }
2634     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2635    
2636     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2637     {
2638     emit_byte(0x00);
2639     emit_byte(0xc0+8*s+d);
2640     }
2641     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2642    
2643     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2644     {
2645     if (isbyte(i)) {
2646     emit_byte(0x83);
2647     emit_byte(0xe8+d);
2648     emit_byte(i);
2649     }
2650     else {
2651 gbeauche 1.2 if (optimize_accum && isaccum(d))
2652     emit_byte(0x2d);
2653     else {
2654 gbeauche 1.1 emit_byte(0x81);
2655     emit_byte(0xe8+d);
2656 gbeauche 1.2 }
2657 gbeauche 1.1 emit_long(i);
2658     }
2659     }
2660     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2661    
2662     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2663     {
2664 gbeauche 1.2 if (optimize_accum && isaccum(d))
2665     emit_byte(0x2c);
2666     else {
2667 gbeauche 1.1 emit_byte(0x80);
2668     emit_byte(0xe8+d);
2669 gbeauche 1.2 }
2670 gbeauche 1.1 emit_byte(i);
2671     }
2672     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2673    
2674     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2675     {
2676     if (isbyte(i)) {
2677     emit_byte(0x83);
2678     emit_byte(0xc0+d);
2679     emit_byte(i);
2680     }
2681     else {
2682 gbeauche 1.2 if (optimize_accum && isaccum(d))
2683     emit_byte(0x05);
2684     else {
2685 gbeauche 1.1 emit_byte(0x81);
2686     emit_byte(0xc0+d);
2687 gbeauche 1.2 }
2688 gbeauche 1.1 emit_long(i);
2689     }
2690     }
2691     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2692    
2693     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2694     {
2695 gbeauche 1.2 emit_byte(0x66);
2696 gbeauche 1.1 if (isbyte(i)) {
2697     emit_byte(0x83);
2698     emit_byte(0xc0+d);
2699     emit_byte(i);
2700     }
2701     else {
2702 gbeauche 1.2 if (optimize_accum && isaccum(d))
2703     emit_byte(0x05);
2704     else {
2705 gbeauche 1.1 emit_byte(0x81);
2706     emit_byte(0xc0+d);
2707 gbeauche 1.2 }
2708 gbeauche 1.1 emit_word(i);
2709     }
2710     }
2711     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2712    
2713     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2714     {
2715 gbeauche 1.2 if (optimize_accum && isaccum(d))
2716     emit_byte(0x04);
2717     else {
2718     emit_byte(0x80);
2719     emit_byte(0xc0+d);
2720     }
2721 gbeauche 1.1 emit_byte(i);
2722     }
2723     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2724    
2725     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2726     {
2727     emit_byte(0x19);
2728     emit_byte(0xc0+8*s+d);
2729     }
2730     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2731    
2732     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2733     {
2734     emit_byte(0x66);
2735     emit_byte(0x19);
2736     emit_byte(0xc0+8*s+d);
2737     }
2738     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2739    
2740     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2741     {
2742     emit_byte(0x18);
2743     emit_byte(0xc0+8*s+d);
2744     }
2745     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2746    
2747     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2748     {
2749     emit_byte(0x29);
2750     emit_byte(0xc0+8*s+d);
2751     }
2752     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2753    
2754     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2755     {
2756     emit_byte(0x66);
2757     emit_byte(0x29);
2758     emit_byte(0xc0+8*s+d);
2759     }
2760     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2761    
2762     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2763     {
2764     emit_byte(0x28);
2765     emit_byte(0xc0+8*s+d);
2766     }
2767     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2768    
2769     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2770     {
2771     emit_byte(0x39);
2772     emit_byte(0xc0+8*s+d);
2773     }
2774     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2775    
2776     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2777     {
2778     if (optimize_imm8 && isbyte(i)) {
2779     emit_byte(0x83);
2780     emit_byte(0xf8+r);
2781     emit_byte(i);
2782     }
2783     else {
2784 gbeauche 1.2 if (optimize_accum && isaccum(r))
2785     emit_byte(0x3d);
2786     else {
2787 gbeauche 1.1 emit_byte(0x81);
2788     emit_byte(0xf8+r);
2789 gbeauche 1.2 }
2790 gbeauche 1.1 emit_long(i);
2791     }
2792     }
2793     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2794    
2795     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2796     {
2797     emit_byte(0x66);
2798     emit_byte(0x39);
2799     emit_byte(0xc0+8*s+d);
2800     }
2801     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2802    
2803 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2804     {
2805     emit_byte(0x80);
2806     emit_byte(0x3d);
2807     emit_long(d);
2808     emit_byte(s);
2809     }
2810     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2811    
2812 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2813     {
2814 gbeauche 1.2 if (optimize_accum && isaccum(d))
2815     emit_byte(0x3c);
2816     else {
2817 gbeauche 1.1 emit_byte(0x80);
2818     emit_byte(0xf8+d);
2819 gbeauche 1.2 }
2820 gbeauche 1.1 emit_byte(i);
2821     }
2822     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2823    
2824     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2825     {
2826     emit_byte(0x38);
2827     emit_byte(0xc0+8*s+d);
2828     }
2829     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2830    
2831     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2832     {
2833     int fi;
2834    
2835     switch(factor) {
2836     case 1: fi=0; break;
2837     case 2: fi=1; break;
2838     case 4: fi=2; break;
2839     case 8: fi=3; break;
2840     default: abort();
2841     }
2842     emit_byte(0x39);
2843     emit_byte(0x04+8*d);
2844     emit_byte(5+8*index+0x40*fi);
2845     emit_long(offset);
2846     }
2847     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2848    
2849     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2850     {
2851     emit_byte(0x31);
2852     emit_byte(0xc0+8*s+d);
2853     }
2854     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2855    
2856     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2857     {
2858     emit_byte(0x66);
2859     emit_byte(0x31);
2860     emit_byte(0xc0+8*s+d);
2861     }
2862     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2863    
2864     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2865     {
2866     emit_byte(0x30);
2867     emit_byte(0xc0+8*s+d);
2868     }
2869     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2870    
2871     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2872     {
2873     if (optimize_imm8 && isbyte(s)) {
2874     emit_byte(0x83);
2875     emit_byte(0x2d);
2876     emit_long(d);
2877     emit_byte(s);
2878     }
2879     else {
2880     emit_byte(0x81);
2881     emit_byte(0x2d);
2882     emit_long(d);
2883     emit_long(s);
2884     }
2885     }
2886     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2887    
2888     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2889     {
2890     if (optimize_imm8 && isbyte(s)) {
2891     emit_byte(0x83);
2892     emit_byte(0x3d);
2893     emit_long(d);
2894     emit_byte(s);
2895     }
2896     else {
2897     emit_byte(0x81);
2898     emit_byte(0x3d);
2899     emit_long(d);
2900     emit_long(s);
2901     }
2902     }
2903     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2904    
2905     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2906     {
2907     emit_byte(0x87);
2908     emit_byte(0xc0+8*r1+r2);
2909     }
2910     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2911    
2912     /*************************************************************************
2913     * FIXME: mem access modes probably wrong *
2914     *************************************************************************/
2915    
2916     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2917     {
2918     emit_byte(0x9c);
2919     }
2920     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2921    
2922     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2923     {
2924     emit_byte(0x9d);
2925     }
2926     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2927 gbeauche 1.13
2928     #endif
2929 gbeauche 1.1
2930     /*************************************************************************
2931     * Unoptimizable stuff --- jump *
2932     *************************************************************************/
2933    
2934     static __inline__ void raw_call_r(R4 r)
2935     {
2936 gbeauche 1.20 #if USE_NEW_RTASM
2937     CALLsr(r);
2938     #else
2939 gbeauche 1.1 emit_byte(0xff);
2940     emit_byte(0xd0+r);
2941 gbeauche 1.20 #endif
2942 gbeauche 1.5 }
2943    
2944     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2945     {
2946 gbeauche 1.20 #if USE_NEW_RTASM
2947     CALLsm(base, X86_NOREG, r, m);
2948     #else
2949 gbeauche 1.5 int mu;
2950     switch(m) {
2951     case 1: mu=0; break;
2952     case 2: mu=1; break;
2953     case 4: mu=2; break;
2954     case 8: mu=3; break;
2955     default: abort();
2956     }
2957     emit_byte(0xff);
2958     emit_byte(0x14);
2959     emit_byte(0x05+8*r+0x40*mu);
2960     emit_long(base);
2961 gbeauche 1.20 #endif
2962 gbeauche 1.1 }
2963    
2964     static __inline__ void raw_jmp_r(R4 r)
2965     {
2966 gbeauche 1.20 #if USE_NEW_RTASM
2967     JMPsr(r);
2968     #else
2969 gbeauche 1.1 emit_byte(0xff);
2970     emit_byte(0xe0+r);
2971 gbeauche 1.20 #endif
2972 gbeauche 1.1 }
2973    
2974     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2975     {
2976 gbeauche 1.20 #if USE_NEW_RTASM
2977     JMPsm(base, X86_NOREG, r, m);
2978     #else
2979 gbeauche 1.1 int mu;
2980     switch(m) {
2981     case 1: mu=0; break;
2982     case 2: mu=1; break;
2983     case 4: mu=2; break;
2984     case 8: mu=3; break;
2985     default: abort();
2986     }
2987     emit_byte(0xff);
2988     emit_byte(0x24);
2989     emit_byte(0x05+8*r+0x40*mu);
2990     emit_long(base);
2991 gbeauche 1.20 #endif
2992 gbeauche 1.1 }
2993    
2994     static __inline__ void raw_jmp_m(uae_u32 base)
2995     {
2996     emit_byte(0xff);
2997     emit_byte(0x25);
2998     emit_long(base);
2999     }
3000    
3001    
3002     static __inline__ void raw_call(uae_u32 t)
3003     {
3004 gbeauche 1.20 #if USE_NEW_RTASM
3005     CALLm(t);
3006     #else
3007 gbeauche 1.1 emit_byte(0xe8);
3008     emit_long(t-(uae_u32)target-4);
3009 gbeauche 1.20 #endif
3010 gbeauche 1.1 }
3011    
3012     static __inline__ void raw_jmp(uae_u32 t)
3013     {
3014 gbeauche 1.20 #if USE_NEW_RTASM
3015     JMPm(t);
3016     #else
3017 gbeauche 1.1 emit_byte(0xe9);
3018     emit_long(t-(uae_u32)target-4);
3019 gbeauche 1.20 #endif
3020 gbeauche 1.1 }
3021    
3022     static __inline__ void raw_jl(uae_u32 t)
3023     {
3024     emit_byte(0x0f);
3025     emit_byte(0x8c);
3026 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3027 gbeauche 1.1 }
3028    
3029     static __inline__ void raw_jz(uae_u32 t)
3030     {
3031     emit_byte(0x0f);
3032     emit_byte(0x84);
3033 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3034 gbeauche 1.1 }
3035    
3036     static __inline__ void raw_jnz(uae_u32 t)
3037     {
3038     emit_byte(0x0f);
3039     emit_byte(0x85);
3040 gbeauche 1.20 emit_long(t-(uintptr)target-4);
3041 gbeauche 1.1 }
3042    
3043     static __inline__ void raw_jnz_l_oponly(void)
3044     {
3045     emit_byte(0x0f);
3046     emit_byte(0x85);
3047     }
3048    
3049     static __inline__ void raw_jcc_l_oponly(int cc)
3050     {
3051     emit_byte(0x0f);
3052     emit_byte(0x80+cc);
3053     }
3054    
3055     static __inline__ void raw_jnz_b_oponly(void)
3056     {
3057     emit_byte(0x75);
3058     }
3059    
3060     static __inline__ void raw_jz_b_oponly(void)
3061     {
3062     emit_byte(0x74);
3063     }
3064    
3065     static __inline__ void raw_jcc_b_oponly(int cc)
3066     {
3067     emit_byte(0x70+cc);
3068     }
3069    
3070     static __inline__ void raw_jmp_l_oponly(void)
3071     {
3072     emit_byte(0xe9);
3073     }
3074    
3075     static __inline__ void raw_jmp_b_oponly(void)
3076     {
3077     emit_byte(0xeb);
3078     }
3079    
3080     static __inline__ void raw_ret(void)
3081     {
3082     emit_byte(0xc3);
3083     }
3084    
3085     static __inline__ void raw_nop(void)
3086     {
3087     emit_byte(0x90);
3088     }
3089    
3090 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
3091     {
3092     /* Source: GNU Binutils 2.12.90.0.15 */
3093     /* Various efficient no-op patterns for aligning code labels.
3094     Note: Don't try to assemble the instructions in the comments.
3095     0L and 0w are not legal. */
3096     static const uae_u8 f32_1[] =
3097     {0x90}; /* nop */
3098     static const uae_u8 f32_2[] =
3099     {0x89,0xf6}; /* movl %esi,%esi */
3100     static const uae_u8 f32_3[] =
3101     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3102     static const uae_u8 f32_4[] =
3103     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3104     static const uae_u8 f32_5[] =
3105     {0x90, /* nop */
3106     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3107     static const uae_u8 f32_6[] =
3108     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3109     static const uae_u8 f32_7[] =
3110     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3111     static const uae_u8 f32_8[] =
3112     {0x90, /* nop */
3113     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3114     static const uae_u8 f32_9[] =
3115     {0x89,0xf6, /* movl %esi,%esi */
3116     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3117     static const uae_u8 f32_10[] =
3118     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3119     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3120     static const uae_u8 f32_11[] =
3121     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3122     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3123     static const uae_u8 f32_12[] =
3124     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3125     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3126     static const uae_u8 f32_13[] =
3127     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3128     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3129     static const uae_u8 f32_14[] =
3130     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3131     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3132     static const uae_u8 f32_15[] =
3133     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3134     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3135     static const uae_u8 f32_16[] =
3136     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3137     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3138     static const uae_u8 *const f32_patt[] = {
3139     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3140     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3141     };
3142    
3143     int nloops = nbytes / 16;
3144     while (nloops-- > 0)
3145     emit_block(f32_16, sizeof(f32_16));
3146    
3147     nbytes %= 16;
3148     if (nbytes)
3149     emit_block(f32_patt[nbytes - 1], nbytes);
3150     }
3151    
3152 gbeauche 1.1
3153     /*************************************************************************
3154     * Flag handling, to and fro UAE flag register *
3155     *************************************************************************/
3156    
3157     #ifdef SAHF_SETO_PROFITABLE
3158    
3159     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3160    
3161     static __inline__ void raw_flags_to_reg(int r)
3162     {
3163     raw_lahf(0); /* Most flags in AH */
3164     //raw_setcc(r,0); /* V flag in AL */
3165 gbeauche 1.20 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3166 gbeauche 1.1
3167     #if 1 /* Let's avoid those nasty partial register stalls */
3168 gbeauche 1.20 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3169     raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3170 gbeauche 1.1 //live.state[FLAGTMP].status=CLEAN;
3171     live.state[FLAGTMP].status=INMEM;
3172     live.state[FLAGTMP].realreg=-1;
3173     /* We just "evicted" FLAGTMP. */
3174     if (live.nat[r].nholds!=1) {
3175     /* Huh? */
3176     abort();
3177     }
3178     live.nat[r].nholds=0;
3179     #endif
3180     }
3181    
3182     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3183     static __inline__ void raw_reg_to_flags(int r)
3184     {
3185     raw_cmp_b_ri(r,-127); /* set V */
3186     raw_sahf(0);
3187     }
3188    
3189     #else
3190    
3191     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3192     static __inline__ void raw_flags_to_reg(int r)
3193     {
3194     raw_pushfl();
3195     raw_pop_l_r(r);
3196 gbeauche 1.20 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3197 gbeauche 1.1 // live.state[FLAGTMP].status=CLEAN;
3198     live.state[FLAGTMP].status=INMEM;
3199     live.state[FLAGTMP].realreg=-1;
3200     /* We just "evicted" FLAGTMP. */
3201     if (live.nat[r].nholds!=1) {
3202     /* Huh? */
3203     abort();
3204     }
3205     live.nat[r].nholds=0;
3206     }
3207    
3208     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3209     static __inline__ void raw_reg_to_flags(int r)
3210     {
3211     raw_push_l_r(r);
3212     raw_popfl();
3213     }
3214    
3215     #endif
3216    
3217     /* Apparently, there are enough instructions between flag store and
3218     flag reload to avoid the partial memory stall */
3219     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3220     {
3221     #if 1
3222 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3223 gbeauche 1.1 #else
3224 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3225     raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3226 gbeauche 1.1 #endif
3227     }
3228    
3229     /* FLAGX is byte sized, and we *do* write it at that size */
3230     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3231     {
3232     if (live.nat[target].canbyte)
3233 gbeauche 1.20 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3234 gbeauche 1.1 else if (live.nat[target].canword)
3235 gbeauche 1.20 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3236 gbeauche 1.1 else
3237 gbeauche 1.20 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3238 gbeauche 1.1 }
3239    
3240 gbeauche 1.11 #define NATIVE_FLAG_Z 0x40
3241     static __inline__ void raw_flags_set_zero(int f, int r, int t)
3242     {
3243     // FIXME: this is really suboptimal
3244     raw_pushfl();
3245     raw_pop_l_r(f);
3246     raw_and_l_ri(f,~NATIVE_FLAG_Z);
3247     raw_test_l_rr(r,r);
3248     raw_mov_l_ri(r,0);
3249     raw_mov_l_ri(t,NATIVE_FLAG_Z);
3250     raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3251     raw_or_l(f,r);
3252     raw_push_l_r(f);
3253     raw_popfl();
3254     }
3255 gbeauche 1.1
3256     static __inline__ void raw_inc_sp(int off)
3257     {
3258 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
3259 gbeauche 1.1 }
3260    
3261     /*************************************************************************
3262     * Handling mistaken direct memory access *
3263     *************************************************************************/
3264    
3265     // gb-- I don't need that part for JIT Basilisk II
3266     #if defined(NATMEM_OFFSET) && 0
3267     #include <asm/sigcontext.h>
3268     #include <signal.h>
3269    
3270     #define SIG_READ 1
3271     #define SIG_WRITE 2
3272    
3273     static int in_handler=0;
3274     static uae_u8 veccode[256];
3275    
3276     static void vec(int x, struct sigcontext sc)
3277     {
3278     uae_u8* i=(uae_u8*)sc.eip;
3279     uae_u32 addr=sc.cr2;
3280     int r=-1;
3281     int size=4;
3282     int dir=-1;
3283     int len=0;
3284     int j;
3285    
3286     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3287     if (!canbang)
3288     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3289     if (in_handler)
3290     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3291    
3292     if (canbang && i>=compiled_code && i<=current_compile_p) {
3293     if (*i==0x66) {
3294     i++;
3295     size=2;
3296     len++;
3297     }
3298    
3299     switch(i[0]) {
3300     case 0x8a:
3301     if ((i[1]&0xc0)==0x80) {
3302     r=(i[1]>>3)&7;
3303     dir=SIG_READ;
3304     size=1;
3305     len+=6;
3306     break;
3307     }
3308     break;
3309     case 0x88:
3310     if ((i[1]&0xc0)==0x80) {
3311     r=(i[1]>>3)&7;
3312     dir=SIG_WRITE;
3313     size=1;
3314     len+=6;
3315     break;
3316     }
3317     break;
3318     case 0x8b:
3319     if ((i[1]&0xc0)==0x80) {
3320     r=(i[1]>>3)&7;
3321     dir=SIG_READ;
3322     len+=6;
3323     break;
3324     }
3325     if ((i[1]&0xc0)==0x40) {
3326     r=(i[1]>>3)&7;
3327     dir=SIG_READ;
3328     len+=3;
3329     break;
3330     }
3331     break;
3332     case 0x89:
3333     if ((i[1]&0xc0)==0x80) {
3334     r=(i[1]>>3)&7;
3335     dir=SIG_WRITE;
3336     len+=6;
3337     break;
3338     }
3339     if ((i[1]&0xc0)==0x40) {
3340     r=(i[1]>>3)&7;
3341     dir=SIG_WRITE;
3342     len+=3;
3343     break;
3344     }
3345     break;
3346     }
3347     }
3348    
3349     if (r!=-1) {
3350     void* pr=NULL;
3351     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3352    
3353     switch(r) {
3354     case 0: pr=&(sc.eax); break;
3355     case 1: pr=&(sc.ecx); break;
3356     case 2: pr=&(sc.edx); break;
3357     case 3: pr=&(sc.ebx); break;
3358     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3359     case 5: pr=(size>1)?
3360     (void*)(&(sc.ebp)):
3361     (void*)(((uae_u8*)&(sc.ecx))+1); break;
3362     case 6: pr=(size>1)?
3363     (void*)(&(sc.esi)):
3364     (void*)(((uae_u8*)&(sc.edx))+1); break;
3365     case 7: pr=(size>1)?
3366     (void*)(&(sc.edi)):
3367     (void*)(((uae_u8*)&(sc.ebx))+1); break;
3368     default: abort();
3369     }
3370     if (pr) {
3371     blockinfo* bi;
3372    
3373     if (currprefs.comp_oldsegv) {
3374     addr-=NATMEM_OFFSET;
3375    
3376     if ((addr>=0x10000000 && addr<0x40000000) ||
3377     (addr>=0x50000000)) {
3378     write_log("Suspicious address in %x SEGV handler.\n",addr);
3379     }
3380     if (dir==SIG_READ) {
3381     switch(size) {
3382     case 1: *((uae_u8*)pr)=get_byte(addr); break;
3383     case 2: *((uae_u16*)pr)=get_word(addr); break;
3384     case 4: *((uae_u32*)pr)=get_long(addr); break;
3385     default: abort();
3386     }
3387     }
3388     else { /* write */
3389     switch(size) {
3390     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3391     case 2: put_word(addr,*((uae_u16*)pr)); break;
3392     case 4: put_long(addr,*((uae_u32*)pr)); break;
3393     default: abort();
3394     }
3395     }
3396     write_log("Handled one access!\n");
3397     fflush(stdout);
3398     segvcount++;
3399     sc.eip+=len;
3400     }
3401     else {
3402     void* tmp=target;
3403     int i;
3404     uae_u8 vecbuf[5];
3405    
3406     addr-=NATMEM_OFFSET;
3407    
3408     if ((addr>=0x10000000 && addr<0x40000000) ||
3409     (addr>=0x50000000)) {
3410     write_log("Suspicious address in %x SEGV handler.\n",addr);
3411     }
3412    
3413     target=(uae_u8*)sc.eip;
3414     for (i=0;i<5;i++)
3415     vecbuf[i]=target[i];
3416     emit_byte(0xe9);
3417 gbeauche 1.20 emit_long((uintptr)veccode-(uintptr)target-4);
3418 gbeauche 1.1 write_log("Create jump to %p\n",veccode);
3419    
3420     write_log("Handled one access!\n");
3421     fflush(stdout);
3422     segvcount++;
3423    
3424     target=veccode;
3425    
3426     if (dir==SIG_READ) {
3427     switch(size) {
3428     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3429     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3430     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3431     default: abort();
3432     }
3433     }
3434     else { /* write */
3435     switch(size) {
3436     case 1: put_byte(addr,*((uae_u8*)pr)); break;
3437     case 2: put_word(addr,*((uae_u16*)pr)); break;
3438     case 4: put_long(addr,*((uae_u32*)pr)); break;
3439     default: abort();
3440     }
3441     }
3442     for (i=0;i<5;i++)
3443     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3444 gbeauche 1.20 raw_mov_l_mi((uintptr)&in_handler,0);
3445 gbeauche 1.1 emit_byte(0xe9);
3446 gbeauche 1.20 emit_long(sc.eip+len-(uintptr)target-4);
3447 gbeauche 1.1 in_handler=1;
3448     target=tmp;
3449     }
3450     bi=active;
3451     while (bi) {
3452     if (bi->handler &&
3453     (uae_u8*)bi->direct_handler<=i &&
3454     (uae_u8*)bi->nexthandler>i) {
3455     write_log("deleted trigger (%p<%p<%p) %p\n",
3456     bi->handler,
3457     i,
3458     bi->nexthandler,
3459     bi->pc_p);
3460     invalidate_block(bi);
3461     raise_in_cl_list(bi);
3462     set_special(0);
3463     return;
3464     }
3465     bi=bi->next;
3466     }
3467     /* Not found in the active list. Might be a rom routine that
3468     is in the dormant list */
3469     bi=dormant;
3470     while (bi) {
3471     if (bi->handler &&
3472     (uae_u8*)bi->direct_handler<=i &&
3473     (uae_u8*)bi->nexthandler>i) {
3474     write_log("deleted trigger (%p<%p<%p) %p\n",
3475     bi->handler,
3476     i,
3477     bi->nexthandler,
3478     bi->pc_p);
3479     invalidate_block(bi);
3480     raise_in_cl_list(bi);
3481     set_special(0);
3482     return;
3483     }
3484     bi=bi->next;
3485     }
3486     write_log("Huh? Could not find trigger!\n");
3487     return;
3488     }
3489     }
3490     write_log("Can't handle access!\n");
3491     for (j=0;j<10;j++) {
3492     write_log("instruction byte %2d is %02x\n",j,i[j]);
3493     }
3494     write_log("Please send the above info (starting at \"fault address\") to\n"
3495     "bmeyer@csse.monash.edu.au\n"
3496     "This shouldn't happen ;-)\n");
3497     fflush(stdout);
3498     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3499     }
3500     #endif
3501    
3502    
3503     /*************************************************************************
3504     * Checking for CPU features *
3505     *************************************************************************/
3506    
3507 gbeauche 1.3 struct cpuinfo_x86 {
3508     uae_u8 x86; // CPU family
3509     uae_u8 x86_vendor; // CPU vendor
3510     uae_u8 x86_processor; // CPU canonical processor type
3511     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3512     uae_u32 x86_hwcap;
3513     uae_u8 x86_model;
3514     uae_u8 x86_mask;
3515     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3516     char x86_vendor_id[16];
3517     };
3518     struct cpuinfo_x86 cpuinfo;
3519    
/* Values stored in cpuinfo_x86::x86_vendor. */
enum {
    X86_VENDOR_INTEL     = 0,   /* "GenuineIntel" */
    X86_VENDOR_CYRIX     = 1,   /* "CyrixInstead" */
    X86_VENDOR_AMD       = 2,   /* "AuthenticAMD" */
    X86_VENDOR_UMC       = 3,   /* "UMC UMC UMC " */
    X86_VENDOR_NEXGEN    = 4,   /* "NexGenDriven" */
    X86_VENDOR_CENTAUR   = 5,   /* "CentaurHauls" */
    X86_VENDOR_RISE      = 6,   /* "RiseRiseRise" */
    X86_VENDOR_TRANSMETA = 7,   /* "GenuineTMx86" or "TransmetaCPU" */
    X86_VENDOR_NSC       = 8,   /* "Geode by NSC" */
    X86_VENDOR_UNKNOWN   = 0xff /* anything else */
};
3532    
/*
 * Canonical processor types stored in cpuinfo_x86::x86_processor.
 * The values index x86_processor_string_table[] and x86_alignments[], so
 * the order here must match the order of the rows in those tables.
 */
enum {
    X86_PROCESSOR_I386,         /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_K8,
    X86_PROCESSOR_max           /* number of entries; also "not yet determined" */
};
3544    
3545     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3546     "80386",
3547     "80486",
3548     "Pentium",
3549     "PentiumPro",
3550     "K6",
3551     "Athlon",
3552 gbeauche 1.16 "Pentium4",
3553     "K8"
3554 gbeauche 1.3 };
3555    
3556     static struct ptt {
3557     const int align_loop;
3558     const int align_loop_max_skip;
3559     const int align_jump;
3560     const int align_jump_max_skip;
3561     const int align_func;
3562     }
3563     x86_alignments[X86_PROCESSOR_max] = {
3564     { 4, 3, 4, 3, 4 },
3565     { 16, 15, 16, 15, 16 },
3566     { 16, 7, 16, 7, 16 },
3567     { 16, 15, 16, 7, 16 },
3568     { 32, 7, 32, 7, 32 },
3569 gbeauche 1.4 { 16, 7, 16, 7, 16 },
3570 gbeauche 1.16 { 0, 0, 0, 0, 0 },
3571     { 16, 7, 16, 7, 16 }
3572 gbeauche 1.3 };
3573 gbeauche 1.1
3574 gbeauche 1.3 static void
3575     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3576 gbeauche 1.1 {
3577 gbeauche 1.3 char *v = c->x86_vendor_id;
3578    
3579     if (!strcmp(v, "GenuineIntel"))
3580     c->x86_vendor = X86_VENDOR_INTEL;
3581     else if (!strcmp(v, "AuthenticAMD"))
3582     c->x86_vendor = X86_VENDOR_AMD;
3583     else if (!strcmp(v, "CyrixInstead"))
3584     c->x86_vendor = X86_VENDOR_CYRIX;
3585     else if (!strcmp(v, "Geode by NSC"))
3586     c->x86_vendor = X86_VENDOR_NSC;
3587     else if (!strcmp(v, "UMC UMC UMC "))
3588     c->x86_vendor = X86_VENDOR_UMC;
3589     else if (!strcmp(v, "CentaurHauls"))
3590     c->x86_vendor = X86_VENDOR_CENTAUR;
3591     else if (!strcmp(v, "NexGenDriven"))
3592     c->x86_vendor = X86_VENDOR_NEXGEN;
3593     else if (!strcmp(v, "RiseRiseRise"))
3594     c->x86_vendor = X86_VENDOR_RISE;
3595     else if (!strcmp(v, "GenuineTMx86") ||
3596     !strcmp(v, "TransmetaCPU"))
3597     c->x86_vendor = X86_VENDOR_TRANSMETA;
3598     else
3599     c->x86_vendor = X86_VENDOR_UNKNOWN;
3600     }
3601 gbeauche 1.1
3602 gbeauche 1.3 static void
3603     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3604     {
3605     static uae_u8 cpuid_space[256];
3606 gbeauche 1.20 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3607 gbeauche 1.3 uae_u8* tmp=get_target();
3608 gbeauche 1.1
3609 gbeauche 1.20 s_op = op;
3610 gbeauche 1.3 set_target(cpuid_space);
3611     raw_push_l_r(0); /* eax */
3612     raw_push_l_r(1); /* ecx */
3613     raw_push_l_r(2); /* edx */
3614     raw_push_l_r(3); /* ebx */
3615 gbeauche 1.20 raw_mov_l_rm(0,(uintptr)&s_op);
3616 gbeauche 1.3 raw_cpuid(0);
3617 gbeauche 1.20 raw_mov_l_mr((uintptr)&s_eax,0);
3618     raw_mov_l_mr((uintptr)&s_ebx,3);
3619     raw_mov_l_mr((uintptr)&s_ecx,1);
3620     raw_mov_l_mr((uintptr)&s_edx,2);
3621 gbeauche 1.3 raw_pop_l_r(3);
3622     raw_pop_l_r(2);
3623     raw_pop_l_r(1);
3624     raw_pop_l_r(0);
3625     raw_ret();
3626     set_target(tmp);
3627 gbeauche 1.1
3628 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
3629 gbeauche 1.20 if (eax != NULL) *eax = s_eax;
3630     if (ebx != NULL) *ebx = s_ebx;
3631     if (ecx != NULL) *ecx = s_ecx;
3632     if (edx != NULL) *edx = s_edx;
3633 gbeauche 1.1 }
3634    
3635 gbeauche 1.3 static void
3636     raw_init_cpu(void)
3637 gbeauche 1.1 {
3638 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
3639    
3640     /* Defaults */
3641 gbeauche 1.16 c->x86_processor = X86_PROCESSOR_max;
3642 gbeauche 1.3 c->x86_vendor = X86_VENDOR_UNKNOWN;
3643     c->cpuid_level = -1; /* CPUID not detected */
3644     c->x86_model = c->x86_mask = 0; /* So far unknown... */
3645     c->x86_vendor_id[0] = '\0'; /* Unset */
3646     c->x86_hwcap = 0;
3647    
3648     /* Get vendor name */
3649     c->x86_vendor_id[12] = '\0';
3650     cpuid(0x00000000,
3651     (uae_u32 *)&c->cpuid_level,
3652     (uae_u32 *)&c->x86_vendor_id[0],
3653     (uae_u32 *)&c->x86_vendor_id[8],
3654     (uae_u32 *)&c->x86_vendor_id[4]);
3655     x86_get_cpu_vendor(c);
3656    
3657     /* Intel-defined flags: level 0x00000001 */
3658     c->x86_brand_id = 0;
3659     if ( c->cpuid_level >= 0x00000001 ) {
3660     uae_u32 tfms, brand_id;
3661     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3662     c->x86 = (tfms >> 8) & 15;
3663     c->x86_model = (tfms >> 4) & 15;
3664     c->x86_brand_id = brand_id & 0xff;
3665     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3666     (c->x86 == 0xf)) {
3667     /* AMD Extended Family and Model Values */
3668     c->x86 += (tfms >> 20) & 0xff;
3669     c->x86_model += (tfms >> 12) & 0xf0;
3670     }
3671     c->x86_mask = tfms & 15;
3672     } else {
3673     /* Have CPUID level 0 only - unheard of */
3674     c->x86 = 4;
3675     }
3676    
3677 gbeauche 1.16 /* AMD-defined flags: level 0x80000001 */
3678     uae_u32 xlvl;
3679     cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3680     if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3681     if ( xlvl >= 0x80000001 ) {
3682     uae_u32 features;
3683     cpuid(0x80000001, NULL, NULL, NULL, &features);
3684     if (features & (1 << 29)) {
3685     /* Assume x86-64 if long mode is supported */
3686     c->x86_processor = X86_PROCESSOR_K8;
3687     }
3688     }
3689     }
3690    
3691 gbeauche 1.3 /* Canonicalize processor ID */
3692     switch (c->x86) {
3693     case 3:
3694     c->x86_processor = X86_PROCESSOR_I386;
3695     break;
3696     case 4:
3697     c->x86_processor = X86_PROCESSOR_I486;
3698     break;
3699     case 5:
3700     if (c->x86_vendor == X86_VENDOR_AMD)
3701     c->x86_processor = X86_PROCESSOR_K6;
3702     else
3703     c->x86_processor = X86_PROCESSOR_PENTIUM;
3704     break;
3705     case 6:
3706     if (c->x86_vendor == X86_VENDOR_AMD)
3707     c->x86_processor = X86_PROCESSOR_ATHLON;
3708     else
3709     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3710     break;
3711     case 15:
3712     if (c->x86_vendor == X86_VENDOR_INTEL) {
3713 gbeauche 1.16 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3714 gbeauche 1.3 if (c->x86_brand_id >= 8)
3715     c->x86_processor = X86_PROCESSOR_PENTIUM4;
3716     }
3717 gbeauche 1.16 if (c->x86_vendor == X86_VENDOR_AMD) {
3718     /* Assume an Athlon processor if family == 15 and it was not
3719     detected as an x86-64 so far */
3720     if (c->x86_processor == X86_PROCESSOR_max)
3721     c->x86_processor = X86_PROCESSOR_ATHLON;
3722     }
3723 gbeauche 1.3 break;
3724     }
3725     if (c->x86_processor == X86_PROCESSOR_max) {
3726     fprintf(stderr, "Error: unknown processor type\n");
3727     fprintf(stderr, " Family : %d\n", c->x86);
3728     fprintf(stderr, " Model : %d\n", c->x86_model);
3729     fprintf(stderr, " Mask : %d\n", c->x86_mask);
3730 gbeauche 1.16 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3731 gbeauche 1.3 if (c->x86_brand_id)
3732     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3733     abort();
3734     }
3735    
3736     /* Have CMOV support? */
3737 gbeauche 1.16 have_cmov = c->x86_hwcap & (1 << 15);
3738 gbeauche 1.3
3739     /* Can the host CPU suffer from partial register stalls? */
3740     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3741     #if 1
3742     /* It appears that partial register writes are a bad idea even on
3743 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
3744     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3745 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3746     have_rat_stall = true;
3747 gbeauche 1.1 #endif
3748 gbeauche 1.3
3749     /* Alignments */
3750     if (tune_alignment) {
3751     align_loops = x86_alignments[c->x86_processor].align_loop;
3752     align_jumps = x86_alignments[c->x86_processor].align_jump;
3753     }
3754    
3755     write_log("Max CPUID level=%d Processor is %s [%s]\n",
3756     c->cpuid_level, c->x86_vendor_id,
3757     x86_processor_string_table[c->x86_processor]);
3758 gbeauche 1.1 }
3759    
3760 gbeauche 1.10 static bool target_check_bsf(void)
3761     {
3762     bool mismatch = false;
3763     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3764     for (int g_CF = 0; g_CF <= 1; g_CF++) {
3765     for (int g_OF = 0; g_OF <= 1; g_OF++) {
3766     for (int g_SF = 0; g_SF <= 1; g_SF++) {
3767     for (int value = -1; value <= 1; value++) {
3768     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3769     int tmp = value;
3770     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3771 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
3772 gbeauche 1.10 int OF = (flags >> 11) & 1;
3773     int SF = (flags >> 7) & 1;
3774     int ZF = (flags >> 6) & 1;
3775     int CF = flags & 1;
3776     tmp = (value == 0);
3777     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3778     mismatch = true;
3779     }
3780     }}}}
3781     if (mismatch)
3782     write_log("Target CPU defines all flags on BSF instruction\n");
3783     return !mismatch;
3784     }
3785    
3786 gbeauche 1.1
3787     /*************************************************************************
3788     * FPU stuff *
3789     *************************************************************************/
3790    
3791    
3792     static __inline__ void raw_fp_init(void)
3793     {
3794     int i;
3795    
3796     for (i=0;i<N_FREGS;i++)
3797     live.spos[i]=-2;
3798     live.tos=-1; /* Stack is empty */
3799     }
3800    
3801     static __inline__ void raw_fp_cleanup_drop(void)
3802     {
3803     #if 0
3804     /* using FINIT instead of popping all the entries.
3805     Seems to have side effects --- there is display corruption in
3806     Quake when this is used */
3807     if (live.tos>1) {
3808     emit_byte(0x9b);
3809     emit_byte(0xdb);
3810     emit_byte(0xe3);
3811     live.tos=-1;
3812     }
3813     #endif
3814     while (live.tos>=1) {
3815     emit_byte(0xde);
3816     emit_byte(0xd9);
3817     live.tos-=2;
3818     }
3819     while (live.tos>=0) {
3820     emit_byte(0xdd);
3821     emit_byte(0xd8);
3822     live.tos--;
3823     }
3824     raw_fp_init();
3825     }
3826    
3827     static __inline__ void make_tos(int r)
3828     {
3829     int p,q;
3830    
3831     if (live.spos[r]<0) { /* Register not yet on stack */
3832     emit_byte(0xd9);
3833     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3834     live.tos++;
3835     live.spos[r]=live.tos;
3836     live.onstack[live.tos]=r;
3837     return;
3838     }
3839     /* Register is on stack */
3840     if (live.tos==live.spos[r])
3841     return;
3842     p=live.spos[r];
3843     q=live.onstack[live.tos];
3844    
3845     emit_byte(0xd9);
3846     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3847     live.onstack[live.tos]=r;
3848     live.spos[r]=live.tos;
3849     live.onstack[p]=q;
3850     live.spos[q]=p;
3851     }
3852    
3853     static __inline__ void make_tos2(int r, int r2)
3854     {
3855     int q;
3856    
3857     make_tos(r2); /* Put the reg that's supposed to end up in position2
3858     on top */
3859    
3860     if (live.spos[r]<0) { /* Register not yet on stack */
3861     make_tos(r); /* This will extend the stack */
3862     return;
3863     }
3864     /* Register is on stack */
3865     emit_byte(0xd9);
3866     emit_byte(0xc9); /* Move r2 into position 2 */
3867    
3868     q=live.onstack[live.tos-1];
3869     live.onstack[live.tos]=q;
3870     live.spos[q]=live.tos;
3871     live.onstack[live.tos-1]=r2;
3872     live.spos[r2]=live.tos-1;
3873    
3874     make_tos(r); /* And r into 1 */
3875     }
3876    
3877     static __inline__ int stackpos(int r)
3878     {
3879     if (live.spos[r]<0)
3880     abort();
3881     if (live.tos<live.spos[r]) {
3882     printf("Looking for spos for fnreg %d\n",r);
3883     abort();
3884     }
3885     return live.tos-live.spos[r];
3886     }
3887    
3888     static __inline__ void usereg(int r)
3889     {
3890     if (live.spos[r]<0)
3891     make_tos(r);
3892     }
3893    
3894     /* This is called with one FP value in a reg *above* tos, which it will
3895     pop off the stack if necessary */
3896     static __inline__ void tos_make(int r)
3897     {
3898     if (live.spos[r]<0) {
3899     live.tos++;
3900     live.spos[r]=live.tos;
3901     live.onstack[live.tos]=r;
3902     return;
3903     }
3904     emit_byte(0xdd);
3905     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3906     and pop it*/
3907     }
3908    
3909    
3910     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3911     {
3912     make_tos(r);
3913     emit_byte(0xdd);
3914     emit_byte(0x15);
3915     emit_long(m);
3916     }
3917     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3918    
3919     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
3920     {
3921     make_tos(r);
3922     emit_byte(0xdd);
3923     emit_byte(0x1d);
3924     emit_long(m);
3925     live.onstack[live.tos]=-1;
3926     live.tos--;
3927     live.spos[r]=-2;
3928     }
3929     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
3930    
3931     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3932     {
3933     emit_byte(0xdd);
3934     emit_byte(0x05);
3935     emit_long(m);
3936     tos_make(r);
3937     }
3938     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
3939    
3940     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3941     {
3942     emit_byte(0xdb);
3943     emit_byte(0x05);
3944     emit_long(m);
3945     tos_make(r);
3946     }
3947     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
3948    
3949     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3950     {
3951     make_tos(r);
3952     emit_byte(0xdb);
3953     emit_byte(0x15);
3954     emit_long(m);
3955     }
3956     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
3957    
3958     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3959     {
3960     emit_byte(0xd9);
3961     emit_byte(0x05);
3962     emit_long(m);
3963     tos_make(r);
3964     }
3965     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
3966    
3967     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3968     {
3969     make_tos(r);
3970     emit_byte(0xd9);
3971     emit_byte(0x15);
3972     emit_long(m);
3973     }
3974     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
3975    
3976     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3977     {
3978     int rs;
3979    
3980     /* Stupid x87 can't write a long double to mem without popping the
3981     stack! */
3982     usereg(r);
3983     rs=stackpos(r);
3984     emit_byte(0xd9); /* Get a copy to the top of stack */
3985     emit_byte(0xc0+rs);
3986    
3987     emit_byte(0xdb); /* store and pop it */
3988     emit_byte(0x3d);
3989     emit_long(m);
3990     }
3991     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
3992    
3993     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
3994     {
3995     int rs;
3996    
3997     make_tos(r);
3998     emit_byte(0xdb); /* store and pop it */
3999     emit_byte(0x3d);
4000     emit_long(m);
4001     live.onstack[live.tos]=-1;
4002     live.tos--;
4003     live.spos[r]=-2;
4004     }
4005     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4006    
4007     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4008     {
4009     emit_byte(0xdb);
4010     emit_byte(0x2d);
4011     emit_long(m);
4012     tos_make(r);
4013     }
4014     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4015    
4016     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4017     {
4018     emit_byte(0xd9);
4019     emit_byte(0xeb);
4020     tos_make(r);
4021     }
4022     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4023    
4024     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4025     {
4026     emit_byte(0xd9);
4027     emit_byte(0xec);
4028     tos_make(r);
4029     }
4030     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4031    
4032     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4033     {
4034     emit_byte(0xd9);
4035     emit_byte(0xea);
4036     tos_make(r);
4037     }
4038     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4039    
4040     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4041     {
4042     emit_byte(0xd9);
4043     emit_byte(0xed);
4044     tos_make(r);
4045     }
4046     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4047    
4048     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4049     {
4050     emit_byte(0xd9);
4051     emit_byte(0xe8);
4052     tos_make(r);
4053     }
4054     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4055    
4056     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4057     {
4058     emit_byte(0xd9);
4059     emit_byte(0xee);
4060     tos_make(r);
4061     }
4062     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4063    
4064     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4065     {
4066     int ds;
4067    
4068     usereg(s);
4069     ds=stackpos(s);
4070     if (ds==0 && live.spos[d]>=0) {
4071     /* source is on top of stack, and we already have the dest */
4072     int dd=stackpos(d);
4073     emit_byte(0xdd);
4074     emit_byte(0xd0+dd);
4075     }
4076     else {
4077     emit_byte(0xd9);
4078     emit_byte(0xc0+ds); /* duplicate source on tos */
4079     tos_make(d); /* store to destination, pop if necessary */
4080     }
4081     }
4082     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4083    
4084     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4085     {
4086     emit_byte(0xd9);
4087     emit_byte(0xa8+index);
4088     emit_long(base);
4089     }
4090     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4091    
4092    
4093     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4094     {
4095     int ds;
4096    
4097     if (d!=s) {
4098     usereg(s);
4099     ds=stackpos(s);
4100     emit_byte(0xd9);
4101     emit_byte(0xc0+ds); /* duplicate source */
4102     emit_byte(0xd9);
4103     emit_byte(0xfa); /* take square root */
4104     tos_make(d); /* store to destination */
4105     }
4106     else {
4107     make_tos(d);
4108     emit_byte(0xd9);
4109     emit_byte(0xfa); /* take square root */
4110     }
4111     }
4112     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4113    
4114     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4115     {
4116     int ds;
4117    
4118     if (d!=s) {
4119     usereg(s);
4120     ds=stackpos(s);
4121     emit_byte(0xd9);
4122     emit_byte(0xc0+ds); /* duplicate source */
4123     emit_byte(0xd9);
4124     emit_byte(0xe1); /* take fabs */
4125     tos_make(d); /* store to destination */
4126     }
4127     else {
4128     make_tos(d);
4129     emit_byte(0xd9);
4130     emit_byte(0xe1); /* take fabs */
4131     }
4132     }
4133     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4134    
4135     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4136     {
4137     int ds;
4138    
4139     if (d!=s) {
4140     usereg(s);
4141     ds=stackpos(s);
4142     emit_byte(0xd9);
4143     emit_byte(0xc0+ds); /* duplicate source */
4144     emit_byte(0xd9);
4145     emit_byte(0xfc); /* take frndint */
4146     tos_make(d); /* store to destination */
4147     }
4148     else {
4149     make_tos(d);
4150     emit_byte(0xd9);
4151     emit_byte(0xfc); /* take frndint */
4152     }
4153     }
4154     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4155    
4156     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4157     {
4158     int ds;
4159    
4160     if (d!=s) {
4161     usereg(s);
4162     ds=stackpos(s);
4163     emit_byte(0xd9);
4164     emit_byte(0xc0+ds); /* duplicate source */
4165     emit_byte(0xd9);
4166     emit_byte(0xff); /* take cos */
4167     tos_make(d); /* store to destination */
4168     }
4169     else {
4170     make_tos(d);
4171     emit_byte(0xd9);
4172     emit_byte(0xff); /* take cos */
4173     }
4174     }
4175     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4176    
4177     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4178     {
4179     int ds;
4180    
4181     if (d!=s) {
4182     usereg(s);
4183     ds=stackpos(s);
4184     emit_byte(0xd9);
4185     emit_byte(0xc0+ds); /* duplicate source */
4186     emit_byte(0xd9);
4187     emit_byte(0xfe); /* take sin */
4188     tos_make(d); /* store to destination */
4189     }
4190     else {
4191     make_tos(d);
4192     emit_byte(0xd9);
4193     emit_byte(0xfe); /* take sin */
4194     }
4195     }
4196     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4197    
4198     double one=1;
4199     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4200     {
4201     int ds;
4202    
4203     usereg(s);
4204     ds=stackpos(s);
4205     emit_byte(0xd9);
4206     emit_byte(0xc0+ds); /* duplicate source */
4207    
4208     emit_byte(0xd9);
4209     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4210     emit_byte(0xd9);
4211     emit_byte(0xfc); /* rndint */
4212     emit_byte(0xd9);
4213     emit_byte(0xc9); /* swap top two elements */
4214     emit_byte(0xd8);
4215     emit_byte(0xe1); /* subtract rounded from original */
4216     emit_byte(0xd9);
4217     emit_byte(0xf0); /* f2xm1 */
4218     emit_byte(0xdc);
4219     emit_byte(0x05);
4220 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4221 gbeauche 1.1 emit_byte(0xd9);
4222     emit_byte(0xfd); /* and scale it */
4223     emit_byte(0xdd);
4224     emit_byte(0xd9); /* take he rounded value off */
4225     tos_make(d); /* store to destination */
4226     }
4227     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4228    
4229     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4230     {
4231     int ds;
4232    
4233     usereg(s);
4234     ds=stackpos(s);
4235     emit_byte(0xd9);
4236     emit_byte(0xc0+ds); /* duplicate source */
4237     emit_byte(0xd9);
4238     emit_byte(0xea); /* fldl2e */
4239     emit_byte(0xde);
4240     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4241    
4242     emit_byte(0xd9);
4243     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4244     emit_byte(0xd9);
4245     emit_byte(0xfc); /* rndint */
4246     emit_byte(0xd9);
4247     emit_byte(0xc9); /* swap top two elements */
4248     emit_byte(0xd8);
4249     emit_byte(0xe1); /* subtract rounded from original */
4250     emit_byte(0xd9);
4251     emit_byte(0xf0); /* f2xm1 */
4252     emit_byte(0xdc);
4253     emit_byte(0x05);
4254 gbeauche 1.20 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4255 gbeauche 1.1 emit_byte(0xd9);
4256     emit_byte(0xfd); /* and scale it */
4257     emit_byte(0xdd);
4258     emit_byte(0xd9); /* take he rounded value off */
4259     tos_make(d); /* store to destination */
4260     }
4261     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4262    
4263     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4264     {
4265     int ds;
4266    
4267     usereg(s);
4268     ds=stackpos(s);
4269     emit_byte(0xd9);
4270     emit_byte(0xc0+ds); /* duplicate source */
4271     emit_byte(0xd9);
4272     emit_byte(0xe8); /* push '1' */
4273     emit_byte(0xd9);
4274     emit_byte(0xc9); /* swap top two */
4275     emit_byte(0xd9);
4276     emit_byte(0xf1); /* take 1*log2(x) */
4277     tos_make(d); /* store to destination */
4278     }
4279     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4280    
4281    
4282     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4283     {
4284     int ds;
4285    
4286     if (d!=s) {
4287     usereg(s);
4288     ds=stackpos(s);
4289     emit_byte(0xd9);
4290     emit_byte(0xc0+ds); /* duplicate source */
4291     emit_byte(0xd9);
4292     emit_byte(0xe0); /* take fchs */
4293     tos_make(d); /* store to destination */
4294     }
4295     else {
4296     make_tos(d);
4297     emit_byte(0xd9);
4298     emit_byte(0xe0); /* take fchs */
4299     }
4300     }
4301     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4302    
4303     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4304     {
4305     int ds;
4306    
4307     usereg(s);
4308     usereg(d);
4309    
4310     if (live.spos[s]==live.tos) {
4311     /* Source is on top of stack */
4312     ds=stackpos(d);
4313     emit_byte(0xdc);
4314     emit_byte(0xc0+ds); /* add source to dest*/
4315     }
4316     else {
4317     make_tos(d);
4318     ds=stackpos(s);
4319    
4320     emit_byte(0xd8);
4321     emit_byte(0xc0+ds); /* add source to dest*/
4322     }
4323     }
4324     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4325    
4326     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4327     {
4328     int ds;
4329    
4330     usereg(s);
4331     usereg(d);
4332    
4333     if (live.spos[s]==live.tos) {
4334     /* Source is on top of stack */
4335     ds=stackpos(d);
4336     emit_byte(0xdc);
4337     emit_byte(0xe8+ds); /* sub source from dest*/
4338     }
4339     else {
4340     make_tos(d);
4341     ds=stackpos(s);
4342    
4343     emit_byte(0xd8);
4344     emit_byte(0xe0+ds); /* sub src from dest */
4345     }
4346     }
4347     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4348    
4349     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4350     {
4351     int ds;
4352    
4353     usereg(s);
4354     usereg(d);
4355    
4356     make_tos(d);
4357     ds=stackpos(s);
4358    
4359     emit_byte(0xdd);
4360     emit_byte(0xe0+ds); /* cmp dest with source*/
4361     }
4362     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4363    
4364     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4365     {
4366     int ds;
4367    
4368     usereg(s);
4369     usereg(d);
4370    
4371     if (live.spos[s]==live.tos) {
4372     /* Source is on top of stack */
4373     ds=stackpos(d);
4374     emit_byte(0xdc);
4375     emit_byte(0xc8+ds); /* mul dest by source*/
4376     }
4377     else {
4378     make_tos(d);
4379     ds=stackpos(s);
4380    
4381     emit_byte(0xd8);
4382     emit_byte(0xc8+ds); /* mul dest by source*/
4383     }
4384     }
4385     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4386    
4387     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4388     {
4389     int ds;
4390    
4391     usereg(s);
4392     usereg(d);
4393    
4394     if (live.spos[s]==live.tos) {
4395     /* Source is on top of stack */
4396     ds=stackpos(d);
4397     emit_byte(0xdc);
4398     emit_byte(0xf8+ds); /* div dest by source */
4399     }
4400     else {
4401     make_tos(d);
4402     ds=stackpos(s);
4403    
4404     emit_byte(0xd8);
4405     emit_byte(0xf0+ds); /* div dest by source*/
4406     }
4407     }
4408     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4409    
4410     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4411     {
4412     int ds;
4413    
4414     usereg(s);
4415     usereg(d);
4416    
4417     make_tos2(d,s);
4418     ds=stackpos(s);
4419    
4420     if (ds!=1) {
4421     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4422     abort();
4423     }
4424     emit_byte(0xd9);
4425     emit_byte(0xf8); /* take rem from dest by source */
4426     }
4427     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4428    
4429     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4430     {
4431     int ds;
4432    
4433     usereg(s);
4434     usereg(d);
4435    
4436     make_tos2(d,s);
4437     ds=stackpos(s);
4438    
4439     if (ds!=1) {
4440     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4441     abort();
4442     }
4443     emit_byte(0xd9);
4444     emit_byte(0xf5); /* take rem1 from dest by source */
4445     }
4446     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4447    
4448    
4449     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4450     {
4451     make_tos(r);
4452     emit_byte(0xd9); /* ftst */
4453     emit_byte(0xe4);
4454     }
4455     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4456    
/* The %eax register is clobbered if the target processor doesn't support
   fucomi: the fallback path goes through fstsw %ax / sahf instead.
   The expansion is parenthesized so that it stays a single operand in
   whatever expression it is substituted into. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
4460    
4461     static __inline__ void raw_fflags_into_flags(int r)
4462     {
4463     int p;
4464    
4465     usereg(r);
4466     p=stackpos(r);
4467    
4468     emit_byte(0xd9);
4469     emit_byte(0xee); /* Push 0 */
4470     emit_byte(0xd9);
4471     emit_byte(0xc9+p); /* swap top two around */
4472     if (have_cmov) {
4473     // gb-- fucomi is for P6 cores only, not K6-2 then...
4474     emit_byte(0xdb);
4475     emit_byte(0xe9+p); /* fucomi them */
4476     }
4477     else {
4478     emit_byte(0xdd);
4479     emit_byte(0xe1+p); /* fucom them */
4480     emit_byte(0x9b);
4481     emit_byte(0xdf);
4482     emit_byte(0xe0); /* fstsw ax */
4483     raw_sahf(0); /* sahf */
4484     }
4485     emit_byte(0xdd);
4486     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4487     }