ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.10
Committed: 2003-03-13T15:57:01Z (21 years, 7 months ago) by gbeauche
Branch: MAIN
Changes since 1.9: +26 -0 lines
Log Message:
Workaround change in flags handling for BSF instruction on Pentium 4.
i.e. currently disable translation of ADDX/SUBX/B<CHG,CLR,SET,TST> instructions
in that case. That is to say, better (much?) slower than inaccurate. :-(

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Numbers of the eight IA-32 general-purpose registers, as they are
   used in the opcode and ModRM bytes emitted by this file */
#define EAX_INDEX  0
#define ECX_INDEX  1
#define EDX_INDEX  2
#define EBX_INDEX  3
#define ESP_INDEX  4
#define EBP_INDEX  5
#define ESI_INDEX  6
#define EDI_INDEX  7
45    
/* Register index in which a subroutine hands back its integer result */
#define REG_RESULT  0
48    
/* Register indices carrying the first and second subroutine argument */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* MSVC _fastcall: arguments arrive in ECX and EDX */
#define REG_PAR1  1
#define REG_PAR2  2
#else
#define REG_PAR1  0
#define REG_PAR2  2
#endif
58    
/* Three register indices that serve none of the result/argument roles */
#define REG_NOPAR1  6
#define REG_NOPAR2  5
#define REG_NOPAR3  3
63    
/* Register used for preloading regs.pc_p */
#define REG_PC_PRE  0
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
#define REG_PC_TMP  0
#else
/* A second register, different from REG_PC_PRE */
#define REG_PC_TMP  1
#endif
70    
/* Register that can hold a shift count; -1 would mean any register will do */
#define SHIFTCOUNT_NREG  1
/* After a 32x32 multiply, %eax holds the low and %edx the high 32 bits */
#define MUL_NREG1        0
#define MUL_NREG2        2
75    
76     uae_s8 always_used[]={4,-1};
77     uae_s8 can_byte[]={0,1,2,3,-1};
78     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79    
80     /* cpuopti mutate instruction handlers to assume registers are saved
81     by the caller */
82     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
83    
84     /* This *should* be the same as call_saved. But:
85     - We might not really know which registers are saved, and which aren't,
86     so we need to preserve some, but don't want to rely on everyone else
87     also saving those registers
88     - Special registers (such like the stack pointer) should not be "preserved"
89     by pushing, even though they are "saved" across function calls
90     */
91     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
92    
/* Per instruction class: expands to clobber_flags() when the class
   destroys the native flags, and to nothing when it leaves them alone */

/* data movement */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH

/* arithmetic and logic */
#define CLOBBER_SUB   clobber_flags()
#define CLOBBER_SBB   clobber_flags()
#define CLOBBER_CMP   clobber_flags()
#define CLOBBER_ADD   clobber_flags()
#define CLOBBER_ADC   clobber_flags()
#define CLOBBER_AND   clobber_flags()
#define CLOBBER_OR    clobber_flags()
#define CLOBBER_XOR   clobber_flags()

/* rotates, shifts, test */
#define CLOBBER_ROL   clobber_flags()
#define CLOBBER_ROR   clobber_flags()
#define CLOBBER_SHLL  clobber_flags()
#define CLOBBER_SHRL  clobber_flags()
#define CLOBBER_SHRA  clobber_flags()
#define CLOBBER_TEST  clobber_flags()

/* remaining classes */
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW16  clobber_flags()
#define CLOBBER_SW32
#define CLOBBER_SETCC
#define CLOBBER_MUL   clobber_flags()
#define CLOBBER_BT    clobber_flags()
#define CLOBBER_BSF   clobber_flags()
126    
/* Encoding-size switches consulted by the emitters below */
const bool optimize_accum      = true;  /* use accumulator-only short opcodes */
const bool optimize_imm8       = true;  /* NOTE(review): no use visible in this part of the file */
const bool optimize_shift_once = true;  /* use the shift/rotate-by-one opcodes when the count is 1 */
130    
131     /*************************************************************************
132     * Actual encoding of the instructions on the target CPU *
133     *************************************************************************/
134    
135 gbeauche 1.2 static __inline__ int isaccum(int r)
136     {
137     return (r == EAX_INDEX);
138     }
139    
140 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
141     {
142     return (x>=-128 && x<=127);
143     }
144    
145     static __inline__ int isword(uae_s32 x)
146     {
147     return (x>=-32768 && x<=32767);
148     }
149    
150     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
151     {
152     emit_byte(0x50+r);
153     }
154     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
155    
156     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
157     {
158     emit_byte(0x58+r);
159     }
160     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
161    
162     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
163     {
164     emit_byte(0x0f);
165     emit_byte(0xba);
166     emit_byte(0xe0+r);
167     emit_byte(i);
168     }
169     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
170    
171     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
172     {
173     emit_byte(0x0f);
174     emit_byte(0xa3);
175     emit_byte(0xc0+8*b+r);
176     }
177     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
178    
179     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
180     {
181     emit_byte(0x0f);
182     emit_byte(0xba);
183     emit_byte(0xf8+r);
184     emit_byte(i);
185     }
186     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
187    
188     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
189     {
190     emit_byte(0x0f);
191     emit_byte(0xbb);
192     emit_byte(0xc0+8*b+r);
193     }
194     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
195    
196    
197     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
198     {
199     emit_byte(0x0f);
200     emit_byte(0xba);
201     emit_byte(0xf0+r);
202     emit_byte(i);
203     }
204     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
205    
206     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
207     {
208     emit_byte(0x0f);
209     emit_byte(0xb3);
210     emit_byte(0xc0+8*b+r);
211     }
212     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
213    
214     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
215     {
216     emit_byte(0x0f);
217     emit_byte(0xba);
218     emit_byte(0xe8+r);
219     emit_byte(i);
220     }
221     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
222    
223     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
224     {
225     emit_byte(0x0f);
226     emit_byte(0xab);
227     emit_byte(0xc0+8*b+r);
228     }
229     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
230    
231     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
232     {
233     emit_byte(0x66);
234     if (isbyte(i)) {
235     emit_byte(0x83);
236     emit_byte(0xe8+d);
237     emit_byte(i);
238     }
239     else {
240 gbeauche 1.2 if (optimize_accum && isaccum(d))
241     emit_byte(0x2d);
242     else {
243 gbeauche 1.1 emit_byte(0x81);
244     emit_byte(0xe8+d);
245 gbeauche 1.2 }
246 gbeauche 1.1 emit_word(i);
247     }
248     }
249     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
250    
251    
252     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
253     {
254     emit_byte(0x8b);
255     emit_byte(0x05+8*d);
256     emit_long(s);
257     }
258     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
259    
260     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
261     {
262     emit_byte(0xc7);
263     emit_byte(0x05);
264     emit_long(d);
265     emit_long(s);
266     }
267     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
268    
269     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
270     {
271     emit_byte(0x66);
272     emit_byte(0xc7);
273     emit_byte(0x05);
274     emit_long(d);
275     emit_word(s);
276     }
277     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
278    
279     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
280     {
281     emit_byte(0xc6);
282     emit_byte(0x05);
283     emit_long(d);
284     emit_byte(s);
285     }
286     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
287    
288     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
289     {
290     if (optimize_shift_once && (i == 1)) {
291     emit_byte(0xd0);
292     emit_byte(0x05);
293     emit_long(d);
294     }
295     else {
296     emit_byte(0xc0);
297     emit_byte(0x05);
298     emit_long(d);
299     emit_byte(i);
300     }
301     }
302     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
303    
304     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
305     {
306     if (optimize_shift_once && (i == 1)) {
307     emit_byte(0xd0);
308     emit_byte(0xc0+r);
309     }
310     else {
311     emit_byte(0xc0);
312     emit_byte(0xc0+r);
313     emit_byte(i);
314     }
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317    
318     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319     {
320     emit_byte(0x66);
321     emit_byte(0xc1);
322     emit_byte(0xc0+r);
323     emit_byte(i);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
328     {
329     if (optimize_shift_once && (i == 1)) {
330     emit_byte(0xd1);
331     emit_byte(0xc0+r);
332     }
333     else {
334     emit_byte(0xc1);
335     emit_byte(0xc0+r);
336     emit_byte(i);
337     }
338     }
339     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
340    
341     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
342     {
343     emit_byte(0xd3);
344     emit_byte(0xc0+d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
347    
348     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
349     {
350     emit_byte(0x66);
351     emit_byte(0xd3);
352     emit_byte(0xc0+d);
353     }
354     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
355    
356     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
357     {
358     emit_byte(0xd2);
359     emit_byte(0xc0+d);
360     }
361     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
362    
363     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
364     {
365     emit_byte(0xd3);
366     emit_byte(0xe0+d);
367     }
368     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
369    
370     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
371     {
372     emit_byte(0x66);
373     emit_byte(0xd3);
374     emit_byte(0xe0+d);
375     }
376     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
377    
378     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
379     {
380     emit_byte(0xd2);
381     emit_byte(0xe0+d);
382     }
383     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
384    
385     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
386     {
387     if (optimize_shift_once && (i == 1)) {
388     emit_byte(0xd0);
389     emit_byte(0xc8+r);
390     }
391     else {
392     emit_byte(0xc0);
393     emit_byte(0xc8+r);
394     emit_byte(i);
395     }
396     }
397     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
398    
399     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
400     {
401     emit_byte(0x66);
402     emit_byte(0xc1);
403     emit_byte(0xc8+r);
404     emit_byte(i);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
407    
408     // gb-- used for making an fpcr value in compemu_fpp.cpp
409     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
410     {
411     emit_byte(0x0b);
412     emit_byte(0x05+8*d);
413     emit_long(s);
414     }
415     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
416    
417     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
418     {
419     if (optimize_shift_once && (i == 1)) {
420     emit_byte(0xd1);
421     emit_byte(0xc8+r);
422     }
423     else {
424     emit_byte(0xc1);
425     emit_byte(0xc8+r);
426     emit_byte(i);
427     }
428     }
429     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
430    
431     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
432     {
433     emit_byte(0xd3);
434     emit_byte(0xc8+d);
435     }
436     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
437    
438     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
439     {
440     emit_byte(0x66);
441     emit_byte(0xd3);
442     emit_byte(0xc8+d);
443     }
444     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
445    
446     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
447     {
448     emit_byte(0xd2);
449     emit_byte(0xc8+d);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
454     {
455     emit_byte(0xd3);
456     emit_byte(0xe8+d);
457     }
458     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
459    
460     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
461     {
462     emit_byte(0x66);
463     emit_byte(0xd3);
464     emit_byte(0xe8+d);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
469     {
470     emit_byte(0xd2);
471     emit_byte(0xe8+d);
472     }
473     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
474    
475     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
476     {
477     emit_byte(0xd3);
478     emit_byte(0xf8+d);
479     }
480     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
481    
482     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
483     {
484     emit_byte(0x66);
485     emit_byte(0xd3);
486     emit_byte(0xf8+d);
487     }
488     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
489    
490     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
491     {
492     emit_byte(0xd2);
493     emit_byte(0xf8+d);
494     }
495     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
496    
497     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
498     {
499     if (optimize_shift_once && (i == 1)) {
500     emit_byte(0xd1);
501     emit_byte(0xe0+r);
502     }
503     else {
504     emit_byte(0xc1);
505     emit_byte(0xe0+r);
506     emit_byte(i);
507     }
508     }
509     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
510    
511     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
512     {
513     emit_byte(0x66);
514     emit_byte(0xc1);
515     emit_byte(0xe0+r);
516     emit_byte(i);
517     }
518     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
519    
520     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
521     {
522     if (optimize_shift_once && (i == 1)) {
523     emit_byte(0xd0);
524     emit_byte(0xe0+r);
525     }
526     else {
527     emit_byte(0xc0);
528     emit_byte(0xe0+r);
529     emit_byte(i);
530     }
531     }
532     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
533    
534     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
535     {
536     if (optimize_shift_once && (i == 1)) {
537     emit_byte(0xd1);
538     emit_byte(0xe8+r);
539     }
540     else {
541     emit_byte(0xc1);
542     emit_byte(0xe8+r);
543     emit_byte(i);
544     }
545     }
546     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
547    
548     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
549     {
550     emit_byte(0x66);
551     emit_byte(0xc1);
552     emit_byte(0xe8+r);
553     emit_byte(i);
554     }
555     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
556    
557     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
558     {
559     if (optimize_shift_once && (i == 1)) {
560     emit_byte(0xd0);
561     emit_byte(0xe8+r);
562     }
563     else {
564     emit_byte(0xc0);
565     emit_byte(0xe8+r);
566     emit_byte(i);
567     }
568     }
569     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
570    
571     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
572     {
573     if (optimize_shift_once && (i == 1)) {
574     emit_byte(0xd1);
575     emit_byte(0xf8+r);
576     }
577     else {
578     emit_byte(0xc1);
579     emit_byte(0xf8+r);
580     emit_byte(i);
581     }
582     }
583     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
584    
585     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
586     {
587     emit_byte(0x66);
588     emit_byte(0xc1);
589     emit_byte(0xf8+r);
590     emit_byte(i);
591     }
592     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
593    
594     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
595     {
596     if (optimize_shift_once && (i == 1)) {
597     emit_byte(0xd0);
598     emit_byte(0xf8+r);
599     }
600     else {
601     emit_byte(0xc0);
602     emit_byte(0xf8+r);
603     emit_byte(i);
604     }
605     }
606     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
607    
608     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
609     {
610     emit_byte(0x9e);
611     }
612     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
613    
614     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
615     {
616     emit_byte(0x0f);
617     emit_byte(0xa2);
618     }
619     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
620    
621     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
622     {
623     emit_byte(0x9f);
624     }
625     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
626    
627     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
628     {
629     emit_byte(0x0f);
630     emit_byte(0x90+cc);
631     emit_byte(0xc0+d);
632     }
633     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
634    
635     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
636     {
637     emit_byte(0x0f);
638     emit_byte(0x90+cc);
639     emit_byte(0x05);
640     emit_long(d);
641     }
642     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
643    
644     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
645     {
646     if (have_cmov) {
647     emit_byte(0x0f);
648     emit_byte(0x40+cc);
649     emit_byte(0xc0+8*d+s);
650     }
651     else { /* replacement using branch and mov */
652     int uncc=(cc^1);
653     emit_byte(0x70+uncc);
654     emit_byte(2); /* skip next 2 bytes if not cc=true */
655     emit_byte(0x89);
656     emit_byte(0xc0+8*s+d);
657     }
658     }
659     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
660    
661     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
662     {
663     emit_byte(0x0f);
664     emit_byte(0xbc);
665     emit_byte(0xc0+8*d+s);
666     }
667     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
668    
669     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
670     {
671     emit_byte(0x0f);
672     emit_byte(0xbf);
673     emit_byte(0xc0+8*d+s);
674     }
675     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
676    
677     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
678     {
679     emit_byte(0x0f);
680     emit_byte(0xbe);
681     emit_byte(0xc0+8*d+s);
682     }
683     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
684    
685     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
686     {
687     emit_byte(0x0f);
688     emit_byte(0xb7);
689     emit_byte(0xc0+8*d+s);
690     }
691     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
692    
693     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
694     {
695     emit_byte(0x0f);
696     emit_byte(0xb6);
697     emit_byte(0xc0+8*d+s);
698     }
699     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
700    
701     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
702     {
703     emit_byte(0x0f);
704     emit_byte(0xaf);
705     emit_byte(0xc0+8*d+s);
706     }
707     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
708    
709     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
710     {
711     if (d!=MUL_NREG1 || s!=MUL_NREG2)
712     abort();
713     emit_byte(0xf7);
714     emit_byte(0xea);
715     }
716     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
717    
718     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
719     {
720     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
721     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
722     abort();
723     }
724     emit_byte(0xf7);
725     emit_byte(0xe2);
726     }
727     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
728    
729     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
730     {
731     abort(); /* %^$&%^$%#^ x86! */
732     emit_byte(0x0f);
733     emit_byte(0xaf);
734     emit_byte(0xc0+8*d+s);
735     }
736     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
737    
738     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
739     {
740     emit_byte(0x88);
741     emit_byte(0xc0+8*s+d);
742     }
743     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
744    
745     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
746     {
747     emit_byte(0x66);
748     emit_byte(0x89);
749     emit_byte(0xc0+8*s+d);
750     }
751     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
752    
753     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
754     {
755     int isebp=(baser==5)?0x40:0;
756     int fi;
757    
758     switch(factor) {
759     case 1: fi=0; break;
760     case 2: fi=1; break;
761     case 4: fi=2; break;
762     case 8: fi=3; break;
763     default: abort();
764     }
765    
766    
767     emit_byte(0x8b);
768     emit_byte(0x04+8*d+isebp);
769     emit_byte(baser+8*index+0x40*fi);
770     if (isebp)
771     emit_byte(0x00);
772     }
773     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
774    
775     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
776     {
777     int fi;
778     int isebp;
779    
780     switch(factor) {
781     case 1: fi=0; break;
782     case 2: fi=1; break;
783     case 4: fi=2; break;
784     case 8: fi=3; break;
785     default: abort();
786     }
787     isebp=(baser==5)?0x40:0;
788    
789     emit_byte(0x66);
790     emit_byte(0x8b);
791     emit_byte(0x04+8*d+isebp);
792     emit_byte(baser+8*index+0x40*fi);
793     if (isebp)
794     emit_byte(0x00);
795     }
796     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
797    
798     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
799     {
800     int fi;
801     int isebp;
802    
803     switch(factor) {
804     case 1: fi=0; break;
805     case 2: fi=1; break;
806     case 4: fi=2; break;
807     case 8: fi=3; break;
808     default: abort();
809     }
810     isebp=(baser==5)?0x40:0;
811    
812     emit_byte(0x8a);
813     emit_byte(0x04+8*d+isebp);
814     emit_byte(baser+8*index+0x40*fi);
815     if (isebp)
816     emit_byte(0x00);
817     }
818     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
819    
820     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
821     {
822     int fi;
823     int isebp;
824    
825     switch(factor) {
826     case 1: fi=0; break;
827     case 2: fi=1; break;
828     case 4: fi=2; break;
829     case 8: fi=3; break;
830     default: abort();
831     }
832    
833    
834     isebp=(baser==5)?0x40:0;
835    
836     emit_byte(0x89);
837     emit_byte(0x04+8*s+isebp);
838     emit_byte(baser+8*index+0x40*fi);
839     if (isebp)
840     emit_byte(0x00);
841     }
842     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
843    
844     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
845     {
846     int fi;
847     int isebp;
848    
849     switch(factor) {
850     case 1: fi=0; break;
851     case 2: fi=1; break;
852     case 4: fi=2; break;
853     case 8: fi=3; break;
854     default: abort();
855     }
856     isebp=(baser==5)?0x40:0;
857    
858     emit_byte(0x66);
859     emit_byte(0x89);
860     emit_byte(0x04+8*s+isebp);
861     emit_byte(baser+8*index+0x40*fi);
862     if (isebp)
863     emit_byte(0x00);
864     }
865     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
866    
867     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
868     {
869     int fi;
870     int isebp;
871    
872     switch(factor) {
873     case 1: fi=0; break;
874     case 2: fi=1; break;
875     case 4: fi=2; break;
876     case 8: fi=3; break;
877     default: abort();
878     }
879     isebp=(baser==5)?0x40:0;
880    
881     emit_byte(0x88);
882     emit_byte(0x04+8*s+isebp);
883     emit_byte(baser+8*index+0x40*fi);
884     if (isebp)
885     emit_byte(0x00);
886     }
887     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
888    
889     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
890     {
891     int fi;
892    
893     switch(factor) {
894     case 1: fi=0; break;
895     case 2: fi=1; break;
896     case 4: fi=2; break;
897     case 8: fi=3; break;
898     default: abort();
899     }
900    
901     emit_byte(0x89);
902     emit_byte(0x84+8*s);
903     emit_byte(baser+8*index+0x40*fi);
904     emit_long(base);
905     }
906     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
907    
908     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
909     {
910     int fi;
911    
912     switch(factor) {
913     case 1: fi=0; break;
914     case 2: fi=1; break;
915     case 4: fi=2; break;
916     case 8: fi=3; break;
917     default: abort();
918     }
919    
920     emit_byte(0x66);
921     emit_byte(0x89);
922     emit_byte(0x84+8*s);
923     emit_byte(baser+8*index+0x40*fi);
924     emit_long(base);
925     }
926     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
927    
928     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
929     {
930     int fi;
931    
932     switch(factor) {
933     case 1: fi=0; break;
934     case 2: fi=1; break;
935     case 4: fi=2; break;
936     case 8: fi=3; break;
937     default: abort();
938     }
939    
940     emit_byte(0x88);
941     emit_byte(0x84+8*s);
942     emit_byte(baser+8*index+0x40*fi);
943     emit_long(base);
944     }
945     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
946    
947     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
948     {
949     int fi;
950    
951     switch(factor) {
952     case 1: fi=0; break;
953     case 2: fi=1; break;
954     case 4: fi=2; break;
955     case 8: fi=3; break;
956     default: abort();
957     }
958    
959     emit_byte(0x8b);
960     emit_byte(0x84+8*d);
961     emit_byte(baser+8*index+0x40*fi);
962     emit_long(base);
963     }
964     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
965    
966     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
967     {
968     int fi;
969    
970     switch(factor) {
971     case 1: fi=0; break;
972     case 2: fi=1; break;
973     case 4: fi=2; break;
974     case 8: fi=3; break;
975     default: abort();
976     }
977    
978     emit_byte(0x66);
979     emit_byte(0x8b);
980     emit_byte(0x84+8*d);
981     emit_byte(baser+8*index+0x40*fi);
982     emit_long(base);
983     }
984     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
985    
986     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
987     {
988     int fi;
989    
990     switch(factor) {
991     case 1: fi=0; break;
992     case 2: fi=1; break;
993     case 4: fi=2; break;
994     case 8: fi=3; break;
995     default: abort();
996     }
997    
998     emit_byte(0x8a);
999     emit_byte(0x84+8*d);
1000     emit_byte(baser+8*index+0x40*fi);
1001     emit_long(base);
1002     }
1003     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1004    
1005     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1006     {
1007     int fi;
1008     switch(factor) {
1009     case 1: fi=0; break;
1010     case 2: fi=1; break;
1011     case 4: fi=2; break;
1012     case 8: fi=3; break;
1013     default:
1014     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1015     abort();
1016     }
1017     emit_byte(0x8b);
1018     emit_byte(0x04+8*d);
1019     emit_byte(0x05+8*index+64*fi);
1020     emit_long(base);
1021     }
1022     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1023    
1024     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1025     {
1026     int fi;
1027     switch(factor) {
1028     case 1: fi=0; break;
1029     case 2: fi=1; break;
1030     case 4: fi=2; break;
1031     case 8: fi=3; break;
1032     default:
1033     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1034     abort();
1035     }
1036     if (have_cmov) {
1037     emit_byte(0x0f);
1038     emit_byte(0x40+cond);
1039     emit_byte(0x04+8*d);
1040     emit_byte(0x05+8*index+64*fi);
1041     emit_long(base);
1042     }
1043     else { /* replacement using branch and mov */
1044     int uncc=(cond^1);
1045     emit_byte(0x70+uncc);
1046     emit_byte(7); /* skip next 7 bytes if not cc=true */
1047     emit_byte(0x8b);
1048     emit_byte(0x04+8*d);
1049     emit_byte(0x05+8*index+64*fi);
1050     emit_long(base);
1051     }
1052     }
1053     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1054    
1055     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1056     {
1057     if (have_cmov) {
1058     emit_byte(0x0f);
1059     emit_byte(0x40+cond);
1060     emit_byte(0x05+8*d);
1061     emit_long(mem);
1062     }
1063     else { /* replacement using branch and mov */
1064     int uncc=(cond^1);
1065     emit_byte(0x70+uncc);
1066     emit_byte(6); /* skip next 6 bytes if not cc=true */
1067     emit_byte(0x8b);
1068     emit_byte(0x05+8*d);
1069     emit_long(mem);
1070     }
1071     }
1072     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1073    
1074     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1075     {
1076 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1077 gbeauche 1.1 emit_byte(0x8b);
1078     emit_byte(0x40+8*d+s);
1079     emit_byte(offset);
1080     }
1081     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1082    
1083     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1084     {
1085 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1086 gbeauche 1.1 emit_byte(0x66);
1087     emit_byte(0x8b);
1088     emit_byte(0x40+8*d+s);
1089     emit_byte(offset);
1090     }
1091     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1092    
1093     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1094     {
1095 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1096 gbeauche 1.1 emit_byte(0x8a);
1097     emit_byte(0x40+8*d+s);
1098     emit_byte(offset);
1099     }
1100     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1101    
1102     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1103     {
1104     emit_byte(0x8b);
1105     emit_byte(0x80+8*d+s);
1106     emit_long(offset);
1107     }
1108     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1109    
1110     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1111     {
1112     emit_byte(0x66);
1113     emit_byte(0x8b);
1114     emit_byte(0x80+8*d+s);
1115     emit_long(offset);
1116     }
1117     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1118    
1119     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1120     {
1121     emit_byte(0x8a);
1122     emit_byte(0x80+8*d+s);
1123     emit_long(offset);
1124     }
1125     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1126    
1127     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1128     {
1129 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1130 gbeauche 1.1 emit_byte(0xc7);
1131     emit_byte(0x40+d);
1132     emit_byte(offset);
1133     emit_long(i);
1134     }
1135     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1136    
1137     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1138     {
1139 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1140 gbeauche 1.1 emit_byte(0x66);
1141     emit_byte(0xc7);
1142     emit_byte(0x40+d);
1143     emit_byte(offset);
1144     emit_word(i);
1145     }
1146     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1147    
1148     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1149     {
1150 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1151 gbeauche 1.1 emit_byte(0xc6);
1152     emit_byte(0x40+d);
1153     emit_byte(offset);
1154     emit_byte(i);
1155     }
1156     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1157    
1158     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1159     {
1160 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1161 gbeauche 1.1 emit_byte(0x89);
1162     emit_byte(0x40+8*s+d);
1163     emit_byte(offset);
1164     }
1165     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1166    
1167     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1168     {
1169 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1170 gbeauche 1.1 emit_byte(0x66);
1171     emit_byte(0x89);
1172     emit_byte(0x40+8*s+d);
1173     emit_byte(offset);
1174     }
1175     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1176    
1177     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1178     {
1179 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1180 gbeauche 1.1 emit_byte(0x88);
1181     emit_byte(0x40+8*s+d);
1182     emit_byte(offset);
1183     }
1184     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1185    
1186     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1187     {
1188     if (optimize_imm8 && isbyte(offset)) {
1189     emit_byte(0x8d);
1190     emit_byte(0x40+8*d+s);
1191     emit_byte(offset);
1192     }
1193     else {
1194     emit_byte(0x8d);
1195     emit_byte(0x80+8*d+s);
1196     emit_long(offset);
1197     }
1198     }
1199     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1200    
1201     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1202     {
1203     int fi;
1204    
1205     switch(factor) {
1206     case 1: fi=0; break;
1207     case 2: fi=1; break;
1208     case 4: fi=2; break;
1209     case 8: fi=3; break;
1210     default: abort();
1211     }
1212    
1213     if (optimize_imm8 && isbyte(offset)) {
1214     emit_byte(0x8d);
1215     emit_byte(0x44+8*d);
1216     emit_byte(0x40*fi+8*index+s);
1217     emit_byte(offset);
1218     }
1219     else {
1220     emit_byte(0x8d);
1221     emit_byte(0x84+8*d);
1222     emit_byte(0x40*fi+8*index+s);
1223     emit_long(offset);
1224     }
1225     }
1226     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1227    
1228     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1229     {
1230     int isebp=(s==5)?0x40:0;
1231     int fi;
1232    
1233     switch(factor) {
1234     case 1: fi=0; break;
1235     case 2: fi=1; break;
1236     case 4: fi=2; break;
1237     case 8: fi=3; break;
1238     default: abort();
1239     }
1240    
1241     emit_byte(0x8d);
1242     emit_byte(0x04+8*d+isebp);
1243     emit_byte(0x40*fi+8*index+s);
1244     if (isebp)
1245     emit_byte(0);
1246     }
1247     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1248    
1249     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1250     {
1251     if (optimize_imm8 && isbyte(offset)) {
1252     emit_byte(0x89);
1253     emit_byte(0x40+8*s+d);
1254     emit_byte(offset);
1255     }
1256     else {
1257     emit_byte(0x89);
1258     emit_byte(0x80+8*s+d);
1259     emit_long(offset);
1260     }
1261     }
1262     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1263    
1264     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1265     {
1266     emit_byte(0x66);
1267     emit_byte(0x89);
1268     emit_byte(0x80+8*s+d);
1269     emit_long(offset);
1270     }
1271     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1272    
1273     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1274     {
1275     if (optimize_imm8 && isbyte(offset)) {
1276     emit_byte(0x88);
1277     emit_byte(0x40+8*s+d);
1278     emit_byte(offset);
1279     }
1280     else {
1281     emit_byte(0x88);
1282     emit_byte(0x80+8*s+d);
1283     emit_long(offset);
1284     }
1285     }
1286     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1287    
1288     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1289     {
1290     emit_byte(0x0f);
1291     emit_byte(0xc8+r);
1292     }
1293     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1294    
1295     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1296     {
1297     emit_byte(0x66);
1298     emit_byte(0xc1);
1299     emit_byte(0xc0+r);
1300     emit_byte(0x08);
1301     }
1302     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1303    
1304     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1305     {
1306     emit_byte(0x89);
1307     emit_byte(0xc0+8*s+d);
1308     }
1309     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1310    
1311     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1312     {
1313     emit_byte(0x89);
1314     emit_byte(0x05+8*s);
1315     emit_long(d);
1316     }
1317     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1318    
1319     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1320     {
1321     emit_byte(0x66);
1322     emit_byte(0x89);
1323     emit_byte(0x05+8*s);
1324     emit_long(d);
1325     }
1326     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1327    
1328     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1329     {
1330     emit_byte(0x66);
1331     emit_byte(0x8b);
1332     emit_byte(0x05+8*d);
1333     emit_long(s);
1334     }
1335     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1336    
1337     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1338     {
1339     emit_byte(0x88);
1340     emit_byte(0x05+8*s);
1341     emit_long(d);
1342     }
1343     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1344    
1345     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1346     {
1347     emit_byte(0x8a);
1348     emit_byte(0x05+8*d);
1349     emit_long(s);
1350     }
1351     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1352    
1353     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1354     {
1355     emit_byte(0xb8+d);
1356     emit_long(s);
1357     }
1358     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1359    
1360     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1361     {
1362     emit_byte(0x66);
1363     emit_byte(0xb8+d);
1364     emit_word(s);
1365     }
1366     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1367    
1368     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1369     {
1370     emit_byte(0xb0+d);
1371     emit_byte(s);
1372     }
1373     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1374    
1375     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1376     {
1377     emit_byte(0x81);
1378     emit_byte(0x15);
1379     emit_long(d);
1380     emit_long(s);
1381     }
1382     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1383    
1384     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1385     {
1386     if (optimize_imm8 && isbyte(s)) {
1387     emit_byte(0x83);
1388     emit_byte(0x05);
1389     emit_long(d);
1390     emit_byte(s);
1391     }
1392     else {
1393     emit_byte(0x81);
1394     emit_byte(0x05);
1395     emit_long(d);
1396     emit_long(s);
1397     }
1398     }
1399     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1400    
1401     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1402     {
1403     emit_byte(0x66);
1404     emit_byte(0x81);
1405     emit_byte(0x05);
1406     emit_long(d);
1407     emit_word(s);
1408     }
1409     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1410    
1411     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1412     {
1413     emit_byte(0x80);
1414     emit_byte(0x05);
1415     emit_long(d);
1416     emit_byte(s);
1417     }
1418     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1419    
1420     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1421     {
1422 gbeauche 1.2 if (optimize_accum && isaccum(d))
1423     emit_byte(0xa9);
1424     else {
1425 gbeauche 1.1 emit_byte(0xf7);
1426     emit_byte(0xc0+d);
1427 gbeauche 1.2 }
1428 gbeauche 1.1 emit_long(i);
1429     }
1430     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1431    
1432     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1433     {
1434     emit_byte(0x85);
1435     emit_byte(0xc0+8*s+d);
1436     }
1437     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1438    
1439     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1440     {
1441     emit_byte(0x66);
1442     emit_byte(0x85);
1443     emit_byte(0xc0+8*s+d);
1444     }
1445     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1446    
1447     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1448     {
1449     emit_byte(0x84);
1450     emit_byte(0xc0+8*s+d);
1451     }
1452     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1453    
1454     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1455     {
1456     if (optimize_imm8 && isbyte(i)) {
1457 gbeauche 1.2 emit_byte(0x83);
1458     emit_byte(0xe0+d);
1459     emit_byte(i);
1460 gbeauche 1.1 }
1461     else {
1462 gbeauche 1.2 if (optimize_accum && isaccum(d))
1463     emit_byte(0x25);
1464     else {
1465     emit_byte(0x81);
1466     emit_byte(0xe0+d);
1467     }
1468     emit_long(i);
1469 gbeauche 1.1 }
1470     }
1471     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1472    
1473     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1474     {
1475 gbeauche 1.2 emit_byte(0x66);
1476     if (optimize_imm8 && isbyte(i)) {
1477     emit_byte(0x83);
1478     emit_byte(0xe0+d);
1479     emit_byte(i);
1480     }
1481     else {
1482     if (optimize_accum && isaccum(d))
1483     emit_byte(0x25);
1484     else {
1485     emit_byte(0x81);
1486     emit_byte(0xe0+d);
1487     }
1488     emit_word(i);
1489     }
1490 gbeauche 1.1 }
1491     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1492    
1493     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1494     {
1495     emit_byte(0x21);
1496     emit_byte(0xc0+8*s+d);
1497     }
1498     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1499    
1500     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1501     {
1502     emit_byte(0x66);
1503     emit_byte(0x21);
1504     emit_byte(0xc0+8*s+d);
1505     }
1506     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1507    
1508     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1509     {
1510     emit_byte(0x20);
1511     emit_byte(0xc0+8*s+d);
1512     }
1513     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1514    
1515     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1516     {
1517     if (optimize_imm8 && isbyte(i)) {
1518     emit_byte(0x83);
1519     emit_byte(0xc8+d);
1520     emit_byte(i);
1521     }
1522     else {
1523 gbeauche 1.2 if (optimize_accum && isaccum(d))
1524     emit_byte(0x0d);
1525     else {
1526 gbeauche 1.1 emit_byte(0x81);
1527     emit_byte(0xc8+d);
1528 gbeauche 1.2 }
1529 gbeauche 1.1 emit_long(i);
1530     }
1531     }
1532     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1533    
1534     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1535     {
1536     emit_byte(0x09);
1537     emit_byte(0xc0+8*s+d);
1538     }
1539     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1540    
1541     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1542     {
1543     emit_byte(0x66);
1544     emit_byte(0x09);
1545     emit_byte(0xc0+8*s+d);
1546     }
1547     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1548    
1549     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1550     {
1551     emit_byte(0x08);
1552     emit_byte(0xc0+8*s+d);
1553     }
1554     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1555    
1556     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1557     {
1558     emit_byte(0x11);
1559     emit_byte(0xc0+8*s+d);
1560     }
1561     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1562    
1563     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1564     {
1565     emit_byte(0x66);
1566     emit_byte(0x11);
1567     emit_byte(0xc0+8*s+d);
1568     }
1569     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1570    
1571     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1572     {
1573     emit_byte(0x10);
1574     emit_byte(0xc0+8*s+d);
1575     }
1576     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1577    
1578     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1579     {
1580     emit_byte(0x01);
1581     emit_byte(0xc0+8*s+d);
1582     }
1583     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1584    
1585     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1586     {
1587     emit_byte(0x66);
1588     emit_byte(0x01);
1589     emit_byte(0xc0+8*s+d);
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1594     {
1595     emit_byte(0x00);
1596     emit_byte(0xc0+8*s+d);
1597     }
1598     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1599    
1600     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1601     {
1602     if (isbyte(i)) {
1603     emit_byte(0x83);
1604     emit_byte(0xe8+d);
1605     emit_byte(i);
1606     }
1607     else {
1608 gbeauche 1.2 if (optimize_accum && isaccum(d))
1609     emit_byte(0x2d);
1610     else {
1611 gbeauche 1.1 emit_byte(0x81);
1612     emit_byte(0xe8+d);
1613 gbeauche 1.2 }
1614 gbeauche 1.1 emit_long(i);
1615     }
1616     }
1617     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1618    
1619     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1620     {
1621 gbeauche 1.2 if (optimize_accum && isaccum(d))
1622     emit_byte(0x2c);
1623     else {
1624 gbeauche 1.1 emit_byte(0x80);
1625     emit_byte(0xe8+d);
1626 gbeauche 1.2 }
1627 gbeauche 1.1 emit_byte(i);
1628     }
1629     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1630    
1631     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1632     {
1633     if (isbyte(i)) {
1634     emit_byte(0x83);
1635     emit_byte(0xc0+d);
1636     emit_byte(i);
1637     }
1638     else {
1639 gbeauche 1.2 if (optimize_accum && isaccum(d))
1640     emit_byte(0x05);
1641     else {
1642 gbeauche 1.1 emit_byte(0x81);
1643     emit_byte(0xc0+d);
1644 gbeauche 1.2 }
1645 gbeauche 1.1 emit_long(i);
1646     }
1647     }
1648     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1649    
1650     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1651     {
1652 gbeauche 1.2 emit_byte(0x66);
1653 gbeauche 1.1 if (isbyte(i)) {
1654     emit_byte(0x83);
1655     emit_byte(0xc0+d);
1656     emit_byte(i);
1657     }
1658     else {
1659 gbeauche 1.2 if (optimize_accum && isaccum(d))
1660     emit_byte(0x05);
1661     else {
1662 gbeauche 1.1 emit_byte(0x81);
1663     emit_byte(0xc0+d);
1664 gbeauche 1.2 }
1665 gbeauche 1.1 emit_word(i);
1666     }
1667     }
1668     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1669    
1670     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1671     {
1672 gbeauche 1.2 if (optimize_accum && isaccum(d))
1673     emit_byte(0x04);
1674     else {
1675     emit_byte(0x80);
1676     emit_byte(0xc0+d);
1677     }
1678 gbeauche 1.1 emit_byte(i);
1679     }
1680     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1681    
1682     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1683     {
1684     emit_byte(0x19);
1685     emit_byte(0xc0+8*s+d);
1686     }
1687     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1688    
1689     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1690     {
1691     emit_byte(0x66);
1692     emit_byte(0x19);
1693     emit_byte(0xc0+8*s+d);
1694     }
1695     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1696    
1697     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1698     {
1699     emit_byte(0x18);
1700     emit_byte(0xc0+8*s+d);
1701     }
1702     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1703    
1704     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1705     {
1706     emit_byte(0x29);
1707     emit_byte(0xc0+8*s+d);
1708     }
1709     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1710    
1711     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1712     {
1713     emit_byte(0x66);
1714     emit_byte(0x29);
1715     emit_byte(0xc0+8*s+d);
1716     }
1717     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1718    
1719     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1720     {
1721     emit_byte(0x28);
1722     emit_byte(0xc0+8*s+d);
1723     }
1724     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1725    
1726     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1727     {
1728     emit_byte(0x39);
1729     emit_byte(0xc0+8*s+d);
1730     }
1731     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1732    
1733     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1734     {
1735     if (optimize_imm8 && isbyte(i)) {
1736     emit_byte(0x83);
1737     emit_byte(0xf8+r);
1738     emit_byte(i);
1739     }
1740     else {
1741 gbeauche 1.2 if (optimize_accum && isaccum(r))
1742     emit_byte(0x3d);
1743     else {
1744 gbeauche 1.1 emit_byte(0x81);
1745     emit_byte(0xf8+r);
1746 gbeauche 1.2 }
1747 gbeauche 1.1 emit_long(i);
1748     }
1749     }
1750     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1751    
1752     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1753     {
1754     emit_byte(0x66);
1755     emit_byte(0x39);
1756     emit_byte(0xc0+8*s+d);
1757     }
1758     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1759    
1760 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1761     {
1762     emit_byte(0x80);
1763     emit_byte(0x3d);
1764     emit_long(d);
1765     emit_byte(s);
1766     }
1767     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1768    
1769 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1770     {
1771 gbeauche 1.2 if (optimize_accum && isaccum(d))
1772     emit_byte(0x3c);
1773     else {
1774 gbeauche 1.1 emit_byte(0x80);
1775     emit_byte(0xf8+d);
1776 gbeauche 1.2 }
1777 gbeauche 1.1 emit_byte(i);
1778     }
1779     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1780    
1781     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1782     {
1783     emit_byte(0x38);
1784     emit_byte(0xc0+8*s+d);
1785     }
1786     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1787    
1788     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1789     {
1790     int fi;
1791    
1792     switch(factor) {
1793     case 1: fi=0; break;
1794     case 2: fi=1; break;
1795     case 4: fi=2; break;
1796     case 8: fi=3; break;
1797     default: abort();
1798     }
1799     emit_byte(0x39);
1800     emit_byte(0x04+8*d);
1801     emit_byte(5+8*index+0x40*fi);
1802     emit_long(offset);
1803     }
1804     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1805    
1806     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1807     {
1808     emit_byte(0x31);
1809     emit_byte(0xc0+8*s+d);
1810     }
1811     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1812    
1813     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1814     {
1815     emit_byte(0x66);
1816     emit_byte(0x31);
1817     emit_byte(0xc0+8*s+d);
1818     }
1819     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1820    
1821     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1822     {
1823     emit_byte(0x30);
1824     emit_byte(0xc0+8*s+d);
1825     }
1826     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1827    
1828     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1829     {
1830     if (optimize_imm8 && isbyte(s)) {
1831     emit_byte(0x83);
1832     emit_byte(0x2d);
1833     emit_long(d);
1834     emit_byte(s);
1835     }
1836     else {
1837     emit_byte(0x81);
1838     emit_byte(0x2d);
1839     emit_long(d);
1840     emit_long(s);
1841     }
1842     }
1843     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1844    
1845     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1846     {
1847     if (optimize_imm8 && isbyte(s)) {
1848     emit_byte(0x83);
1849     emit_byte(0x3d);
1850     emit_long(d);
1851     emit_byte(s);
1852     }
1853     else {
1854     emit_byte(0x81);
1855     emit_byte(0x3d);
1856     emit_long(d);
1857     emit_long(s);
1858     }
1859     }
1860     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1861    
1862     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1863     {
1864     emit_byte(0x87);
1865     emit_byte(0xc0+8*r1+r2);
1866     }
1867     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1868    
1869     /*************************************************************************
1870     * FIXME: string-related instructions *
1871     *************************************************************************/
1872    
1873     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1874     {
1875     emit_byte(0xfc);
1876     }
1877     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1878    
1879     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1880     {
1881     emit_byte(0xfd);
1882     }
1883     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1884    
1885     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1886     {
1887     emit_byte(0xa4);
1888     }
1889     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1890    
1891     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1892     {
1893     emit_byte(0xa5);
1894     }
1895     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1896    
1897     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1898     {
1899     emit_byte(0xf3);
1900     }
1901     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1902    
1903     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1904     {
1905     raw_rep();
1906     raw_movs_b();
1907     }
1908     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1909    
1910     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1911     {
1912     raw_rep();
1913     raw_movs_l();
1914     }
1915     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1916    
1917     /*************************************************************************
1918     * FIXME: mem access modes probably wrong *
1919     *************************************************************************/
1920    
1921     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1922     {
1923     emit_byte(0x9c);
1924     }
1925     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1926    
1927     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1928     {
1929     emit_byte(0x9d);
1930     }
1931     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1932    
1933     /*************************************************************************
1934     * Unoptimizable stuff --- jump *
1935     *************************************************************************/
1936    
1937     static __inline__ void raw_call_r(R4 r)
1938     {
1939     emit_byte(0xff);
1940     emit_byte(0xd0+r);
1941 gbeauche 1.5 }
1942    
1943     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1944     {
1945     int mu;
1946     switch(m) {
1947     case 1: mu=0; break;
1948     case 2: mu=1; break;
1949     case 4: mu=2; break;
1950     case 8: mu=3; break;
1951     default: abort();
1952     }
1953     emit_byte(0xff);
1954     emit_byte(0x14);
1955     emit_byte(0x05+8*r+0x40*mu);
1956     emit_long(base);
1957 gbeauche 1.1 }
1958    
1959     static __inline__ void raw_jmp_r(R4 r)
1960     {
1961     emit_byte(0xff);
1962     emit_byte(0xe0+r);
1963     }
1964    
1965     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1966     {
1967     int mu;
1968     switch(m) {
1969     case 1: mu=0; break;
1970     case 2: mu=1; break;
1971     case 4: mu=2; break;
1972     case 8: mu=3; break;
1973     default: abort();
1974     }
1975     emit_byte(0xff);
1976     emit_byte(0x24);
1977     emit_byte(0x05+8*r+0x40*mu);
1978     emit_long(base);
1979     }
1980    
1981     static __inline__ void raw_jmp_m(uae_u32 base)
1982     {
1983     emit_byte(0xff);
1984     emit_byte(0x25);
1985     emit_long(base);
1986     }
1987    
1988    
1989     static __inline__ void raw_call(uae_u32 t)
1990     {
1991     emit_byte(0xe8);
1992     emit_long(t-(uae_u32)target-4);
1993     }
1994    
1995     static __inline__ void raw_jmp(uae_u32 t)
1996     {
1997     emit_byte(0xe9);
1998     emit_long(t-(uae_u32)target-4);
1999     }
2000    
2001     static __inline__ void raw_jl(uae_u32 t)
2002     {
2003     emit_byte(0x0f);
2004     emit_byte(0x8c);
2005     emit_long(t-(uae_u32)target-4);
2006     }
2007    
2008     static __inline__ void raw_jz(uae_u32 t)
2009     {
2010     emit_byte(0x0f);
2011     emit_byte(0x84);
2012     emit_long(t-(uae_u32)target-4);
2013     }
2014    
2015     static __inline__ void raw_jnz(uae_u32 t)
2016     {
2017     emit_byte(0x0f);
2018     emit_byte(0x85);
2019     emit_long(t-(uae_u32)target-4);
2020     }
2021    
2022     static __inline__ void raw_jnz_l_oponly(void)
2023     {
2024     emit_byte(0x0f);
2025     emit_byte(0x85);
2026     }
2027    
2028     static __inline__ void raw_jcc_l_oponly(int cc)
2029     {
2030     emit_byte(0x0f);
2031     emit_byte(0x80+cc);
2032     }
2033    
2034     static __inline__ void raw_jnz_b_oponly(void)
2035     {
2036     emit_byte(0x75);
2037     }
2038    
2039     static __inline__ void raw_jz_b_oponly(void)
2040     {
2041     emit_byte(0x74);
2042     }
2043    
2044     static __inline__ void raw_jcc_b_oponly(int cc)
2045     {
2046     emit_byte(0x70+cc);
2047     }
2048    
2049     static __inline__ void raw_jmp_l_oponly(void)
2050     {
2051     emit_byte(0xe9);
2052     }
2053    
2054     static __inline__ void raw_jmp_b_oponly(void)
2055     {
2056     emit_byte(0xeb);
2057     }
2058    
2059     static __inline__ void raw_ret(void)
2060     {
2061     emit_byte(0xc3);
2062     }
2063    
2064     static __inline__ void raw_nop(void)
2065     {
2066     emit_byte(0x90);
2067     }
2068    
2069 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
2070     {
2071     /* Source: GNU Binutils 2.12.90.0.15 */
2072     /* Various efficient no-op patterns for aligning code labels.
2073     Note: Don't try to assemble the instructions in the comments.
2074     0L and 0w are not legal. */
2075     static const uae_u8 f32_1[] =
2076     {0x90}; /* nop */
2077     static const uae_u8 f32_2[] =
2078     {0x89,0xf6}; /* movl %esi,%esi */
2079     static const uae_u8 f32_3[] =
2080     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
2081     static const uae_u8 f32_4[] =
2082     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2083     static const uae_u8 f32_5[] =
2084     {0x90, /* nop */
2085     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2086     static const uae_u8 f32_6[] =
2087     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
2088     static const uae_u8 f32_7[] =
2089     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2090     static const uae_u8 f32_8[] =
2091     {0x90, /* nop */
2092     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2093     static const uae_u8 f32_9[] =
2094     {0x89,0xf6, /* movl %esi,%esi */
2095     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2096     static const uae_u8 f32_10[] =
2097     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
2098     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2099     static const uae_u8 f32_11[] =
2100     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
2101     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2102     static const uae_u8 f32_12[] =
2103     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2104     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
2105     static const uae_u8 f32_13[] =
2106     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2107     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2108     static const uae_u8 f32_14[] =
2109     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
2110     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2111     static const uae_u8 f32_15[] =
2112     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2113     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2114     static const uae_u8 f32_16[] =
2115     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2116     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2117     static const uae_u8 *const f32_patt[] = {
2118     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
2119     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
2120     };
2121    
2122     int nloops = nbytes / 16;
2123     while (nloops-- > 0)
2124     emit_block(f32_16, sizeof(f32_16));
2125    
2126     nbytes %= 16;
2127     if (nbytes)
2128     emit_block(f32_patt[nbytes - 1], nbytes);
2129     }
2130    
2131 gbeauche 1.1
2132     /*************************************************************************
2133     * Flag handling, to and fro UAE flag register *
2134     *************************************************************************/
2135    
2136     #ifdef SAHF_SETO_PROFITABLE
2137    
2138     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2139    
2140     static __inline__ void raw_flags_to_reg(int r)
2141     {
2142     raw_lahf(0); /* Most flags in AH */
2143     //raw_setcc(r,0); /* V flag in AL */
2144     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2145    
2146     #if 1 /* Let's avoid those nasty partial register stalls */
2147     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2148     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2149     //live.state[FLAGTMP].status=CLEAN;
2150     live.state[FLAGTMP].status=INMEM;
2151     live.state[FLAGTMP].realreg=-1;
2152     /* We just "evicted" FLAGTMP. */
2153     if (live.nat[r].nholds!=1) {
2154     /* Huh? */
2155     abort();
2156     }
2157     live.nat[r].nholds=0;
2158     #endif
2159     }
2160    
2161     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2162     static __inline__ void raw_reg_to_flags(int r)
2163     {
2164     raw_cmp_b_ri(r,-127); /* set V */
2165     raw_sahf(0);
2166     }
2167    
2168     #else
2169    
2170     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2171     static __inline__ void raw_flags_to_reg(int r)
2172     {
2173     raw_pushfl();
2174     raw_pop_l_r(r);
2175     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2176     // live.state[FLAGTMP].status=CLEAN;
2177     live.state[FLAGTMP].status=INMEM;
2178     live.state[FLAGTMP].realreg=-1;
2179     /* We just "evicted" FLAGTMP. */
2180     if (live.nat[r].nholds!=1) {
2181     /* Huh? */
2182     abort();
2183     }
2184     live.nat[r].nholds=0;
2185     }
2186    
2187     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2188     static __inline__ void raw_reg_to_flags(int r)
2189     {
2190     raw_push_l_r(r);
2191     raw_popfl();
2192     }
2193    
2194     #endif
2195    
2196     /* Apparently, there are enough instructions between flag store and
2197     flag reload to avoid the partial memory stall */
2198     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2199     {
2200     #if 1
2201     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2202     #else
2203     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2204     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2205     #endif
2206     }
2207    
2208     /* FLAGX is byte sized, and we *do* write it at that size */
2209     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2210     {
2211     if (live.nat[target].canbyte)
2212     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2213     else if (live.nat[target].canword)
2214     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2215     else
2216     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2217     }
2218    
2219    
2220     static __inline__ void raw_inc_sp(int off)
2221     {
2222 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2223 gbeauche 1.1 }
2224    
2225     /*************************************************************************
2226     * Handling mistaken direct memory access *
2227     *************************************************************************/
2228    
2229     // gb-- I don't need that part for JIT Basilisk II
2230     #if defined(NATMEM_OFFSET) && 0
2231     #include <asm/sigcontext.h>
2232     #include <signal.h>
2233    
2234     #define SIG_READ 1
2235     #define SIG_WRITE 2
2236    
2237     static int in_handler=0;
2238     static uae_u8 veccode[256];
2239    
2240     static void vec(int x, struct sigcontext sc)
2241     {
2242     uae_u8* i=(uae_u8*)sc.eip;
2243     uae_u32 addr=sc.cr2;
2244     int r=-1;
2245     int size=4;
2246     int dir=-1;
2247     int len=0;
2248     int j;
2249    
2250     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2251     if (!canbang)
2252     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2253     if (in_handler)
2254     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2255    
2256     if (canbang && i>=compiled_code && i<=current_compile_p) {
2257     if (*i==0x66) {
2258     i++;
2259     size=2;
2260     len++;
2261     }
2262    
2263     switch(i[0]) {
2264     case 0x8a:
2265     if ((i[1]&0xc0)==0x80) {
2266     r=(i[1]>>3)&7;
2267     dir=SIG_READ;
2268     size=1;
2269     len+=6;
2270     break;
2271     }
2272     break;
2273     case 0x88:
2274     if ((i[1]&0xc0)==0x80) {
2275     r=(i[1]>>3)&7;
2276     dir=SIG_WRITE;
2277     size=1;
2278     len+=6;
2279     break;
2280     }
2281     break;
2282     case 0x8b:
2283     if ((i[1]&0xc0)==0x80) {
2284     r=(i[1]>>3)&7;
2285     dir=SIG_READ;
2286     len+=6;
2287     break;
2288     }
2289     if ((i[1]&0xc0)==0x40) {
2290     r=(i[1]>>3)&7;
2291     dir=SIG_READ;
2292     len+=3;
2293     break;
2294     }
2295     break;
2296     case 0x89:
2297     if ((i[1]&0xc0)==0x80) {
2298     r=(i[1]>>3)&7;
2299     dir=SIG_WRITE;
2300     len+=6;
2301     break;
2302     }
2303     if ((i[1]&0xc0)==0x40) {
2304     r=(i[1]>>3)&7;
2305     dir=SIG_WRITE;
2306     len+=3;
2307     break;
2308     }
2309     break;
2310     }
2311     }
2312    
2313     if (r!=-1) {
2314     void* pr=NULL;
2315     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2316    
2317     switch(r) {
2318     case 0: pr=&(sc.eax); break;
2319     case 1: pr=&(sc.ecx); break;
2320     case 2: pr=&(sc.edx); break;
2321     case 3: pr=&(sc.ebx); break;
2322     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2323     case 5: pr=(size>1)?
2324     (void*)(&(sc.ebp)):
2325     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2326     case 6: pr=(size>1)?
2327     (void*)(&(sc.esi)):
2328     (void*)(((uae_u8*)&(sc.edx))+1); break;
2329     case 7: pr=(size>1)?
2330     (void*)(&(sc.edi)):
2331     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2332     default: abort();
2333     }
2334     if (pr) {
2335     blockinfo* bi;
2336    
2337     if (currprefs.comp_oldsegv) {
2338     addr-=NATMEM_OFFSET;
2339    
2340     if ((addr>=0x10000000 && addr<0x40000000) ||
2341     (addr>=0x50000000)) {
2342     write_log("Suspicious address in %x SEGV handler.\n",addr);
2343     }
2344     if (dir==SIG_READ) {
2345     switch(size) {
2346     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2347     case 2: *((uae_u16*)pr)=get_word(addr); break;
2348     case 4: *((uae_u32*)pr)=get_long(addr); break;
2349     default: abort();
2350     }
2351     }
2352     else { /* write */
2353     switch(size) {
2354     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2355     case 2: put_word(addr,*((uae_u16*)pr)); break;
2356     case 4: put_long(addr,*((uae_u32*)pr)); break;
2357     default: abort();
2358     }
2359     }
2360     write_log("Handled one access!\n");
2361     fflush(stdout);
2362     segvcount++;
2363     sc.eip+=len;
2364     }
2365     else {
2366     void* tmp=target;
2367     int i;
2368     uae_u8 vecbuf[5];
2369    
2370     addr-=NATMEM_OFFSET;
2371    
2372     if ((addr>=0x10000000 && addr<0x40000000) ||
2373     (addr>=0x50000000)) {
2374     write_log("Suspicious address in %x SEGV handler.\n",addr);
2375     }
2376    
2377     target=(uae_u8*)sc.eip;
2378     for (i=0;i<5;i++)
2379     vecbuf[i]=target[i];
2380     emit_byte(0xe9);
2381     emit_long((uae_u32)veccode-(uae_u32)target-4);
2382     write_log("Create jump to %p\n",veccode);
2383    
2384     write_log("Handled one access!\n");
2385     fflush(stdout);
2386     segvcount++;
2387    
2388     target=veccode;
2389    
2390     if (dir==SIG_READ) {
2391     switch(size) {
2392     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2393     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2394     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2395     default: abort();
2396     }
2397     }
2398     else { /* write */
2399     switch(size) {
2400     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2401     case 2: put_word(addr,*((uae_u16*)pr)); break;
2402     case 4: put_long(addr,*((uae_u32*)pr)); break;
2403     default: abort();
2404     }
2405     }
2406     for (i=0;i<5;i++)
2407     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2408     raw_mov_l_mi((uae_u32)&in_handler,0);
2409     emit_byte(0xe9);
2410     emit_long(sc.eip+len-(uae_u32)target-4);
2411     in_handler=1;
2412     target=tmp;
2413     }
2414     bi=active;
2415     while (bi) {
2416     if (bi->handler &&
2417     (uae_u8*)bi->direct_handler<=i &&
2418     (uae_u8*)bi->nexthandler>i) {
2419     write_log("deleted trigger (%p<%p<%p) %p\n",
2420     bi->handler,
2421     i,
2422     bi->nexthandler,
2423     bi->pc_p);
2424     invalidate_block(bi);
2425     raise_in_cl_list(bi);
2426     set_special(0);
2427     return;
2428     }
2429     bi=bi->next;
2430     }
2431     /* Not found in the active list. Might be a rom routine that
2432     is in the dormant list */
2433     bi=dormant;
2434     while (bi) {
2435     if (bi->handler &&
2436     (uae_u8*)bi->direct_handler<=i &&
2437     (uae_u8*)bi->nexthandler>i) {
2438     write_log("deleted trigger (%p<%p<%p) %p\n",
2439     bi->handler,
2440     i,
2441     bi->nexthandler,
2442     bi->pc_p);
2443     invalidate_block(bi);
2444     raise_in_cl_list(bi);
2445     set_special(0);
2446     return;
2447     }
2448     bi=bi->next;
2449     }
2450     write_log("Huh? Could not find trigger!\n");
2451     return;
2452     }
2453     }
2454     write_log("Can't handle access!\n");
2455     for (j=0;j<10;j++) {
2456     write_log("instruction byte %2d is %02x\n",j,i[j]);
2457     }
2458     write_log("Please send the above info (starting at \"fault address\") to\n"
2459     "bmeyer@csse.monash.edu.au\n"
2460     "This shouldn't happen ;-)\n");
2461     fflush(stdout);
2462     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2463     }
2464     #endif
2465    
2466    
2467     /*************************************************************************
2468     * Checking for CPU features *
2469     *************************************************************************/
2470    
2471 gbeauche 1.3 struct cpuinfo_x86 {
2472     uae_u8 x86; // CPU family
2473     uae_u8 x86_vendor; // CPU vendor
2474     uae_u8 x86_processor; // CPU canonical processor type
2475     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2476     uae_u32 x86_hwcap;
2477     uae_u8 x86_model;
2478     uae_u8 x86_mask;
2479     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2480     char x86_vendor_id[16];
2481     };
2482     struct cpuinfo_x86 cpuinfo;
2483    
/* CPU vendors, as stored in cpuinfo_x86::x86_vendor. */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
2496    
/* Canonical processor types; used as index into the per-processor tables. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_max        = 7    /* number of processor types; also "unknown" */
};
2507    
2508     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2509     "80386",
2510     "80486",
2511     "Pentium",
2512     "PentiumPro",
2513     "K6",
2514     "Athlon",
2515     "Pentium4"
2516     };
2517    
2518     static struct ptt {
2519     const int align_loop;
2520     const int align_loop_max_skip;
2521     const int align_jump;
2522     const int align_jump_max_skip;
2523     const int align_func;
2524     }
2525     x86_alignments[X86_PROCESSOR_max] = {
2526     { 4, 3, 4, 3, 4 },
2527     { 16, 15, 16, 15, 16 },
2528     { 16, 7, 16, 7, 16 },
2529     { 16, 15, 16, 7, 16 },
2530     { 32, 7, 32, 7, 32 },
2531 gbeauche 1.4 { 16, 7, 16, 7, 16 },
2532 gbeauche 1.3 { 0, 0, 0, 0, 0 }
2533     };
2534 gbeauche 1.1
2535 gbeauche 1.3 static void
2536     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2537 gbeauche 1.1 {
2538 gbeauche 1.3 char *v = c->x86_vendor_id;
2539    
2540     if (!strcmp(v, "GenuineIntel"))
2541     c->x86_vendor = X86_VENDOR_INTEL;
2542     else if (!strcmp(v, "AuthenticAMD"))
2543     c->x86_vendor = X86_VENDOR_AMD;
2544     else if (!strcmp(v, "CyrixInstead"))
2545     c->x86_vendor = X86_VENDOR_CYRIX;
2546     else if (!strcmp(v, "Geode by NSC"))
2547     c->x86_vendor = X86_VENDOR_NSC;
2548     else if (!strcmp(v, "UMC UMC UMC "))
2549     c->x86_vendor = X86_VENDOR_UMC;
2550     else if (!strcmp(v, "CentaurHauls"))
2551     c->x86_vendor = X86_VENDOR_CENTAUR;
2552     else if (!strcmp(v, "NexGenDriven"))
2553     c->x86_vendor = X86_VENDOR_NEXGEN;
2554     else if (!strcmp(v, "RiseRiseRise"))
2555     c->x86_vendor = X86_VENDOR_RISE;
2556     else if (!strcmp(v, "GenuineTMx86") ||
2557     !strcmp(v, "TransmetaCPU"))
2558     c->x86_vendor = X86_VENDOR_TRANSMETA;
2559     else
2560     c->x86_vendor = X86_VENDOR_UNKNOWN;
2561     }
2562 gbeauche 1.1
2563 gbeauche 1.3 static void
2564     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2565     {
2566     static uae_u8 cpuid_space[256];
2567     uae_u8* tmp=get_target();
2568 gbeauche 1.1
2569 gbeauche 1.3 set_target(cpuid_space);
2570     raw_push_l_r(0); /* eax */
2571     raw_push_l_r(1); /* ecx */
2572     raw_push_l_r(2); /* edx */
2573     raw_push_l_r(3); /* ebx */
2574     raw_mov_l_rm(0,(uae_u32)&op);
2575     raw_cpuid(0);
2576     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2577     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2578     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2579     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2580     raw_pop_l_r(3);
2581     raw_pop_l_r(2);
2582     raw_pop_l_r(1);
2583     raw_pop_l_r(0);
2584     raw_ret();
2585     set_target(tmp);
2586 gbeauche 1.1
2587 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
2588 gbeauche 1.1 }
2589    
2590 gbeauche 1.3 static void
2591     raw_init_cpu(void)
2592 gbeauche 1.1 {
2593 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
2594    
2595     /* Defaults */
2596     c->x86_vendor = X86_VENDOR_UNKNOWN;
2597     c->cpuid_level = -1; /* CPUID not detected */
2598     c->x86_model = c->x86_mask = 0; /* So far unknown... */
2599     c->x86_vendor_id[0] = '\0'; /* Unset */
2600     c->x86_hwcap = 0;
2601    
2602     /* Get vendor name */
2603     c->x86_vendor_id[12] = '\0';
2604     cpuid(0x00000000,
2605     (uae_u32 *)&c->cpuid_level,
2606     (uae_u32 *)&c->x86_vendor_id[0],
2607     (uae_u32 *)&c->x86_vendor_id[8],
2608     (uae_u32 *)&c->x86_vendor_id[4]);
2609     x86_get_cpu_vendor(c);
2610    
2611     /* Intel-defined flags: level 0x00000001 */
2612     c->x86_brand_id = 0;
2613     if ( c->cpuid_level >= 0x00000001 ) {
2614     uae_u32 tfms, brand_id;
2615     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2616     c->x86 = (tfms >> 8) & 15;
2617     c->x86_model = (tfms >> 4) & 15;
2618     c->x86_brand_id = brand_id & 0xff;
2619     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2620     (c->x86 == 0xf)) {
2621     /* AMD Extended Family and Model Values */
2622     c->x86 += (tfms >> 20) & 0xff;
2623     c->x86_model += (tfms >> 12) & 0xf0;
2624     }
2625     c->x86_mask = tfms & 15;
2626     } else {
2627     /* Have CPUID level 0 only - unheard of */
2628     c->x86 = 4;
2629     }
2630    
2631     /* Canonicalize processor ID */
2632     c->x86_processor = X86_PROCESSOR_max;
2633     switch (c->x86) {
2634     case 3:
2635     c->x86_processor = X86_PROCESSOR_I386;
2636     break;
2637     case 4:
2638     c->x86_processor = X86_PROCESSOR_I486;
2639     break;
2640     case 5:
2641     if (c->x86_vendor == X86_VENDOR_AMD)
2642     c->x86_processor = X86_PROCESSOR_K6;
2643     else
2644     c->x86_processor = X86_PROCESSOR_PENTIUM;
2645     break;
2646     case 6:
2647     if (c->x86_vendor == X86_VENDOR_AMD)
2648     c->x86_processor = X86_PROCESSOR_ATHLON;
2649     else
2650     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2651     break;
2652     case 15:
2653     if (c->x86_vendor == X86_VENDOR_INTEL) {
2654     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2655     if (c->x86_brand_id >= 8)
2656     c->x86_processor = X86_PROCESSOR_PENTIUM4;
2657     }
2658     break;
2659     }
2660     if (c->x86_processor == X86_PROCESSOR_max) {
2661     fprintf(stderr, "Error: unknown processor type\n");
2662     fprintf(stderr, " Family : %d\n", c->x86);
2663     fprintf(stderr, " Model : %d\n", c->x86_model);
2664     fprintf(stderr, " Mask : %d\n", c->x86_mask);
2665     if (c->x86_brand_id)
2666     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2667     abort();
2668     }
2669    
2670     /* Have CMOV support? */
2671     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2672    
2673     /* Can the host CPU suffer from partial register stalls? */
2674     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2675     #if 1
2676     /* It appears that partial register writes are a bad idea even on
2677 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
2678     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2679 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2680     have_rat_stall = true;
2681 gbeauche 1.1 #endif
2682 gbeauche 1.3
2683     /* Alignments */
2684     if (tune_alignment) {
2685     align_loops = x86_alignments[c->x86_processor].align_loop;
2686     align_jumps = x86_alignments[c->x86_processor].align_jump;
2687     }
2688    
2689     write_log("Max CPUID level=%d Processor is %s [%s]\n",
2690     c->cpuid_level, c->x86_vendor_id,
2691     x86_processor_string_table[c->x86_processor]);
2692 gbeauche 1.1 }
2693    
2694 gbeauche 1.10 static bool target_check_bsf(void)
2695     {
2696     bool mismatch = false;
2697     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
2698     for (int g_CF = 0; g_CF <= 1; g_CF++) {
2699     for (int g_OF = 0; g_OF <= 1; g_OF++) {
2700     for (int g_SF = 0; g_SF <= 1; g_SF++) {
2701     for (int value = -1; value <= 1; value++) {
2702     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
2703     int tmp = value;
2704     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
2705     : "+r" (flags), "+r" (tmp) : : "flags");
2706     int OF = (flags >> 11) & 1;
2707     int SF = (flags >> 7) & 1;
2708     int ZF = (flags >> 6) & 1;
2709     int CF = flags & 1;
2710     tmp = (value == 0);
2711     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
2712     mismatch = true;
2713     }
2714     }}}}
2715     if (mismatch)
2716     write_log("Target CPU defines all flags on BSF instruction\n");
2717     return !mismatch;
2718     }
2719    
2720 gbeauche 1.1
2721     /*************************************************************************
2722     * FPU stuff *
2723     *************************************************************************/
2724    
2725    
2726     static __inline__ void raw_fp_init(void)
2727     {
2728     int i;
2729    
2730     for (i=0;i<N_FREGS;i++)
2731     live.spos[i]=-2;
2732     live.tos=-1; /* Stack is empty */
2733     }
2734    
2735     static __inline__ void raw_fp_cleanup_drop(void)
2736     {
2737     #if 0
2738     /* using FINIT instead of popping all the entries.
2739     Seems to have side effects --- there is display corruption in
2740     Quake when this is used */
2741     if (live.tos>1) {
2742     emit_byte(0x9b);
2743     emit_byte(0xdb);
2744     emit_byte(0xe3);
2745     live.tos=-1;
2746     }
2747     #endif
2748     while (live.tos>=1) {
2749     emit_byte(0xde);
2750     emit_byte(0xd9);
2751     live.tos-=2;
2752     }
2753     while (live.tos>=0) {
2754     emit_byte(0xdd);
2755     emit_byte(0xd8);
2756     live.tos--;
2757     }
2758     raw_fp_init();
2759     }
2760    
2761     static __inline__ void make_tos(int r)
2762     {
2763     int p,q;
2764    
2765     if (live.spos[r]<0) { /* Register not yet on stack */
2766     emit_byte(0xd9);
2767     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2768     live.tos++;
2769     live.spos[r]=live.tos;
2770     live.onstack[live.tos]=r;
2771     return;
2772     }
2773     /* Register is on stack */
2774     if (live.tos==live.spos[r])
2775     return;
2776     p=live.spos[r];
2777     q=live.onstack[live.tos];
2778    
2779     emit_byte(0xd9);
2780     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2781     live.onstack[live.tos]=r;
2782     live.spos[r]=live.tos;
2783     live.onstack[p]=q;
2784     live.spos[q]=p;
2785     }
2786    
2787     static __inline__ void make_tos2(int r, int r2)
2788     {
2789     int q;
2790    
2791     make_tos(r2); /* Put the reg that's supposed to end up in position2
2792     on top */
2793    
2794     if (live.spos[r]<0) { /* Register not yet on stack */
2795     make_tos(r); /* This will extend the stack */
2796     return;
2797     }
2798     /* Register is on stack */
2799     emit_byte(0xd9);
2800     emit_byte(0xc9); /* Move r2 into position 2 */
2801    
2802     q=live.onstack[live.tos-1];
2803     live.onstack[live.tos]=q;
2804     live.spos[q]=live.tos;
2805     live.onstack[live.tos-1]=r2;
2806     live.spos[r2]=live.tos-1;
2807    
2808     make_tos(r); /* And r into 1 */
2809     }
2810    
2811     static __inline__ int stackpos(int r)
2812     {
2813     if (live.spos[r]<0)
2814     abort();
2815     if (live.tos<live.spos[r]) {
2816     printf("Looking for spos for fnreg %d\n",r);
2817     abort();
2818     }
2819     return live.tos-live.spos[r];
2820     }
2821    
2822     static __inline__ void usereg(int r)
2823     {
2824     if (live.spos[r]<0)
2825     make_tos(r);
2826     }
2827    
2828     /* This is called with one FP value in a reg *above* tos, which it will
2829     pop off the stack if necessary */
2830     static __inline__ void tos_make(int r)
2831     {
2832     if (live.spos[r]<0) {
2833     live.tos++;
2834     live.spos[r]=live.tos;
2835     live.onstack[live.tos]=r;
2836     return;
2837     }
2838     emit_byte(0xdd);
2839     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2840     and pop it*/
2841     }
2842    
2843    
2844     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2845     {
2846     make_tos(r);
2847     emit_byte(0xdd);
2848     emit_byte(0x15);
2849     emit_long(m);
2850     }
2851     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2852    
2853     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2854     {
2855     make_tos(r);
2856     emit_byte(0xdd);
2857     emit_byte(0x1d);
2858     emit_long(m);
2859     live.onstack[live.tos]=-1;
2860     live.tos--;
2861     live.spos[r]=-2;
2862     }
2863     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2864    
2865     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2866     {
2867     emit_byte(0xdd);
2868     emit_byte(0x05);
2869     emit_long(m);
2870     tos_make(r);
2871     }
2872     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2873    
2874     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2875     {
2876     emit_byte(0xdb);
2877     emit_byte(0x05);
2878     emit_long(m);
2879     tos_make(r);
2880     }
2881     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2882    
2883     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2884     {
2885     make_tos(r);
2886     emit_byte(0xdb);
2887     emit_byte(0x15);
2888     emit_long(m);
2889     }
2890     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2891    
2892     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2893     {
2894     emit_byte(0xd9);
2895     emit_byte(0x05);
2896     emit_long(m);
2897     tos_make(r);
2898     }
2899     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2900    
2901     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2902     {
2903     make_tos(r);
2904     emit_byte(0xd9);
2905     emit_byte(0x15);
2906     emit_long(m);
2907     }
2908     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2909    
2910     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2911     {
2912     int rs;
2913    
2914     /* Stupid x87 can't write a long double to mem without popping the
2915     stack! */
2916     usereg(r);
2917     rs=stackpos(r);
2918     emit_byte(0xd9); /* Get a copy to the top of stack */
2919     emit_byte(0xc0+rs);
2920    
2921     emit_byte(0xdb); /* store and pop it */
2922     emit_byte(0x3d);
2923     emit_long(m);
2924     }
2925     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2926    
2927     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2928     {
2929     int rs;
2930    
2931     make_tos(r);
2932     emit_byte(0xdb); /* store and pop it */
2933     emit_byte(0x3d);
2934     emit_long(m);
2935     live.onstack[live.tos]=-1;
2936     live.tos--;
2937     live.spos[r]=-2;
2938     }
2939     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2940    
2941     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2942     {
2943     emit_byte(0xdb);
2944     emit_byte(0x2d);
2945     emit_long(m);
2946     tos_make(r);
2947     }
2948     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2949    
2950     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2951     {
2952     emit_byte(0xd9);
2953     emit_byte(0xeb);
2954     tos_make(r);
2955     }
2956     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2957    
2958     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2959     {
2960     emit_byte(0xd9);
2961     emit_byte(0xec);
2962     tos_make(r);
2963     }
2964     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2965    
2966     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2967     {
2968     emit_byte(0xd9);
2969     emit_byte(0xea);
2970     tos_make(r);
2971     }
2972     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2973    
2974     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2975     {
2976     emit_byte(0xd9);
2977     emit_byte(0xed);
2978     tos_make(r);
2979     }
2980     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2981    
2982     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2983     {
2984     emit_byte(0xd9);
2985     emit_byte(0xe8);
2986     tos_make(r);
2987     }
2988     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2989    
2990     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2991     {
2992     emit_byte(0xd9);
2993     emit_byte(0xee);
2994     tos_make(r);
2995     }
2996     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2997    
2998     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2999     {
3000     int ds;
3001    
3002     usereg(s);
3003     ds=stackpos(s);
3004     if (ds==0 && live.spos[d]>=0) {
3005     /* source is on top of stack, and we already have the dest */
3006     int dd=stackpos(d);
3007     emit_byte(0xdd);
3008     emit_byte(0xd0+dd);
3009     }
3010     else {
3011     emit_byte(0xd9);
3012     emit_byte(0xc0+ds); /* duplicate source on tos */
3013     tos_make(d); /* store to destination, pop if necessary */
3014     }
3015     }
3016     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
3017    
3018     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3019     {
3020     emit_byte(0xd9);
3021     emit_byte(0xa8+index);
3022     emit_long(base);
3023     }
3024     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3025    
3026    
3027     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3028     {
3029     int ds;
3030    
3031     if (d!=s) {
3032     usereg(s);
3033     ds=stackpos(s);
3034     emit_byte(0xd9);
3035     emit_byte(0xc0+ds); /* duplicate source */
3036     emit_byte(0xd9);
3037     emit_byte(0xfa); /* take square root */
3038     tos_make(d); /* store to destination */
3039     }
3040     else {
3041     make_tos(d);
3042     emit_byte(0xd9);
3043     emit_byte(0xfa); /* take square root */
3044     }
3045     }
3046     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3047    
3048     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3049     {
3050     int ds;
3051    
3052     if (d!=s) {
3053     usereg(s);
3054     ds=stackpos(s);
3055     emit_byte(0xd9);
3056     emit_byte(0xc0+ds); /* duplicate source */
3057     emit_byte(0xd9);
3058     emit_byte(0xe1); /* take fabs */
3059     tos_make(d); /* store to destination */
3060     }
3061     else {
3062     make_tos(d);
3063     emit_byte(0xd9);
3064     emit_byte(0xe1); /* take fabs */
3065     }
3066     }
3067     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3068    
3069     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3070     {
3071     int ds;
3072    
3073     if (d!=s) {
3074     usereg(s);
3075     ds=stackpos(s);
3076     emit_byte(0xd9);
3077     emit_byte(0xc0+ds); /* duplicate source */
3078     emit_byte(0xd9);
3079     emit_byte(0xfc); /* take frndint */
3080     tos_make(d); /* store to destination */
3081     }
3082     else {
3083     make_tos(d);
3084     emit_byte(0xd9);
3085     emit_byte(0xfc); /* take frndint */
3086     }
3087     }
3088     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3089    
3090     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3091     {
3092     int ds;
3093    
3094     if (d!=s) {
3095     usereg(s);
3096     ds=stackpos(s);
3097     emit_byte(0xd9);
3098     emit_byte(0xc0+ds); /* duplicate source */
3099     emit_byte(0xd9);
3100     emit_byte(0xff); /* take cos */
3101     tos_make(d); /* store to destination */
3102     }
3103     else {
3104     make_tos(d);
3105     emit_byte(0xd9);
3106     emit_byte(0xff); /* take cos */
3107     }
3108     }
3109     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3110    
3111     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3112     {
3113     int ds;
3114    
3115     if (d!=s) {
3116     usereg(s);
3117     ds=stackpos(s);
3118     emit_byte(0xd9);
3119     emit_byte(0xc0+ds); /* duplicate source */
3120     emit_byte(0xd9);
3121     emit_byte(0xfe); /* take sin */
3122     tos_make(d); /* store to destination */
3123     }
3124     else {
3125     make_tos(d);
3126     emit_byte(0xd9);
3127     emit_byte(0xfe); /* take sin */
3128     }
3129     }
3130     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3131    
3132     double one=1;
3133     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3134     {
3135     int ds;
3136    
3137     usereg(s);
3138     ds=stackpos(s);
3139     emit_byte(0xd9);
3140     emit_byte(0xc0+ds); /* duplicate source */
3141    
3142     emit_byte(0xd9);
3143     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3144     emit_byte(0xd9);
3145     emit_byte(0xfc); /* rndint */
3146     emit_byte(0xd9);
3147     emit_byte(0xc9); /* swap top two elements */
3148     emit_byte(0xd8);
3149     emit_byte(0xe1); /* subtract rounded from original */
3150     emit_byte(0xd9);
3151     emit_byte(0xf0); /* f2xm1 */
3152     emit_byte(0xdc);
3153     emit_byte(0x05);
3154     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3155     emit_byte(0xd9);
3156     emit_byte(0xfd); /* and scale it */
3157     emit_byte(0xdd);
3158     emit_byte(0xd9); /* take he rounded value off */
3159     tos_make(d); /* store to destination */
3160     }
3161     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3162    
3163     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3164     {
3165     int ds;
3166    
3167     usereg(s);
3168     ds=stackpos(s);
3169     emit_byte(0xd9);
3170     emit_byte(0xc0+ds); /* duplicate source */
3171     emit_byte(0xd9);
3172     emit_byte(0xea); /* fldl2e */
3173     emit_byte(0xde);
3174     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3175    
3176     emit_byte(0xd9);
3177     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3178     emit_byte(0xd9);
3179     emit_byte(0xfc); /* rndint */
3180     emit_byte(0xd9);
3181     emit_byte(0xc9); /* swap top two elements */
3182     emit_byte(0xd8);
3183     emit_byte(0xe1); /* subtract rounded from original */
3184     emit_byte(0xd9);
3185     emit_byte(0xf0); /* f2xm1 */
3186     emit_byte(0xdc);
3187     emit_byte(0x05);
3188     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3189     emit_byte(0xd9);
3190     emit_byte(0xfd); /* and scale it */
3191     emit_byte(0xdd);
3192     emit_byte(0xd9); /* take he rounded value off */
3193     tos_make(d); /* store to destination */
3194     }
3195     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3196    
3197     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3198     {
3199     int ds;
3200    
3201     usereg(s);
3202     ds=stackpos(s);
3203     emit_byte(0xd9);
3204     emit_byte(0xc0+ds); /* duplicate source */
3205     emit_byte(0xd9);
3206     emit_byte(0xe8); /* push '1' */
3207     emit_byte(0xd9);
3208     emit_byte(0xc9); /* swap top two */
3209     emit_byte(0xd9);
3210     emit_byte(0xf1); /* take 1*log2(x) */
3211     tos_make(d); /* store to destination */
3212     }
3213     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3214    
3215    
3216     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3217     {
3218     int ds;
3219    
3220     if (d!=s) {
3221     usereg(s);
3222     ds=stackpos(s);
3223     emit_byte(0xd9);
3224     emit_byte(0xc0+ds); /* duplicate source */
3225     emit_byte(0xd9);
3226     emit_byte(0xe0); /* take fchs */
3227     tos_make(d); /* store to destination */
3228     }
3229     else {
3230     make_tos(d);
3231     emit_byte(0xd9);
3232     emit_byte(0xe0); /* take fchs */
3233     }
3234     }
3235     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3236    
3237     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3238     {
3239     int ds;
3240    
3241     usereg(s);
3242     usereg(d);
3243    
3244     if (live.spos[s]==live.tos) {
3245     /* Source is on top of stack */
3246     ds=stackpos(d);
3247     emit_byte(0xdc);
3248     emit_byte(0xc0+ds); /* add source to dest*/
3249     }
3250     else {
3251     make_tos(d);
3252     ds=stackpos(s);
3253    
3254     emit_byte(0xd8);
3255     emit_byte(0xc0+ds); /* add source to dest*/
3256     }
3257     }
3258     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3259    
3260     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3261     {
3262     int ds;
3263    
3264     usereg(s);
3265     usereg(d);
3266    
3267     if (live.spos[s]==live.tos) {
3268     /* Source is on top of stack */
3269     ds=stackpos(d);
3270     emit_byte(0xdc);
3271     emit_byte(0xe8+ds); /* sub source from dest*/
3272     }
3273     else {
3274     make_tos(d);
3275     ds=stackpos(s);
3276    
3277     emit_byte(0xd8);
3278     emit_byte(0xe0+ds); /* sub src from dest */
3279     }
3280     }
3281     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3282    
3283     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3284     {
3285     int ds;
3286    
3287     usereg(s);
3288     usereg(d);
3289    
3290     make_tos(d);
3291     ds=stackpos(s);
3292    
3293     emit_byte(0xdd);
3294     emit_byte(0xe0+ds); /* cmp dest with source*/
3295     }
3296     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3297    
3298     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3299     {
3300     int ds;
3301    
3302     usereg(s);
3303     usereg(d);
3304    
3305     if (live.spos[s]==live.tos) {
3306     /* Source is on top of stack */
3307     ds=stackpos(d);
3308     emit_byte(0xdc);
3309     emit_byte(0xc8+ds); /* mul dest by source*/
3310     }
3311     else {
3312     make_tos(d);
3313     ds=stackpos(s);
3314    
3315     emit_byte(0xd8);
3316     emit_byte(0xc8+ds); /* mul dest by source*/
3317     }
3318     }
3319     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3320    
3321     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3322     {
3323     int ds;
3324    
3325     usereg(s);
3326     usereg(d);
3327    
3328     if (live.spos[s]==live.tos) {
3329     /* Source is on top of stack */
3330     ds=stackpos(d);
3331     emit_byte(0xdc);
3332     emit_byte(0xf8+ds); /* div dest by source */
3333     }
3334     else {
3335     make_tos(d);
3336     ds=stackpos(s);
3337    
3338     emit_byte(0xd8);
3339     emit_byte(0xf0+ds); /* div dest by source*/
3340     }
3341     }
3342     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3343    
3344     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3345     {
3346     int ds;
3347    
3348     usereg(s);
3349     usereg(d);
3350    
3351     make_tos2(d,s);
3352     ds=stackpos(s);
3353    
3354     if (ds!=1) {
3355     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3356     abort();
3357     }
3358     emit_byte(0xd9);
3359     emit_byte(0xf8); /* take rem from dest by source */
3360     }
3361     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3362    
3363     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3364     {
3365     int ds;
3366    
3367     usereg(s);
3368     usereg(d);
3369    
3370     make_tos2(d,s);
3371     ds=stackpos(s);
3372    
3373     if (ds!=1) {
3374     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3375     abort();
3376     }
3377     emit_byte(0xd9);
3378     emit_byte(0xf5); /* take rem1 from dest by source */
3379     }
3380     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3381    
3382    
3383     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3384     {
3385     make_tos(r);
3386     emit_byte(0xd9); /* ftst */
3387     emit_byte(0xe4);
3388     }
3389     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3390    
/* %eax register is clobbered if target processor doesn't support fucomi:
   raw_fflags_into_flags() then falls back to "fstsw ax" followed by sahf.
   The condition is parenthesized so that it expands as a single expression
   wherever it is used. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
3394    
3395     static __inline__ void raw_fflags_into_flags(int r)
3396     {
3397     int p;
3398    
3399     usereg(r);
3400     p=stackpos(r);
3401    
3402     emit_byte(0xd9);
3403     emit_byte(0xee); /* Push 0 */
3404     emit_byte(0xd9);
3405     emit_byte(0xc9+p); /* swap top two around */
3406     if (have_cmov) {
3407     // gb-- fucomi is for P6 cores only, not K6-2 then...
3408     emit_byte(0xdb);
3409     emit_byte(0xe9+p); /* fucomi them */
3410     }
3411     else {
3412     emit_byte(0xdd);
3413     emit_byte(0xe1+p); /* fucom them */
3414     emit_byte(0x9b);
3415     emit_byte(0xdf);
3416     emit_byte(0xe0); /* fstsw ax */
3417     raw_sahf(0); /* sahf */
3418     }
3419     emit_byte(0xdd);
3420     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3421     }