ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.12
Committed: 2003-03-17T22:37:55Z (21 years, 6 months ago) by gbeauche
Branch: MAIN
Changes since 1.11: +1 -1 lines
Log Message:
clobber "cc" for flags, not "flags". Thanks Milan for noticing it.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Hardware numbers of the eight 32-bit general purpose registers */
#define EAX_INDEX 0
#define ECX_INDEX 1
#define EDX_INDEX 2
#define EBX_INDEX 3
#define ESP_INDEX 4
#define EBP_INDEX 5
#define ESI_INDEX 6
#define EDI_INDEX 7

/* Register that carries a subroutine's integer return value */
#define REG_RESULT EAX_INDEX

/* Registers that carry a subroutine's first and second argument */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* _fastcall hands its parameters over in ECX and EDX */
#define REG_PAR1 ECX_INDEX
#define REG_PAR2 EDX_INDEX
#else
#define REG_PAR1 EAX_INDEX
#define REG_PAR2 EDX_INDEX
#endif

/* Three registers that play none of the roles above */
#define REG_NOPAR1 ESI_INDEX
#define REG_NOPAR2 EBP_INDEX
#define REG_NOPAR3 EBX_INDEX

/* Register used for preloading regs.pc_p */
#define REG_PC_PRE EAX_INDEX
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): same register as REG_PC_PRE in this configuration -- confirm
   that this is intended */
#define REG_PC_TMP EAX_INDEX
#else
/* A second register, different from REG_PC_PRE */
#define REG_PC_TMP ECX_INDEX
#endif

/* Register that can be used for a shift count (-1 would mean that any
   register will do) */
#define SHIFTCOUNT_NREG ECX_INDEX
/* After a 32x32 multiply, %eax holds the low and %edx the high 32 bits */
#define MUL_NREG1 EAX_INDEX
#define MUL_NREG2 EDX_INDEX
76     uae_s8 always_used[]={4,-1};
77     uae_s8 can_byte[]={0,1,2,3,-1};
78     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79    
80     /* cpuopti mutate instruction handlers to assume registers are saved
81     by the caller */
82     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
83    
84     /* This *should* be the same as call_saved. But:
85     - We might not really know which registers are saved, and which aren't,
86     so we need to preserve some, but don't want to rely on everyone else
87     also saving those registers
88     - Special registers (such like the stack pointer) should not be "preserved"
89     by pushing, even though they are "saved" across function calls
90     */
91     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
92    
/* Per-instruction-class hooks telling whether the class clobbers the
   native flags: an empty expansion means it does not, clobber_flags()
   means it does. */

/* Data movement */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH

/* Arithmetic and logic */
#define CLOBBER_SUB  clobber_flags()
#define CLOBBER_SBB  clobber_flags()
#define CLOBBER_CMP  clobber_flags()
#define CLOBBER_ADD  clobber_flags()
#define CLOBBER_ADC  clobber_flags()
#define CLOBBER_AND  clobber_flags()
#define CLOBBER_OR   clobber_flags()
#define CLOBBER_XOR  clobber_flags()

/* Rotates, shifts and test */
#define CLOBBER_ROL  clobber_flags()
#define CLOBBER_ROR  clobber_flags()
#define CLOBBER_SHLL clobber_flags()
#define CLOBBER_SHRL clobber_flags()
#define CLOBBER_SHRA clobber_flags()
#define CLOBBER_TEST clobber_flags()

/* Clears, extensions, swaps and setcc */
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW16 clobber_flags()
#define CLOBBER_SW32
#define CLOBBER_SETCC

/* Multiply and bit operations */
#define CLOBBER_MUL  clobber_flags()
#define CLOBBER_BT   clobber_flags()
#define CLOBBER_BSF  clobber_flags()
126    
/* Switches for shorter instruction encodings */
const bool optimize_accum      = true;  /* allow the short accumulator-only opcode forms */
const bool optimize_imm8       = true;  /* NOTE(review): presumably enables imm8 forms; not referenced in this part of the file */
const bool optimize_shift_once = true;  /* allow the dedicated shift/rotate-by-one opcodes */
130    
131     /*************************************************************************
132     * Actual encoding of the instructions on the target CPU *
133     *************************************************************************/
134    
135 gbeauche 1.2 static __inline__ int isaccum(int r)
136     {
137     return (r == EAX_INDEX);
138     }
139    
140 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
141     {
142     return (x>=-128 && x<=127);
143     }
144    
145     static __inline__ int isword(uae_s32 x)
146     {
147     return (x>=-32768 && x<=32767);
148     }
149    
150     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
151     {
152     emit_byte(0x50+r);
153     }
154     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
155    
156     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
157     {
158     emit_byte(0x58+r);
159     }
160     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
161    
162     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
163     {
164     emit_byte(0x0f);
165     emit_byte(0xba);
166     emit_byte(0xe0+r);
167     emit_byte(i);
168     }
169     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
170    
171     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
172     {
173     emit_byte(0x0f);
174     emit_byte(0xa3);
175     emit_byte(0xc0+8*b+r);
176     }
177     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
178    
179     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
180     {
181     emit_byte(0x0f);
182     emit_byte(0xba);
183     emit_byte(0xf8+r);
184     emit_byte(i);
185     }
186     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
187    
188     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
189     {
190     emit_byte(0x0f);
191     emit_byte(0xbb);
192     emit_byte(0xc0+8*b+r);
193     }
194     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
195    
196    
197     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
198     {
199     emit_byte(0x0f);
200     emit_byte(0xba);
201     emit_byte(0xf0+r);
202     emit_byte(i);
203     }
204     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
205    
206     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
207     {
208     emit_byte(0x0f);
209     emit_byte(0xb3);
210     emit_byte(0xc0+8*b+r);
211     }
212     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
213    
214     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
215     {
216     emit_byte(0x0f);
217     emit_byte(0xba);
218     emit_byte(0xe8+r);
219     emit_byte(i);
220     }
221     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
222    
223     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
224     {
225     emit_byte(0x0f);
226     emit_byte(0xab);
227     emit_byte(0xc0+8*b+r);
228     }
229     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
230    
231     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
232     {
233     emit_byte(0x66);
234     if (isbyte(i)) {
235     emit_byte(0x83);
236     emit_byte(0xe8+d);
237     emit_byte(i);
238     }
239     else {
240 gbeauche 1.2 if (optimize_accum && isaccum(d))
241     emit_byte(0x2d);
242     else {
243 gbeauche 1.1 emit_byte(0x81);
244     emit_byte(0xe8+d);
245 gbeauche 1.2 }
246 gbeauche 1.1 emit_word(i);
247     }
248     }
249     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
250    
251    
252     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
253     {
254     emit_byte(0x8b);
255     emit_byte(0x05+8*d);
256     emit_long(s);
257     }
258     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
259    
260     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
261     {
262     emit_byte(0xc7);
263     emit_byte(0x05);
264     emit_long(d);
265     emit_long(s);
266     }
267     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
268    
269     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
270     {
271     emit_byte(0x66);
272     emit_byte(0xc7);
273     emit_byte(0x05);
274     emit_long(d);
275     emit_word(s);
276     }
277     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
278    
279     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
280     {
281     emit_byte(0xc6);
282     emit_byte(0x05);
283     emit_long(d);
284     emit_byte(s);
285     }
286     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
287    
288     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
289     {
290     if (optimize_shift_once && (i == 1)) {
291     emit_byte(0xd0);
292     emit_byte(0x05);
293     emit_long(d);
294     }
295     else {
296     emit_byte(0xc0);
297     emit_byte(0x05);
298     emit_long(d);
299     emit_byte(i);
300     }
301     }
302     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
303    
304     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
305     {
306     if (optimize_shift_once && (i == 1)) {
307     emit_byte(0xd0);
308     emit_byte(0xc0+r);
309     }
310     else {
311     emit_byte(0xc0);
312     emit_byte(0xc0+r);
313     emit_byte(i);
314     }
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317    
318     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319     {
320     emit_byte(0x66);
321     emit_byte(0xc1);
322     emit_byte(0xc0+r);
323     emit_byte(i);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
328     {
329     if (optimize_shift_once && (i == 1)) {
330     emit_byte(0xd1);
331     emit_byte(0xc0+r);
332     }
333     else {
334     emit_byte(0xc1);
335     emit_byte(0xc0+r);
336     emit_byte(i);
337     }
338     }
339     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
340    
341     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
342     {
343     emit_byte(0xd3);
344     emit_byte(0xc0+d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
347    
348     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
349     {
350     emit_byte(0x66);
351     emit_byte(0xd3);
352     emit_byte(0xc0+d);
353     }
354     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
355    
356     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
357     {
358     emit_byte(0xd2);
359     emit_byte(0xc0+d);
360     }
361     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
362    
363     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
364     {
365     emit_byte(0xd3);
366     emit_byte(0xe0+d);
367     }
368     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
369    
370     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
371     {
372     emit_byte(0x66);
373     emit_byte(0xd3);
374     emit_byte(0xe0+d);
375     }
376     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
377    
378     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
379     {
380     emit_byte(0xd2);
381     emit_byte(0xe0+d);
382     }
383     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
384    
385     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
386     {
387     if (optimize_shift_once && (i == 1)) {
388     emit_byte(0xd0);
389     emit_byte(0xc8+r);
390     }
391     else {
392     emit_byte(0xc0);
393     emit_byte(0xc8+r);
394     emit_byte(i);
395     }
396     }
397     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
398    
399     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
400     {
401     emit_byte(0x66);
402     emit_byte(0xc1);
403     emit_byte(0xc8+r);
404     emit_byte(i);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
407    
408     // gb-- used for making an fpcr value in compemu_fpp.cpp
409     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
410     {
411     emit_byte(0x0b);
412     emit_byte(0x05+8*d);
413     emit_long(s);
414     }
415     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
416    
417     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
418     {
419     if (optimize_shift_once && (i == 1)) {
420     emit_byte(0xd1);
421     emit_byte(0xc8+r);
422     }
423     else {
424     emit_byte(0xc1);
425     emit_byte(0xc8+r);
426     emit_byte(i);
427     }
428     }
429     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
430    
431     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
432     {
433     emit_byte(0xd3);
434     emit_byte(0xc8+d);
435     }
436     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
437    
438     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
439     {
440     emit_byte(0x66);
441     emit_byte(0xd3);
442     emit_byte(0xc8+d);
443     }
444     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
445    
446     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
447     {
448     emit_byte(0xd2);
449     emit_byte(0xc8+d);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
454     {
455     emit_byte(0xd3);
456     emit_byte(0xe8+d);
457     }
458     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
459    
460     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
461     {
462     emit_byte(0x66);
463     emit_byte(0xd3);
464     emit_byte(0xe8+d);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
469     {
470     emit_byte(0xd2);
471     emit_byte(0xe8+d);
472     }
473     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
474    
475     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
476     {
477     emit_byte(0xd3);
478     emit_byte(0xf8+d);
479     }
480     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
481    
482     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
483     {
484     emit_byte(0x66);
485     emit_byte(0xd3);
486     emit_byte(0xf8+d);
487     }
488     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
489    
490     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
491     {
492     emit_byte(0xd2);
493     emit_byte(0xf8+d);
494     }
495     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
496    
497     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
498     {
499     if (optimize_shift_once && (i == 1)) {
500     emit_byte(0xd1);
501     emit_byte(0xe0+r);
502     }
503     else {
504     emit_byte(0xc1);
505     emit_byte(0xe0+r);
506     emit_byte(i);
507     }
508     }
509     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
510    
511     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
512     {
513     emit_byte(0x66);
514     emit_byte(0xc1);
515     emit_byte(0xe0+r);
516     emit_byte(i);
517     }
518     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
519    
520     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
521     {
522     if (optimize_shift_once && (i == 1)) {
523     emit_byte(0xd0);
524     emit_byte(0xe0+r);
525     }
526     else {
527     emit_byte(0xc0);
528     emit_byte(0xe0+r);
529     emit_byte(i);
530     }
531     }
532     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
533    
534     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
535     {
536     if (optimize_shift_once && (i == 1)) {
537     emit_byte(0xd1);
538     emit_byte(0xe8+r);
539     }
540     else {
541     emit_byte(0xc1);
542     emit_byte(0xe8+r);
543     emit_byte(i);
544     }
545     }
546     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
547    
548     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
549     {
550     emit_byte(0x66);
551     emit_byte(0xc1);
552     emit_byte(0xe8+r);
553     emit_byte(i);
554     }
555     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
556    
557     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
558     {
559     if (optimize_shift_once && (i == 1)) {
560     emit_byte(0xd0);
561     emit_byte(0xe8+r);
562     }
563     else {
564     emit_byte(0xc0);
565     emit_byte(0xe8+r);
566     emit_byte(i);
567     }
568     }
569     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
570    
571     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
572     {
573     if (optimize_shift_once && (i == 1)) {
574     emit_byte(0xd1);
575     emit_byte(0xf8+r);
576     }
577     else {
578     emit_byte(0xc1);
579     emit_byte(0xf8+r);
580     emit_byte(i);
581     }
582     }
583     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
584    
585     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
586     {
587     emit_byte(0x66);
588     emit_byte(0xc1);
589     emit_byte(0xf8+r);
590     emit_byte(i);
591     }
592     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
593    
594     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
595     {
596     if (optimize_shift_once && (i == 1)) {
597     emit_byte(0xd0);
598     emit_byte(0xf8+r);
599     }
600     else {
601     emit_byte(0xc0);
602     emit_byte(0xf8+r);
603     emit_byte(i);
604     }
605     }
606     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
607    
608     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
609     {
610     emit_byte(0x9e);
611     }
612     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
613    
614     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
615     {
616     emit_byte(0x0f);
617     emit_byte(0xa2);
618     }
619     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
620    
621     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
622     {
623     emit_byte(0x9f);
624     }
625     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
626    
627     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
628     {
629     emit_byte(0x0f);
630     emit_byte(0x90+cc);
631     emit_byte(0xc0+d);
632     }
633     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
634    
635     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
636     {
637     emit_byte(0x0f);
638     emit_byte(0x90+cc);
639     emit_byte(0x05);
640     emit_long(d);
641     }
642     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
643    
644     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
645     {
646     if (have_cmov) {
647     emit_byte(0x0f);
648     emit_byte(0x40+cc);
649     emit_byte(0xc0+8*d+s);
650     }
651     else { /* replacement using branch and mov */
652     int uncc=(cc^1);
653     emit_byte(0x70+uncc);
654     emit_byte(2); /* skip next 2 bytes if not cc=true */
655     emit_byte(0x89);
656     emit_byte(0xc0+8*s+d);
657     }
658     }
659     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
660    
661     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
662     {
663     emit_byte(0x0f);
664     emit_byte(0xbc);
665     emit_byte(0xc0+8*d+s);
666     }
667     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
668    
669     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
670     {
671     emit_byte(0x0f);
672     emit_byte(0xbf);
673     emit_byte(0xc0+8*d+s);
674     }
675     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
676    
677     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
678     {
679     emit_byte(0x0f);
680     emit_byte(0xbe);
681     emit_byte(0xc0+8*d+s);
682     }
683     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
684    
685     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
686     {
687     emit_byte(0x0f);
688     emit_byte(0xb7);
689     emit_byte(0xc0+8*d+s);
690     }
691     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
692    
693     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
694     {
695     emit_byte(0x0f);
696     emit_byte(0xb6);
697     emit_byte(0xc0+8*d+s);
698     }
699     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
700    
701     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
702     {
703     emit_byte(0x0f);
704     emit_byte(0xaf);
705     emit_byte(0xc0+8*d+s);
706     }
707     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
708    
709     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
710     {
711     if (d!=MUL_NREG1 || s!=MUL_NREG2)
712     abort();
713     emit_byte(0xf7);
714     emit_byte(0xea);
715     }
716     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
717    
718     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
719     {
720     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
721     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
722     abort();
723     }
724     emit_byte(0xf7);
725     emit_byte(0xe2);
726     }
727     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
728    
729     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
730     {
731     abort(); /* %^$&%^$%#^ x86! */
732     emit_byte(0x0f);
733     emit_byte(0xaf);
734     emit_byte(0xc0+8*d+s);
735     }
736     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
737    
738     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
739     {
740     emit_byte(0x88);
741     emit_byte(0xc0+8*s+d);
742     }
743     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
744    
745     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
746     {
747     emit_byte(0x66);
748     emit_byte(0x89);
749     emit_byte(0xc0+8*s+d);
750     }
751     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
752    
753     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
754     {
755     int isebp=(baser==5)?0x40:0;
756     int fi;
757    
758     switch(factor) {
759     case 1: fi=0; break;
760     case 2: fi=1; break;
761     case 4: fi=2; break;
762     case 8: fi=3; break;
763     default: abort();
764     }
765    
766    
767     emit_byte(0x8b);
768     emit_byte(0x04+8*d+isebp);
769     emit_byte(baser+8*index+0x40*fi);
770     if (isebp)
771     emit_byte(0x00);
772     }
773     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
774    
775     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
776     {
777     int fi;
778     int isebp;
779    
780     switch(factor) {
781     case 1: fi=0; break;
782     case 2: fi=1; break;
783     case 4: fi=2; break;
784     case 8: fi=3; break;
785     default: abort();
786     }
787     isebp=(baser==5)?0x40:0;
788    
789     emit_byte(0x66);
790     emit_byte(0x8b);
791     emit_byte(0x04+8*d+isebp);
792     emit_byte(baser+8*index+0x40*fi);
793     if (isebp)
794     emit_byte(0x00);
795     }
796     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
797    
798     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
799     {
800     int fi;
801     int isebp;
802    
803     switch(factor) {
804     case 1: fi=0; break;
805     case 2: fi=1; break;
806     case 4: fi=2; break;
807     case 8: fi=3; break;
808     default: abort();
809     }
810     isebp=(baser==5)?0x40:0;
811    
812     emit_byte(0x8a);
813     emit_byte(0x04+8*d+isebp);
814     emit_byte(baser+8*index+0x40*fi);
815     if (isebp)
816     emit_byte(0x00);
817     }
818     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
819    
820     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
821     {
822     int fi;
823     int isebp;
824    
825     switch(factor) {
826     case 1: fi=0; break;
827     case 2: fi=1; break;
828     case 4: fi=2; break;
829     case 8: fi=3; break;
830     default: abort();
831     }
832    
833    
834     isebp=(baser==5)?0x40:0;
835    
836     emit_byte(0x89);
837     emit_byte(0x04+8*s+isebp);
838     emit_byte(baser+8*index+0x40*fi);
839     if (isebp)
840     emit_byte(0x00);
841     }
842     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
843    
844     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
845     {
846     int fi;
847     int isebp;
848    
849     switch(factor) {
850     case 1: fi=0; break;
851     case 2: fi=1; break;
852     case 4: fi=2; break;
853     case 8: fi=3; break;
854     default: abort();
855     }
856     isebp=(baser==5)?0x40:0;
857    
858     emit_byte(0x66);
859     emit_byte(0x89);
860     emit_byte(0x04+8*s+isebp);
861     emit_byte(baser+8*index+0x40*fi);
862     if (isebp)
863     emit_byte(0x00);
864     }
865     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
866    
867     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
868     {
869     int fi;
870     int isebp;
871    
872     switch(factor) {
873     case 1: fi=0; break;
874     case 2: fi=1; break;
875     case 4: fi=2; break;
876     case 8: fi=3; break;
877     default: abort();
878     }
879     isebp=(baser==5)?0x40:0;
880    
881     emit_byte(0x88);
882     emit_byte(0x04+8*s+isebp);
883     emit_byte(baser+8*index+0x40*fi);
884     if (isebp)
885     emit_byte(0x00);
886     }
887     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
888    
889     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
890     {
891     int fi;
892    
893     switch(factor) {
894     case 1: fi=0; break;
895     case 2: fi=1; break;
896     case 4: fi=2; break;
897     case 8: fi=3; break;
898     default: abort();
899     }
900    
901     emit_byte(0x89);
902     emit_byte(0x84+8*s);
903     emit_byte(baser+8*index+0x40*fi);
904     emit_long(base);
905     }
906     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
907    
908     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
909     {
910     int fi;
911    
912     switch(factor) {
913     case 1: fi=0; break;
914     case 2: fi=1; break;
915     case 4: fi=2; break;
916     case 8: fi=3; break;
917     default: abort();
918     }
919    
920     emit_byte(0x66);
921     emit_byte(0x89);
922     emit_byte(0x84+8*s);
923     emit_byte(baser+8*index+0x40*fi);
924     emit_long(base);
925     }
926     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
927    
928     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
929     {
930     int fi;
931    
932     switch(factor) {
933     case 1: fi=0; break;
934     case 2: fi=1; break;
935     case 4: fi=2; break;
936     case 8: fi=3; break;
937     default: abort();
938     }
939    
940     emit_byte(0x88);
941     emit_byte(0x84+8*s);
942     emit_byte(baser+8*index+0x40*fi);
943     emit_long(base);
944     }
945     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
946    
947     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
948     {
949     int fi;
950    
951     switch(factor) {
952     case 1: fi=0; break;
953     case 2: fi=1; break;
954     case 4: fi=2; break;
955     case 8: fi=3; break;
956     default: abort();
957     }
958    
959     emit_byte(0x8b);
960     emit_byte(0x84+8*d);
961     emit_byte(baser+8*index+0x40*fi);
962     emit_long(base);
963     }
964     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
965    
966     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
967     {
968     int fi;
969    
970     switch(factor) {
971     case 1: fi=0; break;
972     case 2: fi=1; break;
973     case 4: fi=2; break;
974     case 8: fi=3; break;
975     default: abort();
976     }
977    
978     emit_byte(0x66);
979     emit_byte(0x8b);
980     emit_byte(0x84+8*d);
981     emit_byte(baser+8*index+0x40*fi);
982     emit_long(base);
983     }
984     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
985    
986     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
987     {
988     int fi;
989    
990     switch(factor) {
991     case 1: fi=0; break;
992     case 2: fi=1; break;
993     case 4: fi=2; break;
994     case 8: fi=3; break;
995     default: abort();
996     }
997    
998     emit_byte(0x8a);
999     emit_byte(0x84+8*d);
1000     emit_byte(baser+8*index+0x40*fi);
1001     emit_long(base);
1002     }
1003     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1004    
1005     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1006     {
1007     int fi;
1008     switch(factor) {
1009     case 1: fi=0; break;
1010     case 2: fi=1; break;
1011     case 4: fi=2; break;
1012     case 8: fi=3; break;
1013     default:
1014     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1015     abort();
1016     }
1017     emit_byte(0x8b);
1018     emit_byte(0x04+8*d);
1019     emit_byte(0x05+8*index+64*fi);
1020     emit_long(base);
1021     }
1022     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1023    
1024     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1025     {
1026     int fi;
1027     switch(factor) {
1028     case 1: fi=0; break;
1029     case 2: fi=1; break;
1030     case 4: fi=2; break;
1031     case 8: fi=3; break;
1032     default:
1033     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1034     abort();
1035     }
1036     if (have_cmov) {
1037     emit_byte(0x0f);
1038     emit_byte(0x40+cond);
1039     emit_byte(0x04+8*d);
1040     emit_byte(0x05+8*index+64*fi);
1041     emit_long(base);
1042     }
1043     else { /* replacement using branch and mov */
1044     int uncc=(cond^1);
1045     emit_byte(0x70+uncc);
1046     emit_byte(7); /* skip next 7 bytes if not cc=true */
1047     emit_byte(0x8b);
1048     emit_byte(0x04+8*d);
1049     emit_byte(0x05+8*index+64*fi);
1050     emit_long(base);
1051     }
1052     }
1053     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1054    
1055     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1056     {
1057     if (have_cmov) {
1058     emit_byte(0x0f);
1059     emit_byte(0x40+cond);
1060     emit_byte(0x05+8*d);
1061     emit_long(mem);
1062     }
1063     else { /* replacement using branch and mov */
1064     int uncc=(cond^1);
1065     emit_byte(0x70+uncc);
1066     emit_byte(6); /* skip next 6 bytes if not cc=true */
1067     emit_byte(0x8b);
1068     emit_byte(0x05+8*d);
1069     emit_long(mem);
1070     }
1071     }
1072     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1073    
1074     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1075     {
1076 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1077 gbeauche 1.1 emit_byte(0x8b);
1078     emit_byte(0x40+8*d+s);
1079     emit_byte(offset);
1080     }
1081     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1082    
1083     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1084     {
1085 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1086 gbeauche 1.1 emit_byte(0x66);
1087     emit_byte(0x8b);
1088     emit_byte(0x40+8*d+s);
1089     emit_byte(offset);
1090     }
1091     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1092    
1093     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1094     {
1095 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1096 gbeauche 1.1 emit_byte(0x8a);
1097     emit_byte(0x40+8*d+s);
1098     emit_byte(offset);
1099     }
1100     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1101    
1102     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1103     {
1104     emit_byte(0x8b);
1105     emit_byte(0x80+8*d+s);
1106     emit_long(offset);
1107     }
1108     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1109    
1110     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1111     {
1112     emit_byte(0x66);
1113     emit_byte(0x8b);
1114     emit_byte(0x80+8*d+s);
1115     emit_long(offset);
1116     }
1117     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1118    
1119     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1120     {
1121     emit_byte(0x8a);
1122     emit_byte(0x80+8*d+s);
1123     emit_long(offset);
1124     }
1125     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1126    
1127     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1128     {
1129 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1130 gbeauche 1.1 emit_byte(0xc7);
1131     emit_byte(0x40+d);
1132     emit_byte(offset);
1133     emit_long(i);
1134     }
1135     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1136    
1137     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1138     {
1139 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1140 gbeauche 1.1 emit_byte(0x66);
1141     emit_byte(0xc7);
1142     emit_byte(0x40+d);
1143     emit_byte(offset);
1144     emit_word(i);
1145     }
1146     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1147    
1148     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1149     {
1150 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1151 gbeauche 1.1 emit_byte(0xc6);
1152     emit_byte(0x40+d);
1153     emit_byte(offset);
1154     emit_byte(i);
1155     }
1156     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1157    
1158     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1159     {
1160 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1161 gbeauche 1.1 emit_byte(0x89);
1162     emit_byte(0x40+8*s+d);
1163     emit_byte(offset);
1164     }
1165     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1166    
1167     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1168     {
1169 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1170 gbeauche 1.1 emit_byte(0x66);
1171     emit_byte(0x89);
1172     emit_byte(0x40+8*s+d);
1173     emit_byte(offset);
1174     }
1175     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1176    
1177     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1178     {
1179 gbeauche 1.9 Dif(!isbyte(offset)) abort();
1180 gbeauche 1.1 emit_byte(0x88);
1181     emit_byte(0x40+8*s+d);
1182     emit_byte(offset);
1183     }
1184     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1185    
1186     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1187     {
1188     if (optimize_imm8 && isbyte(offset)) {
1189     emit_byte(0x8d);
1190     emit_byte(0x40+8*d+s);
1191     emit_byte(offset);
1192     }
1193     else {
1194     emit_byte(0x8d);
1195     emit_byte(0x80+8*d+s);
1196     emit_long(offset);
1197     }
1198     }
1199     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1200    
1201     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1202     {
1203     int fi;
1204    
1205     switch(factor) {
1206     case 1: fi=0; break;
1207     case 2: fi=1; break;
1208     case 4: fi=2; break;
1209     case 8: fi=3; break;
1210     default: abort();
1211     }
1212    
1213     if (optimize_imm8 && isbyte(offset)) {
1214     emit_byte(0x8d);
1215     emit_byte(0x44+8*d);
1216     emit_byte(0x40*fi+8*index+s);
1217     emit_byte(offset);
1218     }
1219     else {
1220     emit_byte(0x8d);
1221     emit_byte(0x84+8*d);
1222     emit_byte(0x40*fi+8*index+s);
1223     emit_long(offset);
1224     }
1225     }
1226     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1227    
1228     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1229     {
1230     int isebp=(s==5)?0x40:0;
1231     int fi;
1232    
1233     switch(factor) {
1234     case 1: fi=0; break;
1235     case 2: fi=1; break;
1236     case 4: fi=2; break;
1237     case 8: fi=3; break;
1238     default: abort();
1239     }
1240    
1241     emit_byte(0x8d);
1242     emit_byte(0x04+8*d+isebp);
1243     emit_byte(0x40*fi+8*index+s);
1244     if (isebp)
1245     emit_byte(0);
1246     }
1247     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1248    
1249     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1250     {
1251     if (optimize_imm8 && isbyte(offset)) {
1252     emit_byte(0x89);
1253     emit_byte(0x40+8*s+d);
1254     emit_byte(offset);
1255     }
1256     else {
1257     emit_byte(0x89);
1258     emit_byte(0x80+8*s+d);
1259     emit_long(offset);
1260     }
1261     }
1262     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1263    
1264     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1265     {
1266     emit_byte(0x66);
1267     emit_byte(0x89);
1268     emit_byte(0x80+8*s+d);
1269     emit_long(offset);
1270     }
1271     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1272    
1273     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1274     {
1275     if (optimize_imm8 && isbyte(offset)) {
1276     emit_byte(0x88);
1277     emit_byte(0x40+8*s+d);
1278     emit_byte(offset);
1279     }
1280     else {
1281     emit_byte(0x88);
1282     emit_byte(0x80+8*s+d);
1283     emit_long(offset);
1284     }
1285     }
1286     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1287    
1288     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1289     {
1290     emit_byte(0x0f);
1291     emit_byte(0xc8+r);
1292     }
1293     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1294    
1295     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1296     {
1297     emit_byte(0x66);
1298     emit_byte(0xc1);
1299     emit_byte(0xc0+r);
1300     emit_byte(0x08);
1301     }
1302     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1303    
1304     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1305     {
1306     emit_byte(0x89);
1307     emit_byte(0xc0+8*s+d);
1308     }
1309     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1310    
1311     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1312     {
1313     emit_byte(0x89);
1314     emit_byte(0x05+8*s);
1315     emit_long(d);
1316     }
1317     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1318    
1319     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1320     {
1321     emit_byte(0x66);
1322     emit_byte(0x89);
1323     emit_byte(0x05+8*s);
1324     emit_long(d);
1325     }
1326     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1327    
1328     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1329     {
1330     emit_byte(0x66);
1331     emit_byte(0x8b);
1332     emit_byte(0x05+8*d);
1333     emit_long(s);
1334     }
1335     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1336    
1337     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1338     {
1339     emit_byte(0x88);
1340     emit_byte(0x05+8*s);
1341     emit_long(d);
1342     }
1343     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1344    
1345     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1346     {
1347     emit_byte(0x8a);
1348     emit_byte(0x05+8*d);
1349     emit_long(s);
1350     }
1351     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1352    
1353     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1354     {
1355     emit_byte(0xb8+d);
1356     emit_long(s);
1357     }
1358     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1359    
1360     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1361     {
1362     emit_byte(0x66);
1363     emit_byte(0xb8+d);
1364     emit_word(s);
1365     }
1366     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1367    
1368     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1369     {
1370     emit_byte(0xb0+d);
1371     emit_byte(s);
1372     }
1373     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1374    
1375     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1376     {
1377     emit_byte(0x81);
1378     emit_byte(0x15);
1379     emit_long(d);
1380     emit_long(s);
1381     }
1382     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1383    
1384     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1385     {
1386     if (optimize_imm8 && isbyte(s)) {
1387     emit_byte(0x83);
1388     emit_byte(0x05);
1389     emit_long(d);
1390     emit_byte(s);
1391     }
1392     else {
1393     emit_byte(0x81);
1394     emit_byte(0x05);
1395     emit_long(d);
1396     emit_long(s);
1397     }
1398     }
1399     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1400    
1401     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1402     {
1403     emit_byte(0x66);
1404     emit_byte(0x81);
1405     emit_byte(0x05);
1406     emit_long(d);
1407     emit_word(s);
1408     }
1409     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1410    
1411     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1412     {
1413     emit_byte(0x80);
1414     emit_byte(0x05);
1415     emit_long(d);
1416     emit_byte(s);
1417     }
1418     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1419    
1420     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1421     {
1422 gbeauche 1.2 if (optimize_accum && isaccum(d))
1423     emit_byte(0xa9);
1424     else {
1425 gbeauche 1.1 emit_byte(0xf7);
1426     emit_byte(0xc0+d);
1427 gbeauche 1.2 }
1428 gbeauche 1.1 emit_long(i);
1429     }
1430     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1431    
1432     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1433     {
1434     emit_byte(0x85);
1435     emit_byte(0xc0+8*s+d);
1436     }
1437     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1438    
1439     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1440     {
1441     emit_byte(0x66);
1442     emit_byte(0x85);
1443     emit_byte(0xc0+8*s+d);
1444     }
1445     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1446    
1447     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1448     {
1449     emit_byte(0x84);
1450     emit_byte(0xc0+8*s+d);
1451     }
1452     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1453    
1454     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1455     {
1456     if (optimize_imm8 && isbyte(i)) {
1457 gbeauche 1.2 emit_byte(0x83);
1458     emit_byte(0xe0+d);
1459     emit_byte(i);
1460 gbeauche 1.1 }
1461     else {
1462 gbeauche 1.2 if (optimize_accum && isaccum(d))
1463     emit_byte(0x25);
1464     else {
1465     emit_byte(0x81);
1466     emit_byte(0xe0+d);
1467     }
1468     emit_long(i);
1469 gbeauche 1.1 }
1470     }
1471     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1472    
1473     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1474     {
1475 gbeauche 1.2 emit_byte(0x66);
1476     if (optimize_imm8 && isbyte(i)) {
1477     emit_byte(0x83);
1478     emit_byte(0xe0+d);
1479     emit_byte(i);
1480     }
1481     else {
1482     if (optimize_accum && isaccum(d))
1483     emit_byte(0x25);
1484     else {
1485     emit_byte(0x81);
1486     emit_byte(0xe0+d);
1487     }
1488     emit_word(i);
1489     }
1490 gbeauche 1.1 }
1491     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1492    
1493     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1494     {
1495     emit_byte(0x21);
1496     emit_byte(0xc0+8*s+d);
1497     }
1498     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1499    
1500     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1501     {
1502     emit_byte(0x66);
1503     emit_byte(0x21);
1504     emit_byte(0xc0+8*s+d);
1505     }
1506     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1507    
1508     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1509     {
1510     emit_byte(0x20);
1511     emit_byte(0xc0+8*s+d);
1512     }
1513     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1514    
1515     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1516     {
1517     if (optimize_imm8 && isbyte(i)) {
1518     emit_byte(0x83);
1519     emit_byte(0xc8+d);
1520     emit_byte(i);
1521     }
1522     else {
1523 gbeauche 1.2 if (optimize_accum && isaccum(d))
1524     emit_byte(0x0d);
1525     else {
1526 gbeauche 1.1 emit_byte(0x81);
1527     emit_byte(0xc8+d);
1528 gbeauche 1.2 }
1529 gbeauche 1.1 emit_long(i);
1530     }
1531     }
1532     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1533    
1534     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1535     {
1536     emit_byte(0x09);
1537     emit_byte(0xc0+8*s+d);
1538     }
1539     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1540    
1541     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1542     {
1543     emit_byte(0x66);
1544     emit_byte(0x09);
1545     emit_byte(0xc0+8*s+d);
1546     }
1547     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1548    
1549     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1550     {
1551     emit_byte(0x08);
1552     emit_byte(0xc0+8*s+d);
1553     }
1554     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1555    
1556     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1557     {
1558     emit_byte(0x11);
1559     emit_byte(0xc0+8*s+d);
1560     }
1561     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1562    
1563     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1564     {
1565     emit_byte(0x66);
1566     emit_byte(0x11);
1567     emit_byte(0xc0+8*s+d);
1568     }
1569     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1570    
1571     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1572     {
1573     emit_byte(0x10);
1574     emit_byte(0xc0+8*s+d);
1575     }
1576     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1577    
1578     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1579     {
1580     emit_byte(0x01);
1581     emit_byte(0xc0+8*s+d);
1582     }
1583     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1584    
1585     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1586     {
1587     emit_byte(0x66);
1588     emit_byte(0x01);
1589     emit_byte(0xc0+8*s+d);
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1594     {
1595     emit_byte(0x00);
1596     emit_byte(0xc0+8*s+d);
1597     }
1598     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1599    
1600     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1601     {
1602     if (isbyte(i)) {
1603     emit_byte(0x83);
1604     emit_byte(0xe8+d);
1605     emit_byte(i);
1606     }
1607     else {
1608 gbeauche 1.2 if (optimize_accum && isaccum(d))
1609     emit_byte(0x2d);
1610     else {
1611 gbeauche 1.1 emit_byte(0x81);
1612     emit_byte(0xe8+d);
1613 gbeauche 1.2 }
1614 gbeauche 1.1 emit_long(i);
1615     }
1616     }
1617     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1618    
1619     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1620     {
1621 gbeauche 1.2 if (optimize_accum && isaccum(d))
1622     emit_byte(0x2c);
1623     else {
1624 gbeauche 1.1 emit_byte(0x80);
1625     emit_byte(0xe8+d);
1626 gbeauche 1.2 }
1627 gbeauche 1.1 emit_byte(i);
1628     }
1629     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1630    
1631     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1632     {
1633     if (isbyte(i)) {
1634     emit_byte(0x83);
1635     emit_byte(0xc0+d);
1636     emit_byte(i);
1637     }
1638     else {
1639 gbeauche 1.2 if (optimize_accum && isaccum(d))
1640     emit_byte(0x05);
1641     else {
1642 gbeauche 1.1 emit_byte(0x81);
1643     emit_byte(0xc0+d);
1644 gbeauche 1.2 }
1645 gbeauche 1.1 emit_long(i);
1646     }
1647     }
1648     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1649    
1650     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1651     {
1652 gbeauche 1.2 emit_byte(0x66);
1653 gbeauche 1.1 if (isbyte(i)) {
1654     emit_byte(0x83);
1655     emit_byte(0xc0+d);
1656     emit_byte(i);
1657     }
1658     else {
1659 gbeauche 1.2 if (optimize_accum && isaccum(d))
1660     emit_byte(0x05);
1661     else {
1662 gbeauche 1.1 emit_byte(0x81);
1663     emit_byte(0xc0+d);
1664 gbeauche 1.2 }
1665 gbeauche 1.1 emit_word(i);
1666     }
1667     }
1668     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1669    
1670     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1671     {
1672 gbeauche 1.2 if (optimize_accum && isaccum(d))
1673     emit_byte(0x04);
1674     else {
1675     emit_byte(0x80);
1676     emit_byte(0xc0+d);
1677     }
1678 gbeauche 1.1 emit_byte(i);
1679     }
1680     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1681    
1682     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1683     {
1684     emit_byte(0x19);
1685     emit_byte(0xc0+8*s+d);
1686     }
1687     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1688    
1689     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1690     {
1691     emit_byte(0x66);
1692     emit_byte(0x19);
1693     emit_byte(0xc0+8*s+d);
1694     }
1695     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1696    
1697     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1698     {
1699     emit_byte(0x18);
1700     emit_byte(0xc0+8*s+d);
1701     }
1702     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1703    
1704     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1705     {
1706     emit_byte(0x29);
1707     emit_byte(0xc0+8*s+d);
1708     }
1709     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1710    
1711     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1712     {
1713     emit_byte(0x66);
1714     emit_byte(0x29);
1715     emit_byte(0xc0+8*s+d);
1716     }
1717     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1718    
1719     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1720     {
1721     emit_byte(0x28);
1722     emit_byte(0xc0+8*s+d);
1723     }
1724     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1725    
1726     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1727     {
1728     emit_byte(0x39);
1729     emit_byte(0xc0+8*s+d);
1730     }
1731     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1732    
1733     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1734     {
1735     if (optimize_imm8 && isbyte(i)) {
1736     emit_byte(0x83);
1737     emit_byte(0xf8+r);
1738     emit_byte(i);
1739     }
1740     else {
1741 gbeauche 1.2 if (optimize_accum && isaccum(r))
1742     emit_byte(0x3d);
1743     else {
1744 gbeauche 1.1 emit_byte(0x81);
1745     emit_byte(0xf8+r);
1746 gbeauche 1.2 }
1747 gbeauche 1.1 emit_long(i);
1748     }
1749     }
1750     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1751    
1752     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1753     {
1754     emit_byte(0x66);
1755     emit_byte(0x39);
1756     emit_byte(0xc0+8*s+d);
1757     }
1758     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1759    
1760 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1761     {
1762     emit_byte(0x80);
1763     emit_byte(0x3d);
1764     emit_long(d);
1765     emit_byte(s);
1766     }
1767     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1768    
1769 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1770     {
1771 gbeauche 1.2 if (optimize_accum && isaccum(d))
1772     emit_byte(0x3c);
1773     else {
1774 gbeauche 1.1 emit_byte(0x80);
1775     emit_byte(0xf8+d);
1776 gbeauche 1.2 }
1777 gbeauche 1.1 emit_byte(i);
1778     }
1779     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1780    
1781     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1782     {
1783     emit_byte(0x38);
1784     emit_byte(0xc0+8*s+d);
1785     }
1786     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1787    
1788     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1789     {
1790     int fi;
1791    
1792     switch(factor) {
1793     case 1: fi=0; break;
1794     case 2: fi=1; break;
1795     case 4: fi=2; break;
1796     case 8: fi=3; break;
1797     default: abort();
1798     }
1799     emit_byte(0x39);
1800     emit_byte(0x04+8*d);
1801     emit_byte(5+8*index+0x40*fi);
1802     emit_long(offset);
1803     }
1804     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1805    
1806     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1807     {
1808     emit_byte(0x31);
1809     emit_byte(0xc0+8*s+d);
1810     }
1811     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1812    
1813     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1814     {
1815     emit_byte(0x66);
1816     emit_byte(0x31);
1817     emit_byte(0xc0+8*s+d);
1818     }
1819     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1820    
1821     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1822     {
1823     emit_byte(0x30);
1824     emit_byte(0xc0+8*s+d);
1825     }
1826     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1827    
1828     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1829     {
1830     if (optimize_imm8 && isbyte(s)) {
1831     emit_byte(0x83);
1832     emit_byte(0x2d);
1833     emit_long(d);
1834     emit_byte(s);
1835     }
1836     else {
1837     emit_byte(0x81);
1838     emit_byte(0x2d);
1839     emit_long(d);
1840     emit_long(s);
1841     }
1842     }
1843     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1844    
1845     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1846     {
1847     if (optimize_imm8 && isbyte(s)) {
1848     emit_byte(0x83);
1849     emit_byte(0x3d);
1850     emit_long(d);
1851     emit_byte(s);
1852     }
1853     else {
1854     emit_byte(0x81);
1855     emit_byte(0x3d);
1856     emit_long(d);
1857     emit_long(s);
1858     }
1859     }
1860     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1861    
1862     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1863     {
1864     emit_byte(0x87);
1865     emit_byte(0xc0+8*r1+r2);
1866     }
1867     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1868    
1869     /*************************************************************************
1870     * FIXME: string-related instructions *
1871     *************************************************************************/
1872    
1873     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1874     {
1875     emit_byte(0xfc);
1876     }
1877     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1878    
1879     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1880     {
1881     emit_byte(0xfd);
1882     }
1883     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1884    
1885     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1886     {
1887     emit_byte(0xa4);
1888     }
1889     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1890    
1891     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1892     {
1893     emit_byte(0xa5);
1894     }
1895     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1896    
1897     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1898     {
1899     emit_byte(0xf3);
1900     }
1901     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1902    
1903     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1904     {
1905     raw_rep();
1906     raw_movs_b();
1907     }
1908     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1909    
1910     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1911     {
1912     raw_rep();
1913     raw_movs_l();
1914     }
1915     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1916    
1917     /*************************************************************************
1918     * FIXME: mem access modes probably wrong *
1919     *************************************************************************/
1920    
1921     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1922     {
1923     emit_byte(0x9c);
1924     }
1925     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1926    
1927     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1928     {
1929     emit_byte(0x9d);
1930     }
1931     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1932    
1933     /*************************************************************************
1934     * Unoptimizable stuff --- jump *
1935     *************************************************************************/
1936    
1937     static __inline__ void raw_call_r(R4 r)
1938     {
1939     emit_byte(0xff);
1940     emit_byte(0xd0+r);
1941 gbeauche 1.5 }
1942    
1943     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1944     {
1945     int mu;
1946     switch(m) {
1947     case 1: mu=0; break;
1948     case 2: mu=1; break;
1949     case 4: mu=2; break;
1950     case 8: mu=3; break;
1951     default: abort();
1952     }
1953     emit_byte(0xff);
1954     emit_byte(0x14);
1955     emit_byte(0x05+8*r+0x40*mu);
1956     emit_long(base);
1957 gbeauche 1.1 }
1958    
1959     static __inline__ void raw_jmp_r(R4 r)
1960     {
1961     emit_byte(0xff);
1962     emit_byte(0xe0+r);
1963     }
1964    
1965     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1966     {
1967     int mu;
1968     switch(m) {
1969     case 1: mu=0; break;
1970     case 2: mu=1; break;
1971     case 4: mu=2; break;
1972     case 8: mu=3; break;
1973     default: abort();
1974     }
1975     emit_byte(0xff);
1976     emit_byte(0x24);
1977     emit_byte(0x05+8*r+0x40*mu);
1978     emit_long(base);
1979     }
1980    
1981     static __inline__ void raw_jmp_m(uae_u32 base)
1982     {
1983     emit_byte(0xff);
1984     emit_byte(0x25);
1985     emit_long(base);
1986     }
1987    
1988    
1989     static __inline__ void raw_call(uae_u32 t)
1990     {
1991     emit_byte(0xe8);
1992     emit_long(t-(uae_u32)target-4);
1993     }
1994    
1995     static __inline__ void raw_jmp(uae_u32 t)
1996     {
1997     emit_byte(0xe9);
1998     emit_long(t-(uae_u32)target-4);
1999     }
2000    
2001     static __inline__ void raw_jl(uae_u32 t)
2002     {
2003     emit_byte(0x0f);
2004     emit_byte(0x8c);
2005     emit_long(t-(uae_u32)target-4);
2006     }
2007    
2008     static __inline__ void raw_jz(uae_u32 t)
2009     {
2010     emit_byte(0x0f);
2011     emit_byte(0x84);
2012     emit_long(t-(uae_u32)target-4);
2013     }
2014    
2015     static __inline__ void raw_jnz(uae_u32 t)
2016     {
2017     emit_byte(0x0f);
2018     emit_byte(0x85);
2019     emit_long(t-(uae_u32)target-4);
2020     }
2021    
/* Emit only the opcode bytes of a near JNZ; the caller must follow up with
 * the 32-bit displacement. */
static __inline__ void raw_jnz_l_oponly(void)
{
    emit_byte(0x0f);    /* two-byte opcode escape */
    emit_byte(0x85);    /* Jcc rel32, condition "not zero" */
}
2027    
/* Emit only the opcode bytes of a near Jcc for condition code `cc`; the
 * caller must follow up with the 32-bit displacement. */
static __inline__ void raw_jcc_l_oponly(int cc)
{
    emit_byte(0x0f);            /* two-byte opcode escape */
    emit_byte(cc + 0x80);       /* Jcc rel32 with the condition in the low bits */
}
2033    
/* Emit only the opcode of a short JNZ; the caller supplies the 8-bit
 * displacement. */
static __inline__ void raw_jnz_b_oponly(void)
{
    emit_byte(0x75);    /* Jcc rel8, condition "not zero" */
}
2038    
/* Emit only the opcode of a short JZ; the caller supplies the 8-bit
 * displacement. */
static __inline__ void raw_jz_b_oponly(void)
{
    emit_byte(0x74);    /* Jcc rel8, condition "zero" */
}
2043    
/* Emit only the opcode of a short Jcc for condition code `cc`; the caller
 * supplies the 8-bit displacement. */
static __inline__ void raw_jcc_b_oponly(int cc)
{
    emit_byte(cc + 0x70);       /* Jcc rel8 with the condition in the low bits */
}
2048    
/* Emit only the opcode of a near JMP; the caller supplies the 32-bit
 * displacement. */
static __inline__ void raw_jmp_l_oponly(void)
{
    emit_byte(0xe9);    /* JMP rel32 */
}
2053    
/* Emit only the opcode of a short JMP; the caller supplies the 8-bit
 * displacement. */
static __inline__ void raw_jmp_b_oponly(void)
{
    emit_byte(0xeb);    /* JMP rel8 */
}
2058    
/* RET: near return. */
static __inline__ void raw_ret(void)
{
    emit_byte(0xc3);    /* single-byte opcode */
}
2063    
/* NOP: one-byte no-operation. */
static __inline__ void raw_nop(void)
{
    emit_byte(0x90);    /* single-byte opcode */
}
2068    
2069 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
2070     {
2071     /* Source: GNU Binutils 2.12.90.0.15 */
2072     /* Various efficient no-op patterns for aligning code labels.
2073     Note: Don't try to assemble the instructions in the comments.
2074     0L and 0w are not legal. */
2075     static const uae_u8 f32_1[] =
2076     {0x90}; /* nop */
2077     static const uae_u8 f32_2[] =
2078     {0x89,0xf6}; /* movl %esi,%esi */
2079     static const uae_u8 f32_3[] =
2080     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
2081     static const uae_u8 f32_4[] =
2082     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2083     static const uae_u8 f32_5[] =
2084     {0x90, /* nop */
2085     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2086     static const uae_u8 f32_6[] =
2087     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
2088     static const uae_u8 f32_7[] =
2089     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2090     static const uae_u8 f32_8[] =
2091     {0x90, /* nop */
2092     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2093     static const uae_u8 f32_9[] =
2094     {0x89,0xf6, /* movl %esi,%esi */
2095     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2096     static const uae_u8 f32_10[] =
2097     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
2098     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2099     static const uae_u8 f32_11[] =
2100     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
2101     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2102     static const uae_u8 f32_12[] =
2103     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2104     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
2105     static const uae_u8 f32_13[] =
2106     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2107     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2108     static const uae_u8 f32_14[] =
2109     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
2110     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2111     static const uae_u8 f32_15[] =
2112     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2113     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2114     static const uae_u8 f32_16[] =
2115     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2116     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2117     static const uae_u8 *const f32_patt[] = {
2118     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
2119     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
2120     };
2121    
2122     int nloops = nbytes / 16;
2123     while (nloops-- > 0)
2124     emit_block(f32_16, sizeof(f32_16));
2125    
2126     nbytes %= 16;
2127     if (nbytes)
2128     emit_block(f32_patt[nbytes - 1], nbytes);
2129     }
2130    
2131 gbeauche 1.1
2132     /*************************************************************************
2133     * Flag handling, to and fro UAE flag register *
2134     *************************************************************************/
2135    
2136     #ifdef SAHF_SETO_PROFITABLE
2137    
2138     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2139    
2140     static __inline__ void raw_flags_to_reg(int r)
2141     {
2142     raw_lahf(0); /* Most flags in AH */
2143     //raw_setcc(r,0); /* V flag in AL */
2144     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2145    
2146     #if 1 /* Let's avoid those nasty partial register stalls */
2147     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2148     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2149     //live.state[FLAGTMP].status=CLEAN;
2150     live.state[FLAGTMP].status=INMEM;
2151     live.state[FLAGTMP].realreg=-1;
2152     /* We just "evicted" FLAGTMP. */
2153     if (live.nat[r].nholds!=1) {
2154     /* Huh? */
2155     abort();
2156     }
2157     live.nat[r].nholds=0;
2158     #endif
2159     }
2160    
2161     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2162     static __inline__ void raw_reg_to_flags(int r)
2163     {
2164     raw_cmp_b_ri(r,-127); /* set V */
2165     raw_sahf(0);
2166     }
2167    
2168     #else
2169    
2170     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2171     static __inline__ void raw_flags_to_reg(int r)
2172     {
2173     raw_pushfl();
2174     raw_pop_l_r(r);
2175     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2176     // live.state[FLAGTMP].status=CLEAN;
2177     live.state[FLAGTMP].status=INMEM;
2178     live.state[FLAGTMP].realreg=-1;
2179     /* We just "evicted" FLAGTMP. */
2180     if (live.nat[r].nholds!=1) {
2181     /* Huh? */
2182     abort();
2183     }
2184     live.nat[r].nholds=0;
2185     }
2186    
2187     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2188     static __inline__ void raw_reg_to_flags(int r)
2189     {
2190     raw_push_l_r(r);
2191     raw_popfl();
2192     }
2193    
2194     #endif
2195    
2196     /* Apparently, there are enough instructions between flag store and
2197     flag reload to avoid the partial memory stall */
2198     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2199     {
2200     #if 1
2201     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2202     #else
2203     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2204     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2205     #endif
2206     }
2207    
2208     /* FLAGX is byte sized, and we *do* write it at that size */
2209     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2210     {
2211     if (live.nat[target].canbyte)
2212     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2213     else if (live.nat[target].canword)
2214     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2215     else
2216     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2217     }
2218    
2219 gbeauche 1.11 #define NATIVE_FLAG_Z 0x40
2220     static __inline__ void raw_flags_set_zero(int f, int r, int t)
2221     {
2222     // FIXME: this is really suboptimal
2223     raw_pushfl();
2224     raw_pop_l_r(f);
2225     raw_and_l_ri(f,~NATIVE_FLAG_Z);
2226     raw_test_l_rr(r,r);
2227     raw_mov_l_ri(r,0);
2228     raw_mov_l_ri(t,NATIVE_FLAG_Z);
2229     raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
2230     raw_or_l(f,r);
2231     raw_push_l_r(f);
2232     raw_popfl();
2233     }
2234 gbeauche 1.1
2235     static __inline__ void raw_inc_sp(int off)
2236     {
2237 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2238 gbeauche 1.1 }
2239    
2240     /*************************************************************************
2241     * Handling mistaken direct memory access *
2242     *************************************************************************/
2243    
2244     // gb-- I don't need that part for JIT Basilisk II
2245     #if defined(NATMEM_OFFSET) && 0
2246     #include <asm/sigcontext.h>
2247     #include <signal.h>
2248    
2249     #define SIG_READ 1
2250     #define SIG_WRITE 2
2251    
2252     static int in_handler=0;
2253     static uae_u8 veccode[256];
2254    
2255     static void vec(int x, struct sigcontext sc)
2256     {
2257     uae_u8* i=(uae_u8*)sc.eip;
2258     uae_u32 addr=sc.cr2;
2259     int r=-1;
2260     int size=4;
2261     int dir=-1;
2262     int len=0;
2263     int j;
2264    
2265     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2266     if (!canbang)
2267     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2268     if (in_handler)
2269     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2270    
2271     if (canbang && i>=compiled_code && i<=current_compile_p) {
2272     if (*i==0x66) {
2273     i++;
2274     size=2;
2275     len++;
2276     }
2277    
2278     switch(i[0]) {
2279     case 0x8a:
2280     if ((i[1]&0xc0)==0x80) {
2281     r=(i[1]>>3)&7;
2282     dir=SIG_READ;
2283     size=1;
2284     len+=6;
2285     break;
2286     }
2287     break;
2288     case 0x88:
2289     if ((i[1]&0xc0)==0x80) {
2290     r=(i[1]>>3)&7;
2291     dir=SIG_WRITE;
2292     size=1;
2293     len+=6;
2294     break;
2295     }
2296     break;
2297     case 0x8b:
2298     if ((i[1]&0xc0)==0x80) {
2299     r=(i[1]>>3)&7;
2300     dir=SIG_READ;
2301     len+=6;
2302     break;
2303     }
2304     if ((i[1]&0xc0)==0x40) {
2305     r=(i[1]>>3)&7;
2306     dir=SIG_READ;
2307     len+=3;
2308     break;
2309     }
2310     break;
2311     case 0x89:
2312     if ((i[1]&0xc0)==0x80) {
2313     r=(i[1]>>3)&7;
2314     dir=SIG_WRITE;
2315     len+=6;
2316     break;
2317     }
2318     if ((i[1]&0xc0)==0x40) {
2319     r=(i[1]>>3)&7;
2320     dir=SIG_WRITE;
2321     len+=3;
2322     break;
2323     }
2324     break;
2325     }
2326     }
2327    
2328     if (r!=-1) {
2329     void* pr=NULL;
2330     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2331    
2332     switch(r) {
2333     case 0: pr=&(sc.eax); break;
2334     case 1: pr=&(sc.ecx); break;
2335     case 2: pr=&(sc.edx); break;
2336     case 3: pr=&(sc.ebx); break;
2337     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2338     case 5: pr=(size>1)?
2339     (void*)(&(sc.ebp)):
2340     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2341     case 6: pr=(size>1)?
2342     (void*)(&(sc.esi)):
2343     (void*)(((uae_u8*)&(sc.edx))+1); break;
2344     case 7: pr=(size>1)?
2345     (void*)(&(sc.edi)):
2346     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2347     default: abort();
2348     }
2349     if (pr) {
2350     blockinfo* bi;
2351    
2352     if (currprefs.comp_oldsegv) {
2353     addr-=NATMEM_OFFSET;
2354    
2355     if ((addr>=0x10000000 && addr<0x40000000) ||
2356     (addr>=0x50000000)) {
2357     write_log("Suspicious address in %x SEGV handler.\n",addr);
2358     }
2359     if (dir==SIG_READ) {
2360     switch(size) {
2361     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2362     case 2: *((uae_u16*)pr)=get_word(addr); break;
2363     case 4: *((uae_u32*)pr)=get_long(addr); break;
2364     default: abort();
2365     }
2366     }
2367     else { /* write */
2368     switch(size) {
2369     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2370     case 2: put_word(addr,*((uae_u16*)pr)); break;
2371     case 4: put_long(addr,*((uae_u32*)pr)); break;
2372     default: abort();
2373     }
2374     }
2375     write_log("Handled one access!\n");
2376     fflush(stdout);
2377     segvcount++;
2378     sc.eip+=len;
2379     }
2380     else {
2381     void* tmp=target;
2382     int i;
2383     uae_u8 vecbuf[5];
2384    
2385     addr-=NATMEM_OFFSET;
2386    
2387     if ((addr>=0x10000000 && addr<0x40000000) ||
2388     (addr>=0x50000000)) {
2389     write_log("Suspicious address in %x SEGV handler.\n",addr);
2390     }
2391    
2392     target=(uae_u8*)sc.eip;
2393     for (i=0;i<5;i++)
2394     vecbuf[i]=target[i];
2395     emit_byte(0xe9);
2396     emit_long((uae_u32)veccode-(uae_u32)target-4);
2397     write_log("Create jump to %p\n",veccode);
2398    
2399     write_log("Handled one access!\n");
2400     fflush(stdout);
2401     segvcount++;
2402    
2403     target=veccode;
2404    
2405     if (dir==SIG_READ) {
2406     switch(size) {
2407     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2408     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2409     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2410     default: abort();
2411     }
2412     }
2413     else { /* write */
2414     switch(size) {
2415     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2416     case 2: put_word(addr,*((uae_u16*)pr)); break;
2417     case 4: put_long(addr,*((uae_u32*)pr)); break;
2418     default: abort();
2419     }
2420     }
2421     for (i=0;i<5;i++)
2422     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2423     raw_mov_l_mi((uae_u32)&in_handler,0);
2424     emit_byte(0xe9);
2425     emit_long(sc.eip+len-(uae_u32)target-4);
2426     in_handler=1;
2427     target=tmp;
2428     }
2429     bi=active;
2430     while (bi) {
2431     if (bi->handler &&
2432     (uae_u8*)bi->direct_handler<=i &&
2433     (uae_u8*)bi->nexthandler>i) {
2434     write_log("deleted trigger (%p<%p<%p) %p\n",
2435     bi->handler,
2436     i,
2437     bi->nexthandler,
2438     bi->pc_p);
2439     invalidate_block(bi);
2440     raise_in_cl_list(bi);
2441     set_special(0);
2442     return;
2443     }
2444     bi=bi->next;
2445     }
2446     /* Not found in the active list. Might be a rom routine that
2447     is in the dormant list */
2448     bi=dormant;
2449     while (bi) {
2450     if (bi->handler &&
2451     (uae_u8*)bi->direct_handler<=i &&
2452     (uae_u8*)bi->nexthandler>i) {
2453     write_log("deleted trigger (%p<%p<%p) %p\n",
2454     bi->handler,
2455     i,
2456     bi->nexthandler,
2457     bi->pc_p);
2458     invalidate_block(bi);
2459     raise_in_cl_list(bi);
2460     set_special(0);
2461     return;
2462     }
2463     bi=bi->next;
2464     }
2465     write_log("Huh? Could not find trigger!\n");
2466     return;
2467     }
2468     }
2469     write_log("Can't handle access!\n");
2470     for (j=0;j<10;j++) {
2471     write_log("instruction byte %2d is %02x\n",j,i[j]);
2472     }
2473     write_log("Please send the above info (starting at \"fault address\") to\n"
2474     "bmeyer@csse.monash.edu.au\n"
2475     "This shouldn't happen ;-)\n");
2476     fflush(stdout);
2477     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2478     }
2479     #endif
2480    
2481    
2482     /*************************************************************************
2483     * Checking for CPU features *
2484     *************************************************************************/
2485    
2486 gbeauche 1.3 struct cpuinfo_x86 {
2487     uae_u8 x86; // CPU family
2488     uae_u8 x86_vendor; // CPU vendor
2489     uae_u8 x86_processor; // CPU canonical processor type
2490     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2491     uae_u32 x86_hwcap;
2492     uae_u8 x86_model;
2493     uae_u8 x86_mask;
2494     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2495     char x86_vendor_id[16];
2496     };
2497     struct cpuinfo_x86 cpuinfo;
2498    
/* CPU vendors, as stored in cpuinfo_x86::x86_vendor. */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognized */
};
2511    
/*
 * Canonical processor types, as stored in cpuinfo_x86::x86_processor.
 * The values index x86_processor_string_table[] and x86_alignments[].
 */
enum {
    X86_PROCESSOR_I386,         /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_max           /* number of entries; also "not identified" */
};
2522    
2523     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2524     "80386",
2525     "80486",
2526     "Pentium",
2527     "PentiumPro",
2528     "K6",
2529     "Athlon",
2530     "Pentium4"
2531     };
2532    
2533     static struct ptt {
2534     const int align_loop;
2535     const int align_loop_max_skip;
2536     const int align_jump;
2537     const int align_jump_max_skip;
2538     const int align_func;
2539     }
2540     x86_alignments[X86_PROCESSOR_max] = {
2541     { 4, 3, 4, 3, 4 },
2542     { 16, 15, 16, 15, 16 },
2543     { 16, 7, 16, 7, 16 },
2544     { 16, 15, 16, 7, 16 },
2545     { 32, 7, 32, 7, 32 },
2546 gbeauche 1.4 { 16, 7, 16, 7, 16 },
2547 gbeauche 1.3 { 0, 0, 0, 0, 0 }
2548     };
2549 gbeauche 1.1
2550 gbeauche 1.3 static void
2551     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2552 gbeauche 1.1 {
2553 gbeauche 1.3 char *v = c->x86_vendor_id;
2554    
2555     if (!strcmp(v, "GenuineIntel"))
2556     c->x86_vendor = X86_VENDOR_INTEL;
2557     else if (!strcmp(v, "AuthenticAMD"))
2558     c->x86_vendor = X86_VENDOR_AMD;
2559     else if (!strcmp(v, "CyrixInstead"))
2560     c->x86_vendor = X86_VENDOR_CYRIX;
2561     else if (!strcmp(v, "Geode by NSC"))
2562     c->x86_vendor = X86_VENDOR_NSC;
2563     else if (!strcmp(v, "UMC UMC UMC "))
2564     c->x86_vendor = X86_VENDOR_UMC;
2565     else if (!strcmp(v, "CentaurHauls"))
2566     c->x86_vendor = X86_VENDOR_CENTAUR;
2567     else if (!strcmp(v, "NexGenDriven"))
2568     c->x86_vendor = X86_VENDOR_NEXGEN;
2569     else if (!strcmp(v, "RiseRiseRise"))
2570     c->x86_vendor = X86_VENDOR_RISE;
2571     else if (!strcmp(v, "GenuineTMx86") ||
2572     !strcmp(v, "TransmetaCPU"))
2573     c->x86_vendor = X86_VENDOR_TRANSMETA;
2574     else
2575     c->x86_vendor = X86_VENDOR_UNKNOWN;
2576     }
2577 gbeauche 1.1
2578 gbeauche 1.3 static void
2579     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2580     {
2581     static uae_u8 cpuid_space[256];
2582     uae_u8* tmp=get_target();
2583 gbeauche 1.1
2584 gbeauche 1.3 set_target(cpuid_space);
2585     raw_push_l_r(0); /* eax */
2586     raw_push_l_r(1); /* ecx */
2587     raw_push_l_r(2); /* edx */
2588     raw_push_l_r(3); /* ebx */
2589     raw_mov_l_rm(0,(uae_u32)&op);
2590     raw_cpuid(0);
2591     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2592     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2593     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2594     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2595     raw_pop_l_r(3);
2596     raw_pop_l_r(2);
2597     raw_pop_l_r(1);
2598     raw_pop_l_r(0);
2599     raw_ret();
2600     set_target(tmp);
2601 gbeauche 1.1
2602 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
2603 gbeauche 1.1 }
2604    
2605 gbeauche 1.3 static void
2606     raw_init_cpu(void)
2607 gbeauche 1.1 {
2608 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
2609    
2610     /* Defaults */
2611     c->x86_vendor = X86_VENDOR_UNKNOWN;
2612     c->cpuid_level = -1; /* CPUID not detected */
2613     c->x86_model = c->x86_mask = 0; /* So far unknown... */
2614     c->x86_vendor_id[0] = '\0'; /* Unset */
2615     c->x86_hwcap = 0;
2616    
2617     /* Get vendor name */
2618     c->x86_vendor_id[12] = '\0';
2619     cpuid(0x00000000,
2620     (uae_u32 *)&c->cpuid_level,
2621     (uae_u32 *)&c->x86_vendor_id[0],
2622     (uae_u32 *)&c->x86_vendor_id[8],
2623     (uae_u32 *)&c->x86_vendor_id[4]);
2624     x86_get_cpu_vendor(c);
2625    
2626     /* Intel-defined flags: level 0x00000001 */
2627     c->x86_brand_id = 0;
2628     if ( c->cpuid_level >= 0x00000001 ) {
2629     uae_u32 tfms, brand_id;
2630     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2631     c->x86 = (tfms >> 8) & 15;
2632     c->x86_model = (tfms >> 4) & 15;
2633     c->x86_brand_id = brand_id & 0xff;
2634     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2635     (c->x86 == 0xf)) {
2636     /* AMD Extended Family and Model Values */
2637     c->x86 += (tfms >> 20) & 0xff;
2638     c->x86_model += (tfms >> 12) & 0xf0;
2639     }
2640     c->x86_mask = tfms & 15;
2641     } else {
2642     /* Have CPUID level 0 only - unheard of */
2643     c->x86 = 4;
2644     }
2645    
2646     /* Canonicalize processor ID */
2647     c->x86_processor = X86_PROCESSOR_max;
2648     switch (c->x86) {
2649     case 3:
2650     c->x86_processor = X86_PROCESSOR_I386;
2651     break;
2652     case 4:
2653     c->x86_processor = X86_PROCESSOR_I486;
2654     break;
2655     case 5:
2656     if (c->x86_vendor == X86_VENDOR_AMD)
2657     c->x86_processor = X86_PROCESSOR_K6;
2658     else
2659     c->x86_processor = X86_PROCESSOR_PENTIUM;
2660     break;
2661     case 6:
2662     if (c->x86_vendor == X86_VENDOR_AMD)
2663     c->x86_processor = X86_PROCESSOR_ATHLON;
2664     else
2665     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2666     break;
2667     case 15:
2668     if (c->x86_vendor == X86_VENDOR_INTEL) {
2669     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2670     if (c->x86_brand_id >= 8)
2671     c->x86_processor = X86_PROCESSOR_PENTIUM4;
2672     }
2673     break;
2674     }
2675     if (c->x86_processor == X86_PROCESSOR_max) {
2676     fprintf(stderr, "Error: unknown processor type\n");
2677     fprintf(stderr, " Family : %d\n", c->x86);
2678     fprintf(stderr, " Model : %d\n", c->x86_model);
2679     fprintf(stderr, " Mask : %d\n", c->x86_mask);
2680     if (c->x86_brand_id)
2681     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2682     abort();
2683     }
2684    
2685     /* Have CMOV support? */
2686     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2687    
2688     /* Can the host CPU suffer from partial register stalls? */
2689     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2690     #if 1
2691     /* It appears that partial register writes are a bad idea even on
2692 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
2693     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2694 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2695     have_rat_stall = true;
2696 gbeauche 1.1 #endif
2697 gbeauche 1.3
2698     /* Alignments */
2699     if (tune_alignment) {
2700     align_loops = x86_alignments[c->x86_processor].align_loop;
2701     align_jumps = x86_alignments[c->x86_processor].align_jump;
2702     }
2703    
2704     write_log("Max CPUID level=%d Processor is %s [%s]\n",
2705     c->cpuid_level, c->x86_vendor_id,
2706     x86_processor_string_table[c->x86_processor]);
2707 gbeauche 1.1 }
2708    
2709 gbeauche 1.10 static bool target_check_bsf(void)
2710     {
2711     bool mismatch = false;
2712     for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
2713     for (int g_CF = 0; g_CF <= 1; g_CF++) {
2714     for (int g_OF = 0; g_OF <= 1; g_OF++) {
2715     for (int g_SF = 0; g_SF <= 1; g_SF++) {
2716     for (int value = -1; value <= 1; value++) {
2717     int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
2718     int tmp = value;
2719     __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
2720 gbeauche 1.12 : "+r" (flags), "+r" (tmp) : : "cc");
2721 gbeauche 1.10 int OF = (flags >> 11) & 1;
2722     int SF = (flags >> 7) & 1;
2723     int ZF = (flags >> 6) & 1;
2724     int CF = flags & 1;
2725     tmp = (value == 0);
2726     if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
2727     mismatch = true;
2728     }
2729     }}}}
2730     if (mismatch)
2731     write_log("Target CPU defines all flags on BSF instruction\n");
2732     return !mismatch;
2733     }
2734    
2735 gbeauche 1.1
2736     /*************************************************************************
2737     * FPU stuff *
2738     *************************************************************************/
2739    
2740    
2741     static __inline__ void raw_fp_init(void)
2742     {
2743     int i;
2744    
2745     for (i=0;i<N_FREGS;i++)
2746     live.spos[i]=-2;
2747     live.tos=-1; /* Stack is empty */
2748     }
2749    
2750     static __inline__ void raw_fp_cleanup_drop(void)
2751     {
2752     #if 0
2753     /* using FINIT instead of popping all the entries.
2754     Seems to have side effects --- there is display corruption in
2755     Quake when this is used */
2756     if (live.tos>1) {
2757     emit_byte(0x9b);
2758     emit_byte(0xdb);
2759     emit_byte(0xe3);
2760     live.tos=-1;
2761     }
2762     #endif
2763     while (live.tos>=1) {
2764     emit_byte(0xde);
2765     emit_byte(0xd9);
2766     live.tos-=2;
2767     }
2768     while (live.tos>=0) {
2769     emit_byte(0xdd);
2770     emit_byte(0xd8);
2771     live.tos--;
2772     }
2773     raw_fp_init();
2774     }
2775    
2776     static __inline__ void make_tos(int r)
2777     {
2778     int p,q;
2779    
2780     if (live.spos[r]<0) { /* Register not yet on stack */
2781     emit_byte(0xd9);
2782     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2783     live.tos++;
2784     live.spos[r]=live.tos;
2785     live.onstack[live.tos]=r;
2786     return;
2787     }
2788     /* Register is on stack */
2789     if (live.tos==live.spos[r])
2790     return;
2791     p=live.spos[r];
2792     q=live.onstack[live.tos];
2793    
2794     emit_byte(0xd9);
2795     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2796     live.onstack[live.tos]=r;
2797     live.spos[r]=live.tos;
2798     live.onstack[p]=q;
2799     live.spos[q]=p;
2800     }
2801    
2802     static __inline__ void make_tos2(int r, int r2)
2803     {
2804     int q;
2805    
2806     make_tos(r2); /* Put the reg that's supposed to end up in position2
2807     on top */
2808    
2809     if (live.spos[r]<0) { /* Register not yet on stack */
2810     make_tos(r); /* This will extend the stack */
2811     return;
2812     }
2813     /* Register is on stack */
2814     emit_byte(0xd9);
2815     emit_byte(0xc9); /* Move r2 into position 2 */
2816    
2817     q=live.onstack[live.tos-1];
2818     live.onstack[live.tos]=q;
2819     live.spos[q]=live.tos;
2820     live.onstack[live.tos-1]=r2;
2821     live.spos[r2]=live.tos-1;
2822    
2823     make_tos(r); /* And r into 1 */
2824     }
2825    
2826     static __inline__ int stackpos(int r)
2827     {
2828     if (live.spos[r]<0)
2829     abort();
2830     if (live.tos<live.spos[r]) {
2831     printf("Looking for spos for fnreg %d\n",r);
2832     abort();
2833     }
2834     return live.tos-live.spos[r];
2835     }
2836    
2837     static __inline__ void usereg(int r)
2838     {
2839     if (live.spos[r]<0)
2840     make_tos(r);
2841     }
2842    
2843     /* This is called with one FP value in a reg *above* tos, which it will
2844     pop off the stack if necessary */
2845     static __inline__ void tos_make(int r)
2846     {
2847     if (live.spos[r]<0) {
2848     live.tos++;
2849     live.spos[r]=live.tos;
2850     live.onstack[live.tos]=r;
2851     return;
2852     }
2853     emit_byte(0xdd);
2854     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2855     and pop it*/
2856     }
2857    
2858    
2859     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2860     {
2861     make_tos(r);
2862     emit_byte(0xdd);
2863     emit_byte(0x15);
2864     emit_long(m);
2865     }
2866     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2867    
2868     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2869     {
2870     make_tos(r);
2871     emit_byte(0xdd);
2872     emit_byte(0x1d);
2873     emit_long(m);
2874     live.onstack[live.tos]=-1;
2875     live.tos--;
2876     live.spos[r]=-2;
2877     }
2878     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2879    
2880     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2881     {
2882     emit_byte(0xdd);
2883     emit_byte(0x05);
2884     emit_long(m);
2885     tos_make(r);
2886     }
2887     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2888    
2889     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2890     {
2891     emit_byte(0xdb);
2892     emit_byte(0x05);
2893     emit_long(m);
2894     tos_make(r);
2895     }
2896     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2897    
2898     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2899     {
2900     make_tos(r);
2901     emit_byte(0xdb);
2902     emit_byte(0x15);
2903     emit_long(m);
2904     }
2905     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2906    
2907     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2908     {
2909     emit_byte(0xd9);
2910     emit_byte(0x05);
2911     emit_long(m);
2912     tos_make(r);
2913     }
2914     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2915    
2916     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2917     {
2918     make_tos(r);
2919     emit_byte(0xd9);
2920     emit_byte(0x15);
2921     emit_long(m);
2922     }
2923     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2924    
2925     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2926     {
2927     int rs;
2928    
2929     /* Stupid x87 can't write a long double to mem without popping the
2930     stack! */
2931     usereg(r);
2932     rs=stackpos(r);
2933     emit_byte(0xd9); /* Get a copy to the top of stack */
2934     emit_byte(0xc0+rs);
2935    
2936     emit_byte(0xdb); /* store and pop it */
2937     emit_byte(0x3d);
2938     emit_long(m);
2939     }
2940     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2941    
2942     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2943     {
2944     int rs;
2945    
2946     make_tos(r);
2947     emit_byte(0xdb); /* store and pop it */
2948     emit_byte(0x3d);
2949     emit_long(m);
2950     live.onstack[live.tos]=-1;
2951     live.tos--;
2952     live.spos[r]=-2;
2953     }
2954     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2955    
2956     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2957     {
2958     emit_byte(0xdb);
2959     emit_byte(0x2d);
2960     emit_long(m);
2961     tos_make(r);
2962     }
2963     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2964    
2965     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2966     {
2967     emit_byte(0xd9);
2968     emit_byte(0xeb);
2969     tos_make(r);
2970     }
2971     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2972    
2973     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2974     {
2975     emit_byte(0xd9);
2976     emit_byte(0xec);
2977     tos_make(r);
2978     }
2979     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2980    
2981     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2982     {
2983     emit_byte(0xd9);
2984     emit_byte(0xea);
2985     tos_make(r);
2986     }
2987     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2988    
2989     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2990     {
2991     emit_byte(0xd9);
2992     emit_byte(0xed);
2993     tos_make(r);
2994     }
2995     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2996    
2997     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2998     {
2999     emit_byte(0xd9);
3000     emit_byte(0xe8);
3001     tos_make(r);
3002     }
3003     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
3004    
3005     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3006     {
3007     emit_byte(0xd9);
3008     emit_byte(0xee);
3009     tos_make(r);
3010     }
3011     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
3012    
3013     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
3014     {
3015     int ds;
3016    
3017     usereg(s);
3018     ds=stackpos(s);
3019     if (ds==0 && live.spos[d]>=0) {
3020     /* source is on top of stack, and we already have the dest */
3021     int dd=stackpos(d);
3022     emit_byte(0xdd);
3023     emit_byte(0xd0+dd);
3024     }
3025     else {
3026     emit_byte(0xd9);
3027     emit_byte(0xc0+ds); /* duplicate source on tos */
3028     tos_make(d); /* store to destination, pop if necessary */
3029     }
3030     }
3031     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
3032    
3033     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3034     {
3035     emit_byte(0xd9);
3036     emit_byte(0xa8+index);
3037     emit_long(base);
3038     }
3039     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
3040    
3041    
3042     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3043     {
3044     int ds;
3045    
3046     if (d!=s) {
3047     usereg(s);
3048     ds=stackpos(s);
3049     emit_byte(0xd9);
3050     emit_byte(0xc0+ds); /* duplicate source */
3051     emit_byte(0xd9);
3052     emit_byte(0xfa); /* take square root */
3053     tos_make(d); /* store to destination */
3054     }
3055     else {
3056     make_tos(d);
3057     emit_byte(0xd9);
3058     emit_byte(0xfa); /* take square root */
3059     }
3060     }
3061     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3062    
3063     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3064     {
3065     int ds;
3066    
3067     if (d!=s) {
3068     usereg(s);
3069     ds=stackpos(s);
3070     emit_byte(0xd9);
3071     emit_byte(0xc0+ds); /* duplicate source */
3072     emit_byte(0xd9);
3073     emit_byte(0xe1); /* take fabs */
3074     tos_make(d); /* store to destination */
3075     }
3076     else {
3077     make_tos(d);
3078     emit_byte(0xd9);
3079     emit_byte(0xe1); /* take fabs */
3080     }
3081     }
3082     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3083    
3084     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3085     {
3086     int ds;
3087    
3088     if (d!=s) {
3089     usereg(s);
3090     ds=stackpos(s);
3091     emit_byte(0xd9);
3092     emit_byte(0xc0+ds); /* duplicate source */
3093     emit_byte(0xd9);
3094     emit_byte(0xfc); /* take frndint */
3095     tos_make(d); /* store to destination */
3096     }
3097     else {
3098     make_tos(d);
3099     emit_byte(0xd9);
3100     emit_byte(0xfc); /* take frndint */
3101     }
3102     }
3103     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3104    
3105     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3106     {
3107     int ds;
3108    
3109     if (d!=s) {
3110     usereg(s);
3111     ds=stackpos(s);
3112     emit_byte(0xd9);
3113     emit_byte(0xc0+ds); /* duplicate source */
3114     emit_byte(0xd9);
3115     emit_byte(0xff); /* take cos */
3116     tos_make(d); /* store to destination */
3117     }
3118     else {
3119     make_tos(d);
3120     emit_byte(0xd9);
3121     emit_byte(0xff); /* take cos */
3122     }
3123     }
3124     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3125    
3126     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3127     {
3128     int ds;
3129    
3130     if (d!=s) {
3131     usereg(s);
3132     ds=stackpos(s);
3133     emit_byte(0xd9);
3134     emit_byte(0xc0+ds); /* duplicate source */
3135     emit_byte(0xd9);
3136     emit_byte(0xfe); /* take sin */
3137     tos_make(d); /* store to destination */
3138     }
3139     else {
3140     make_tos(d);
3141     emit_byte(0xd9);
3142     emit_byte(0xfe); /* take sin */
3143     }
3144     }
3145     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3146    
3147     double one=1;
3148     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3149     {
3150     int ds;
3151    
3152     usereg(s);
3153     ds=stackpos(s);
3154     emit_byte(0xd9);
3155     emit_byte(0xc0+ds); /* duplicate source */
3156    
3157     emit_byte(0xd9);
3158     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3159     emit_byte(0xd9);
3160     emit_byte(0xfc); /* rndint */
3161     emit_byte(0xd9);
3162     emit_byte(0xc9); /* swap top two elements */
3163     emit_byte(0xd8);
3164     emit_byte(0xe1); /* subtract rounded from original */
3165     emit_byte(0xd9);
3166     emit_byte(0xf0); /* f2xm1 */
3167     emit_byte(0xdc);
3168     emit_byte(0x05);
3169     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3170     emit_byte(0xd9);
3171     emit_byte(0xfd); /* and scale it */
3172     emit_byte(0xdd);
3173     emit_byte(0xd9); /* take he rounded value off */
3174     tos_make(d); /* store to destination */
3175     }
3176     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3177    
3178     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3179     {
3180     int ds;
3181    
3182     usereg(s);
3183     ds=stackpos(s);
3184     emit_byte(0xd9);
3185     emit_byte(0xc0+ds); /* duplicate source */
3186     emit_byte(0xd9);
3187     emit_byte(0xea); /* fldl2e */
3188     emit_byte(0xde);
3189     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3190    
3191     emit_byte(0xd9);
3192     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3193     emit_byte(0xd9);
3194     emit_byte(0xfc); /* rndint */
3195     emit_byte(0xd9);
3196     emit_byte(0xc9); /* swap top two elements */
3197     emit_byte(0xd8);
3198     emit_byte(0xe1); /* subtract rounded from original */
3199     emit_byte(0xd9);
3200     emit_byte(0xf0); /* f2xm1 */
3201     emit_byte(0xdc);
3202     emit_byte(0x05);
3203     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3204     emit_byte(0xd9);
3205     emit_byte(0xfd); /* and scale it */
3206     emit_byte(0xdd);
3207     emit_byte(0xd9); /* take he rounded value off */
3208     tos_make(d); /* store to destination */
3209     }
3210     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3211    
3212     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3213     {
3214     int ds;
3215    
3216     usereg(s);
3217     ds=stackpos(s);
3218     emit_byte(0xd9);
3219     emit_byte(0xc0+ds); /* duplicate source */
3220     emit_byte(0xd9);
3221     emit_byte(0xe8); /* push '1' */
3222     emit_byte(0xd9);
3223     emit_byte(0xc9); /* swap top two */
3224     emit_byte(0xd9);
3225     emit_byte(0xf1); /* take 1*log2(x) */
3226     tos_make(d); /* store to destination */
3227     }
3228     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3229    
3230    
3231     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3232     {
3233     int ds;
3234    
3235     if (d!=s) {
3236     usereg(s);
3237     ds=stackpos(s);
3238     emit_byte(0xd9);
3239     emit_byte(0xc0+ds); /* duplicate source */
3240     emit_byte(0xd9);
3241     emit_byte(0xe0); /* take fchs */
3242     tos_make(d); /* store to destination */
3243     }
3244     else {
3245     make_tos(d);
3246     emit_byte(0xd9);
3247     emit_byte(0xe0); /* take fchs */
3248     }
3249     }
3250     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3251    
3252     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3253     {
3254     int ds;
3255    
3256     usereg(s);
3257     usereg(d);
3258    
3259     if (live.spos[s]==live.tos) {
3260     /* Source is on top of stack */
3261     ds=stackpos(d);
3262     emit_byte(0xdc);
3263     emit_byte(0xc0+ds); /* add source to dest*/
3264     }
3265     else {
3266     make_tos(d);
3267     ds=stackpos(s);
3268    
3269     emit_byte(0xd8);
3270     emit_byte(0xc0+ds); /* add source to dest*/
3271     }
3272     }
3273     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3274    
3275     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3276     {
3277     int ds;
3278    
3279     usereg(s);
3280     usereg(d);
3281    
3282     if (live.spos[s]==live.tos) {
3283     /* Source is on top of stack */
3284     ds=stackpos(d);
3285     emit_byte(0xdc);
3286     emit_byte(0xe8+ds); /* sub source from dest*/
3287     }
3288     else {
3289     make_tos(d);
3290     ds=stackpos(s);
3291    
3292     emit_byte(0xd8);
3293     emit_byte(0xe0+ds); /* sub src from dest */
3294     }
3295     }
3296     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3297    
3298     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3299     {
3300     int ds;
3301    
3302     usereg(s);
3303     usereg(d);
3304    
3305     make_tos(d);
3306     ds=stackpos(s);
3307    
3308     emit_byte(0xdd);
3309     emit_byte(0xe0+ds); /* cmp dest with source*/
3310     }
3311     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3312    
3313     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3314     {
3315     int ds;
3316    
3317     usereg(s);
3318     usereg(d);
3319    
3320     if (live.spos[s]==live.tos) {
3321     /* Source is on top of stack */
3322     ds=stackpos(d);
3323     emit_byte(0xdc);
3324     emit_byte(0xc8+ds); /* mul dest by source*/
3325     }
3326     else {
3327     make_tos(d);
3328     ds=stackpos(s);
3329    
3330     emit_byte(0xd8);
3331     emit_byte(0xc8+ds); /* mul dest by source*/
3332     }
3333     }
3334     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3335    
3336     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3337     {
3338     int ds;
3339    
3340     usereg(s);
3341     usereg(d);
3342    
3343     if (live.spos[s]==live.tos) {
3344     /* Source is on top of stack */
3345     ds=stackpos(d);
3346     emit_byte(0xdc);
3347     emit_byte(0xf8+ds); /* div dest by source */
3348     }
3349     else {
3350     make_tos(d);
3351     ds=stackpos(s);
3352    
3353     emit_byte(0xd8);
3354     emit_byte(0xf0+ds); /* div dest by source*/
3355     }
3356     }
3357     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3358    
3359     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3360     {
3361     int ds;
3362    
3363     usereg(s);
3364     usereg(d);
3365    
3366     make_tos2(d,s);
3367     ds=stackpos(s);
3368    
3369     if (ds!=1) {
3370     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3371     abort();
3372     }
3373     emit_byte(0xd9);
3374     emit_byte(0xf8); /* take rem from dest by source */
3375     }
3376     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3377    
3378     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3379     {
3380     int ds;
3381    
3382     usereg(s);
3383     usereg(d);
3384    
3385     make_tos2(d,s);
3386     ds=stackpos(s);
3387    
3388     if (ds!=1) {
3389     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3390     abort();
3391     }
3392     emit_byte(0xd9);
3393     emit_byte(0xf5); /* take rem1 from dest by source */
3394     }
3395     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3396    
3397    
3398     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3399     {
3400     make_tos(r);
3401     emit_byte(0xd9); /* ftst */
3402     emit_byte(0xe4);
3403     }
3404     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3405    
/*
 * Native register clobbered by raw_fflags_into_flags().
 *
 * On processors without fucomi (detected through have_cmov) the FPU
 * status word has to be routed through %ax, so %eax is lost.  When
 * fucomi is available no integer register is touched.
 */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG (EAX_INDEX)
3409    
3410     static __inline__ void raw_fflags_into_flags(int r)
3411     {
3412     int p;
3413    
3414     usereg(r);
3415     p=stackpos(r);
3416    
3417     emit_byte(0xd9);
3418     emit_byte(0xee); /* Push 0 */
3419     emit_byte(0xd9);
3420     emit_byte(0xc9+p); /* swap top two around */
3421     if (have_cmov) {
3422     // gb-- fucomi is for P6 cores only, not K6-2 then...
3423     emit_byte(0xdb);
3424     emit_byte(0xe9+p); /* fucomi them */
3425     }
3426     else {
3427     emit_byte(0xdd);
3428     emit_byte(0xe1+p); /* fucom them */
3429     emit_byte(0x9b);
3430     emit_byte(0xdf);
3431     emit_byte(0xe0); /* fstsw ax */
3432     raw_sahf(0); /* sahf */
3433     }
3434     emit_byte(0xdd);
3435     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3436     }