ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.7
Committed: 2002-10-03T16:16:57Z (21 years, 11 months ago) by gbeauche
Branch: MAIN
Changes since 1.6: +2 -0 lines
Log Message:
Don't forget to note CPU detection code mostly comes from Linux kernel.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
37     #define EAX_INDEX 0
38     #define ECX_INDEX 1
39     #define EDX_INDEX 2
40     #define EBX_INDEX 3
41     #define ESP_INDEX 4
42     #define EBP_INDEX 5
43     #define ESI_INDEX 6
44     #define EDI_INDEX 7
45    
46     /* The register in which subroutines return an integer return value */
47     #define REG_RESULT 0
48    
49     /* The registers subroutines take their first and second argument in */
50     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
51     /* Handle the _fastcall parameters of ECX and EDX */
52     #define REG_PAR1 1
53     #define REG_PAR2 2
54     #else
55     #define REG_PAR1 0
56     #define REG_PAR2 2
57     #endif
58    
59     /* Three registers that are not used for any of the above */
60     #define REG_NOPAR1 6
61     #define REG_NOPAR2 5
62     #define REG_NOPAR3 3
63    
64     #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
65     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66     #define REG_PC_TMP 0
67     #else
68     #define REG_PC_TMP 1 /* Another register that is not the above */
69     #endif
70    
71     #define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount.
72     -1 if any reg will do */
73     #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
74     #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
75    
76     uae_s8 always_used[]={4,-1};
77     uae_s8 can_byte[]={0,1,2,3,-1};
78     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79    
80     /* cpuopti mutates instruction handlers to assume registers are saved
81     by the caller */
82     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
83    
84     /* This *should* be the same as call_saved. But:
85     - We might not really know which registers are saved, and which aren't,
86     so we need to preserve some, but don't want to rely on everyone else
87     also saving those registers
88     - Special registers (such as the stack pointer) should not be "preserved"
89     by pushing, even though they are "saved" across function calls
90     */
91     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
92    
93     /* Whether classes of instructions do or don't clobber the native flags */
94     #define CLOBBER_MOV
95     #define CLOBBER_LEA
96     #define CLOBBER_CMOV
97     #define CLOBBER_POP
98     #define CLOBBER_PUSH
99     #define CLOBBER_SUB clobber_flags()
100     #define CLOBBER_SBB clobber_flags()
101     #define CLOBBER_CMP clobber_flags()
102     #define CLOBBER_ADD clobber_flags()
103     #define CLOBBER_ADC clobber_flags()
104     #define CLOBBER_AND clobber_flags()
105     #define CLOBBER_OR clobber_flags()
106     #define CLOBBER_XOR clobber_flags()
107    
108     #define CLOBBER_ROL clobber_flags()
109     #define CLOBBER_ROR clobber_flags()
110     #define CLOBBER_SHLL clobber_flags()
111     #define CLOBBER_SHRL clobber_flags()
112     #define CLOBBER_SHRA clobber_flags()
113     #define CLOBBER_TEST clobber_flags()
114     #define CLOBBER_CL16
115     #define CLOBBER_CL8
116     #define CLOBBER_SE16
117     #define CLOBBER_SE8
118     #define CLOBBER_ZE16
119     #define CLOBBER_ZE8
120     #define CLOBBER_SW16 clobber_flags()
121     #define CLOBBER_SW32
122     #define CLOBBER_SETCC
123     #define CLOBBER_MUL clobber_flags()
124     #define CLOBBER_BT clobber_flags()
125     #define CLOBBER_BSF clobber_flags()
126    
127 gbeauche 1.2 const bool optimize_accum = true;
128 gbeauche 1.1 const bool optimize_imm8 = true;
129     const bool optimize_shift_once = true;
130    
131     /*************************************************************************
132     * Actual encoding of the instructions on the target CPU *
133     *************************************************************************/
134    
135 gbeauche 1.2 static __inline__ int isaccum(int r)
136     {
137     return (r == EAX_INDEX);
138     }
139    
140 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
141     {
142     return (x>=-128 && x<=127);
143     }
144    
145     static __inline__ int isword(uae_s32 x)
146     {
147     return (x>=-32768 && x<=32767);
148     }
149    
150     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
151     {
152     emit_byte(0x50+r);
153     }
154     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
155    
156     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
157     {
158     emit_byte(0x58+r);
159     }
160     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
161    
162     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
163     {
164     emit_byte(0x0f);
165     emit_byte(0xba);
166     emit_byte(0xe0+r);
167     emit_byte(i);
168     }
169     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
170    
171     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
172     {
173     emit_byte(0x0f);
174     emit_byte(0xa3);
175     emit_byte(0xc0+8*b+r);
176     }
177     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
178    
179     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
180     {
181     emit_byte(0x0f);
182     emit_byte(0xba);
183     emit_byte(0xf8+r);
184     emit_byte(i);
185     }
186     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
187    
188     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
189     {
190     emit_byte(0x0f);
191     emit_byte(0xbb);
192     emit_byte(0xc0+8*b+r);
193     }
194     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
195    
196    
197     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
198     {
199     emit_byte(0x0f);
200     emit_byte(0xba);
201     emit_byte(0xf0+r);
202     emit_byte(i);
203     }
204     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
205    
206     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
207     {
208     emit_byte(0x0f);
209     emit_byte(0xb3);
210     emit_byte(0xc0+8*b+r);
211     }
212     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
213    
214     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
215     {
216     emit_byte(0x0f);
217     emit_byte(0xba);
218     emit_byte(0xe8+r);
219     emit_byte(i);
220     }
221     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
222    
223     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
224     {
225     emit_byte(0x0f);
226     emit_byte(0xab);
227     emit_byte(0xc0+8*b+r);
228     }
229     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
230    
231     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
232     {
233     emit_byte(0x66);
234     if (isbyte(i)) {
235     emit_byte(0x83);
236     emit_byte(0xe8+d);
237     emit_byte(i);
238     }
239     else {
240 gbeauche 1.2 if (optimize_accum && isaccum(d))
241     emit_byte(0x2d);
242     else {
243 gbeauche 1.1 emit_byte(0x81);
244     emit_byte(0xe8+d);
245 gbeauche 1.2 }
246 gbeauche 1.1 emit_word(i);
247     }
248     }
249     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
250    
251    
252     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
253     {
254     emit_byte(0x8b);
255     emit_byte(0x05+8*d);
256     emit_long(s);
257     }
258     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
259    
260     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
261     {
262     emit_byte(0xc7);
263     emit_byte(0x05);
264     emit_long(d);
265     emit_long(s);
266     }
267     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
268    
269     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
270     {
271     emit_byte(0x66);
272     emit_byte(0xc7);
273     emit_byte(0x05);
274     emit_long(d);
275     emit_word(s);
276     }
277     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
278    
279     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
280     {
281     emit_byte(0xc6);
282     emit_byte(0x05);
283     emit_long(d);
284     emit_byte(s);
285     }
286     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
287    
288     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
289     {
290     if (optimize_shift_once && (i == 1)) {
291     emit_byte(0xd0);
292     emit_byte(0x05);
293     emit_long(d);
294     }
295     else {
296     emit_byte(0xc0);
297     emit_byte(0x05);
298     emit_long(d);
299     emit_byte(i);
300     }
301     }
302     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
303    
304     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
305     {
306     if (optimize_shift_once && (i == 1)) {
307     emit_byte(0xd0);
308     emit_byte(0xc0+r);
309     }
310     else {
311     emit_byte(0xc0);
312     emit_byte(0xc0+r);
313     emit_byte(i);
314     }
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317    
318     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319     {
320     emit_byte(0x66);
321     emit_byte(0xc1);
322     emit_byte(0xc0+r);
323     emit_byte(i);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
328     {
329     if (optimize_shift_once && (i == 1)) {
330     emit_byte(0xd1);
331     emit_byte(0xc0+r);
332     }
333     else {
334     emit_byte(0xc1);
335     emit_byte(0xc0+r);
336     emit_byte(i);
337     }
338     }
339     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
340    
341     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
342     {
343     emit_byte(0xd3);
344     emit_byte(0xc0+d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
347    
348     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
349     {
350     emit_byte(0x66);
351     emit_byte(0xd3);
352     emit_byte(0xc0+d);
353     }
354     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
355    
356     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
357     {
358     emit_byte(0xd2);
359     emit_byte(0xc0+d);
360     }
361     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
362    
363     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
364     {
365     emit_byte(0xd3);
366     emit_byte(0xe0+d);
367     }
368     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
369    
370     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
371     {
372     emit_byte(0x66);
373     emit_byte(0xd3);
374     emit_byte(0xe0+d);
375     }
376     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
377    
378     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
379     {
380     emit_byte(0xd2);
381     emit_byte(0xe0+d);
382     }
383     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
384    
385     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
386     {
387     if (optimize_shift_once && (i == 1)) {
388     emit_byte(0xd0);
389     emit_byte(0xc8+r);
390     }
391     else {
392     emit_byte(0xc0);
393     emit_byte(0xc8+r);
394     emit_byte(i);
395     }
396     }
397     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
398    
399     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
400     {
401     emit_byte(0x66);
402     emit_byte(0xc1);
403     emit_byte(0xc8+r);
404     emit_byte(i);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
407    
408     // gb-- used for making an fpcr value in compemu_fpp.cpp
409     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
410     {
411     emit_byte(0x0b);
412     emit_byte(0x05+8*d);
413     emit_long(s);
414     }
415     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
416    
417     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
418     {
419     if (optimize_shift_once && (i == 1)) {
420     emit_byte(0xd1);
421     emit_byte(0xc8+r);
422     }
423     else {
424     emit_byte(0xc1);
425     emit_byte(0xc8+r);
426     emit_byte(i);
427     }
428     }
429     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
430    
431     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
432     {
433     emit_byte(0xd3);
434     emit_byte(0xc8+d);
435     }
436     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
437    
438     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
439     {
440     emit_byte(0x66);
441     emit_byte(0xd3);
442     emit_byte(0xc8+d);
443     }
444     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
445    
446     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
447     {
448     emit_byte(0xd2);
449     emit_byte(0xc8+d);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
454     {
455     emit_byte(0xd3);
456     emit_byte(0xe8+d);
457     }
458     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
459    
460     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
461     {
462     emit_byte(0x66);
463     emit_byte(0xd3);
464     emit_byte(0xe8+d);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
469     {
470     emit_byte(0xd2);
471     emit_byte(0xe8+d);
472     }
473     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
474    
475     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
476     {
477     emit_byte(0xd3);
478     emit_byte(0xf8+d);
479     }
480     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
481    
482     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
483     {
484     emit_byte(0x66);
485     emit_byte(0xd3);
486     emit_byte(0xf8+d);
487     }
488     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
489    
490     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
491     {
492     emit_byte(0xd2);
493     emit_byte(0xf8+d);
494     }
495     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
496    
497     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
498     {
499     if (optimize_shift_once && (i == 1)) {
500     emit_byte(0xd1);
501     emit_byte(0xe0+r);
502     }
503     else {
504     emit_byte(0xc1);
505     emit_byte(0xe0+r);
506     emit_byte(i);
507     }
508     }
509     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
510    
511     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
512     {
513     emit_byte(0x66);
514     emit_byte(0xc1);
515     emit_byte(0xe0+r);
516     emit_byte(i);
517     }
518     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
519    
520     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
521     {
522     if (optimize_shift_once && (i == 1)) {
523     emit_byte(0xd0);
524     emit_byte(0xe0+r);
525     }
526     else {
527     emit_byte(0xc0);
528     emit_byte(0xe0+r);
529     emit_byte(i);
530     }
531     }
532     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
533    
534     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
535     {
536     if (optimize_shift_once && (i == 1)) {
537     emit_byte(0xd1);
538     emit_byte(0xe8+r);
539     }
540     else {
541     emit_byte(0xc1);
542     emit_byte(0xe8+r);
543     emit_byte(i);
544     }
545     }
546     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
547    
548     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
549     {
550     emit_byte(0x66);
551     emit_byte(0xc1);
552     emit_byte(0xe8+r);
553     emit_byte(i);
554     }
555     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
556    
557     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
558     {
559     if (optimize_shift_once && (i == 1)) {
560     emit_byte(0xd0);
561     emit_byte(0xe8+r);
562     }
563     else {
564     emit_byte(0xc0);
565     emit_byte(0xe8+r);
566     emit_byte(i);
567     }
568     }
569     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
570    
571     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
572     {
573     if (optimize_shift_once && (i == 1)) {
574     emit_byte(0xd1);
575     emit_byte(0xf8+r);
576     }
577     else {
578     emit_byte(0xc1);
579     emit_byte(0xf8+r);
580     emit_byte(i);
581     }
582     }
583     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
584    
585     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
586     {
587     emit_byte(0x66);
588     emit_byte(0xc1);
589     emit_byte(0xf8+r);
590     emit_byte(i);
591     }
592     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
593    
594     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
595     {
596     if (optimize_shift_once && (i == 1)) {
597     emit_byte(0xd0);
598     emit_byte(0xf8+r);
599     }
600     else {
601     emit_byte(0xc0);
602     emit_byte(0xf8+r);
603     emit_byte(i);
604     }
605     }
606     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
607    
608     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
609     {
610     emit_byte(0x9e);
611     }
612     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
613    
614     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
615     {
616     emit_byte(0x0f);
617     emit_byte(0xa2);
618     }
619     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
620    
621     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
622     {
623     emit_byte(0x9f);
624     }
625     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
626    
627     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
628     {
629     emit_byte(0x0f);
630     emit_byte(0x90+cc);
631     emit_byte(0xc0+d);
632     }
633     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
634    
635     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
636     {
637     emit_byte(0x0f);
638     emit_byte(0x90+cc);
639     emit_byte(0x05);
640     emit_long(d);
641     }
642     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
643    
644     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
645     {
646     if (have_cmov) {
647     emit_byte(0x0f);
648     emit_byte(0x40+cc);
649     emit_byte(0xc0+8*d+s);
650     }
651     else { /* replacement using branch and mov */
652     int uncc=(cc^1);
653     emit_byte(0x70+uncc);
654     emit_byte(2); /* skip next 2 bytes if not cc=true */
655     emit_byte(0x89);
656     emit_byte(0xc0+8*s+d);
657     }
658     }
659     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
660    
661     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
662     {
663     emit_byte(0x0f);
664     emit_byte(0xbc);
665     emit_byte(0xc0+8*d+s);
666     }
667     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
668    
669     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
670     {
671     emit_byte(0x0f);
672     emit_byte(0xbf);
673     emit_byte(0xc0+8*d+s);
674     }
675     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
676    
677     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
678     {
679     emit_byte(0x0f);
680     emit_byte(0xbe);
681     emit_byte(0xc0+8*d+s);
682     }
683     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
684    
685     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
686     {
687     emit_byte(0x0f);
688     emit_byte(0xb7);
689     emit_byte(0xc0+8*d+s);
690     }
691     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
692    
693     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
694     {
695     emit_byte(0x0f);
696     emit_byte(0xb6);
697     emit_byte(0xc0+8*d+s);
698     }
699     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
700    
701     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
702     {
703     emit_byte(0x0f);
704     emit_byte(0xaf);
705     emit_byte(0xc0+8*d+s);
706     }
707     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
708    
709     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
710     {
711     if (d!=MUL_NREG1 || s!=MUL_NREG2)
712     abort();
713     emit_byte(0xf7);
714     emit_byte(0xea);
715     }
716     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
717    
718     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
719     {
720     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
721     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
722     abort();
723     }
724     emit_byte(0xf7);
725     emit_byte(0xe2);
726     }
727     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
728    
729     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
730     {
731     abort(); /* %^$&%^$%#^ x86! */
732     emit_byte(0x0f);
733     emit_byte(0xaf);
734     emit_byte(0xc0+8*d+s);
735     }
736     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
737    
738     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
739     {
740     emit_byte(0x88);
741     emit_byte(0xc0+8*s+d);
742     }
743     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
744    
745     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
746     {
747     emit_byte(0x66);
748     emit_byte(0x89);
749     emit_byte(0xc0+8*s+d);
750     }
751     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
752    
753     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
754     {
755     int isebp=(baser==5)?0x40:0;
756     int fi;
757    
758     switch(factor) {
759     case 1: fi=0; break;
760     case 2: fi=1; break;
761     case 4: fi=2; break;
762     case 8: fi=3; break;
763     default: abort();
764     }
765    
766    
767     emit_byte(0x8b);
768     emit_byte(0x04+8*d+isebp);
769     emit_byte(baser+8*index+0x40*fi);
770     if (isebp)
771     emit_byte(0x00);
772     }
773     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
774    
775     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
776     {
777     int fi;
778     int isebp;
779    
780     switch(factor) {
781     case 1: fi=0; break;
782     case 2: fi=1; break;
783     case 4: fi=2; break;
784     case 8: fi=3; break;
785     default: abort();
786     }
787     isebp=(baser==5)?0x40:0;
788    
789     emit_byte(0x66);
790     emit_byte(0x8b);
791     emit_byte(0x04+8*d+isebp);
792     emit_byte(baser+8*index+0x40*fi);
793     if (isebp)
794     emit_byte(0x00);
795     }
796     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
797    
798     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
799     {
800     int fi;
801     int isebp;
802    
803     switch(factor) {
804     case 1: fi=0; break;
805     case 2: fi=1; break;
806     case 4: fi=2; break;
807     case 8: fi=3; break;
808     default: abort();
809     }
810     isebp=(baser==5)?0x40:0;
811    
812     emit_byte(0x8a);
813     emit_byte(0x04+8*d+isebp);
814     emit_byte(baser+8*index+0x40*fi);
815     if (isebp)
816     emit_byte(0x00);
817     }
818     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
819    
820     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
821     {
822     int fi;
823     int isebp;
824    
825     switch(factor) {
826     case 1: fi=0; break;
827     case 2: fi=1; break;
828     case 4: fi=2; break;
829     case 8: fi=3; break;
830     default: abort();
831     }
832    
833    
834     isebp=(baser==5)?0x40:0;
835    
836     emit_byte(0x89);
837     emit_byte(0x04+8*s+isebp);
838     emit_byte(baser+8*index+0x40*fi);
839     if (isebp)
840     emit_byte(0x00);
841     }
842     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
843    
844     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
845     {
846     int fi;
847     int isebp;
848    
849     switch(factor) {
850     case 1: fi=0; break;
851     case 2: fi=1; break;
852     case 4: fi=2; break;
853     case 8: fi=3; break;
854     default: abort();
855     }
856     isebp=(baser==5)?0x40:0;
857    
858     emit_byte(0x66);
859     emit_byte(0x89);
860     emit_byte(0x04+8*s+isebp);
861     emit_byte(baser+8*index+0x40*fi);
862     if (isebp)
863     emit_byte(0x00);
864     }
865     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
866    
867     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
868     {
869     int fi;
870     int isebp;
871    
872     switch(factor) {
873     case 1: fi=0; break;
874     case 2: fi=1; break;
875     case 4: fi=2; break;
876     case 8: fi=3; break;
877     default: abort();
878     }
879     isebp=(baser==5)?0x40:0;
880    
881     emit_byte(0x88);
882     emit_byte(0x04+8*s+isebp);
883     emit_byte(baser+8*index+0x40*fi);
884     if (isebp)
885     emit_byte(0x00);
886     }
887     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
888    
889     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
890     {
891     int fi;
892    
893     switch(factor) {
894     case 1: fi=0; break;
895     case 2: fi=1; break;
896     case 4: fi=2; break;
897     case 8: fi=3; break;
898     default: abort();
899     }
900    
901     emit_byte(0x89);
902     emit_byte(0x84+8*s);
903     emit_byte(baser+8*index+0x40*fi);
904     emit_long(base);
905     }
906     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
907    
908     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
909     {
910     int fi;
911    
912     switch(factor) {
913     case 1: fi=0; break;
914     case 2: fi=1; break;
915     case 4: fi=2; break;
916     case 8: fi=3; break;
917     default: abort();
918     }
919    
920     emit_byte(0x66);
921     emit_byte(0x89);
922     emit_byte(0x84+8*s);
923     emit_byte(baser+8*index+0x40*fi);
924     emit_long(base);
925     }
926     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
927    
928     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
929     {
930     int fi;
931    
932     switch(factor) {
933     case 1: fi=0; break;
934     case 2: fi=1; break;
935     case 4: fi=2; break;
936     case 8: fi=3; break;
937     default: abort();
938     }
939    
940     emit_byte(0x88);
941     emit_byte(0x84+8*s);
942     emit_byte(baser+8*index+0x40*fi);
943     emit_long(base);
944     }
945     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
946    
947     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
948     {
949     int fi;
950    
951     switch(factor) {
952     case 1: fi=0; break;
953     case 2: fi=1; break;
954     case 4: fi=2; break;
955     case 8: fi=3; break;
956     default: abort();
957     }
958    
959     emit_byte(0x8b);
960     emit_byte(0x84+8*d);
961     emit_byte(baser+8*index+0x40*fi);
962     emit_long(base);
963     }
964     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
965    
966     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
967     {
968     int fi;
969    
970     switch(factor) {
971     case 1: fi=0; break;
972     case 2: fi=1; break;
973     case 4: fi=2; break;
974     case 8: fi=3; break;
975     default: abort();
976     }
977    
978     emit_byte(0x66);
979     emit_byte(0x8b);
980     emit_byte(0x84+8*d);
981     emit_byte(baser+8*index+0x40*fi);
982     emit_long(base);
983     }
984     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
985    
986     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
987     {
988     int fi;
989    
990     switch(factor) {
991     case 1: fi=0; break;
992     case 2: fi=1; break;
993     case 4: fi=2; break;
994     case 8: fi=3; break;
995     default: abort();
996     }
997    
998     emit_byte(0x8a);
999     emit_byte(0x84+8*d);
1000     emit_byte(baser+8*index+0x40*fi);
1001     emit_long(base);
1002     }
1003     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1004    
1005     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1006     {
1007     int fi;
1008     switch(factor) {
1009     case 1: fi=0; break;
1010     case 2: fi=1; break;
1011     case 4: fi=2; break;
1012     case 8: fi=3; break;
1013     default:
1014     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1015     abort();
1016     }
1017     emit_byte(0x8b);
1018     emit_byte(0x04+8*d);
1019     emit_byte(0x05+8*index+64*fi);
1020     emit_long(base);
1021     }
1022     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1023    
1024     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1025     {
1026     int fi;
1027     switch(factor) {
1028     case 1: fi=0; break;
1029     case 2: fi=1; break;
1030     case 4: fi=2; break;
1031     case 8: fi=3; break;
1032     default:
1033     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1034     abort();
1035     }
1036     if (have_cmov) {
1037     emit_byte(0x0f);
1038     emit_byte(0x40+cond);
1039     emit_byte(0x04+8*d);
1040     emit_byte(0x05+8*index+64*fi);
1041     emit_long(base);
1042     }
1043     else { /* replacement using branch and mov */
1044     int uncc=(cond^1);
1045     emit_byte(0x70+uncc);
1046     emit_byte(7); /* skip next 7 bytes if not cc=true */
1047     emit_byte(0x8b);
1048     emit_byte(0x04+8*d);
1049     emit_byte(0x05+8*index+64*fi);
1050     emit_long(base);
1051     }
1052     }
1053     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1054    
1055     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1056     {
1057     if (have_cmov) {
1058     emit_byte(0x0f);
1059     emit_byte(0x40+cond);
1060     emit_byte(0x05+8*d);
1061     emit_long(mem);
1062     }
1063     else { /* replacement using branch and mov */
1064     int uncc=(cond^1);
1065     emit_byte(0x70+uncc);
1066     emit_byte(6); /* skip next 6 bytes if not cc=true */
1067     emit_byte(0x8b);
1068     emit_byte(0x05+8*d);
1069     emit_long(mem);
1070     }
1071     }
1072     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1073    
1074     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1075     {
1076     emit_byte(0x8b);
1077     emit_byte(0x40+8*d+s);
1078     emit_byte(offset);
1079     }
1080     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1081    
1082     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1083     {
1084     emit_byte(0x66);
1085     emit_byte(0x8b);
1086     emit_byte(0x40+8*d+s);
1087     emit_byte(offset);
1088     }
1089     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1090    
1091     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1092     {
1093     emit_byte(0x8a);
1094     emit_byte(0x40+8*d+s);
1095     emit_byte(offset);
1096     }
1097     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1098    
1099     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1100     {
1101     emit_byte(0x8b);
1102     emit_byte(0x80+8*d+s);
1103     emit_long(offset);
1104     }
1105     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1106    
1107     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1108     {
1109     emit_byte(0x66);
1110     emit_byte(0x8b);
1111     emit_byte(0x80+8*d+s);
1112     emit_long(offset);
1113     }
1114     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1115    
1116     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1117     {
1118     emit_byte(0x8a);
1119     emit_byte(0x80+8*d+s);
1120     emit_long(offset);
1121     }
1122     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1123    
1124     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1125     {
1126     emit_byte(0xc7);
1127     emit_byte(0x40+d);
1128     emit_byte(offset);
1129     emit_long(i);
1130     }
1131     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1132    
1133     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1134     {
1135     emit_byte(0x66);
1136     emit_byte(0xc7);
1137     emit_byte(0x40+d);
1138     emit_byte(offset);
1139     emit_word(i);
1140     }
1141     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1142    
1143     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1144     {
1145     emit_byte(0xc6);
1146     emit_byte(0x40+d);
1147     emit_byte(offset);
1148     emit_byte(i);
1149     }
1150     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1151    
1152     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1153     {
1154     emit_byte(0x89);
1155     emit_byte(0x40+8*s+d);
1156     emit_byte(offset);
1157     }
1158     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1159    
1160     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1161     {
1162     emit_byte(0x66);
1163     emit_byte(0x89);
1164     emit_byte(0x40+8*s+d);
1165     emit_byte(offset);
1166     }
1167     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1168    
1169     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1170     {
1171     emit_byte(0x88);
1172     emit_byte(0x40+8*s+d);
1173     emit_byte(offset);
1174     }
1175     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1176    
1177     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1178     {
1179     if (optimize_imm8 && isbyte(offset)) {
1180     emit_byte(0x8d);
1181     emit_byte(0x40+8*d+s);
1182     emit_byte(offset);
1183     }
1184     else {
1185     emit_byte(0x8d);
1186     emit_byte(0x80+8*d+s);
1187     emit_long(offset);
1188     }
1189     }
1190     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1191    
1192     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1193     {
1194     int fi;
1195    
1196     switch(factor) {
1197     case 1: fi=0; break;
1198     case 2: fi=1; break;
1199     case 4: fi=2; break;
1200     case 8: fi=3; break;
1201     default: abort();
1202     }
1203    
1204     if (optimize_imm8 && isbyte(offset)) {
1205     emit_byte(0x8d);
1206     emit_byte(0x44+8*d);
1207     emit_byte(0x40*fi+8*index+s);
1208     emit_byte(offset);
1209     }
1210     else {
1211     emit_byte(0x8d);
1212     emit_byte(0x84+8*d);
1213     emit_byte(0x40*fi+8*index+s);
1214     emit_long(offset);
1215     }
1216     }
1217     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1218    
1219     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1220     {
1221     int isebp=(s==5)?0x40:0;
1222     int fi;
1223    
1224     switch(factor) {
1225     case 1: fi=0; break;
1226     case 2: fi=1; break;
1227     case 4: fi=2; break;
1228     case 8: fi=3; break;
1229     default: abort();
1230     }
1231    
1232     emit_byte(0x8d);
1233     emit_byte(0x04+8*d+isebp);
1234     emit_byte(0x40*fi+8*index+s);
1235     if (isebp)
1236     emit_byte(0);
1237     }
1238     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1239    
1240     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1241     {
1242     if (optimize_imm8 && isbyte(offset)) {
1243     emit_byte(0x89);
1244     emit_byte(0x40+8*s+d);
1245     emit_byte(offset);
1246     }
1247     else {
1248     emit_byte(0x89);
1249     emit_byte(0x80+8*s+d);
1250     emit_long(offset);
1251     }
1252     }
1253     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1254    
1255     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1256     {
1257     emit_byte(0x66);
1258     emit_byte(0x89);
1259     emit_byte(0x80+8*s+d);
1260     emit_long(offset);
1261     }
1262     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1263    
1264     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1265     {
1266     if (optimize_imm8 && isbyte(offset)) {
1267     emit_byte(0x88);
1268     emit_byte(0x40+8*s+d);
1269     emit_byte(offset);
1270     }
1271     else {
1272     emit_byte(0x88);
1273     emit_byte(0x80+8*s+d);
1274     emit_long(offset);
1275     }
1276     }
1277     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1278    
1279     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1280     {
1281     emit_byte(0x0f);
1282     emit_byte(0xc8+r);
1283     }
1284     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1285    
1286     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1287     {
1288     emit_byte(0x66);
1289     emit_byte(0xc1);
1290     emit_byte(0xc0+r);
1291     emit_byte(0x08);
1292     }
1293     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1294    
1295     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1296     {
1297     emit_byte(0x89);
1298     emit_byte(0xc0+8*s+d);
1299     }
1300     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1301    
1302     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1303     {
1304     emit_byte(0x89);
1305     emit_byte(0x05+8*s);
1306     emit_long(d);
1307     }
1308     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1309    
1310     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1311     {
1312     emit_byte(0x66);
1313     emit_byte(0x89);
1314     emit_byte(0x05+8*s);
1315     emit_long(d);
1316     }
1317     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1318    
1319     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1320     {
1321     emit_byte(0x66);
1322     emit_byte(0x8b);
1323     emit_byte(0x05+8*d);
1324     emit_long(s);
1325     }
1326     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1327    
1328     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1329     {
1330     emit_byte(0x88);
1331     emit_byte(0x05+8*s);
1332     emit_long(d);
1333     }
1334     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1335    
1336     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1337     {
1338     emit_byte(0x8a);
1339     emit_byte(0x05+8*d);
1340     emit_long(s);
1341     }
1342     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1343    
1344     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1345     {
1346     emit_byte(0xb8+d);
1347     emit_long(s);
1348     }
1349     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1350    
1351     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1352     {
1353     emit_byte(0x66);
1354     emit_byte(0xb8+d);
1355     emit_word(s);
1356     }
1357     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1358    
1359     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1360     {
1361     emit_byte(0xb0+d);
1362     emit_byte(s);
1363     }
1364     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1365    
1366     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1367     {
1368     emit_byte(0x81);
1369     emit_byte(0x15);
1370     emit_long(d);
1371     emit_long(s);
1372     }
1373     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1374    
1375     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1376     {
1377     if (optimize_imm8 && isbyte(s)) {
1378     emit_byte(0x83);
1379     emit_byte(0x05);
1380     emit_long(d);
1381     emit_byte(s);
1382     }
1383     else {
1384     emit_byte(0x81);
1385     emit_byte(0x05);
1386     emit_long(d);
1387     emit_long(s);
1388     }
1389     }
1390     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1391    
1392     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1393     {
1394     emit_byte(0x66);
1395     emit_byte(0x81);
1396     emit_byte(0x05);
1397     emit_long(d);
1398     emit_word(s);
1399     }
1400     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1401    
1402     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1403     {
1404     emit_byte(0x80);
1405     emit_byte(0x05);
1406     emit_long(d);
1407     emit_byte(s);
1408     }
1409     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1410    
1411     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1412     {
1413 gbeauche 1.2 if (optimize_accum && isaccum(d))
1414     emit_byte(0xa9);
1415     else {
1416 gbeauche 1.1 emit_byte(0xf7);
1417     emit_byte(0xc0+d);
1418 gbeauche 1.2 }
1419 gbeauche 1.1 emit_long(i);
1420     }
1421     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1422    
1423     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1424     {
1425     emit_byte(0x85);
1426     emit_byte(0xc0+8*s+d);
1427     }
1428     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1429    
1430     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1431     {
1432     emit_byte(0x66);
1433     emit_byte(0x85);
1434     emit_byte(0xc0+8*s+d);
1435     }
1436     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1437    
1438     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1439     {
1440     emit_byte(0x84);
1441     emit_byte(0xc0+8*s+d);
1442     }
1443     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1444    
1445     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1446     {
1447     if (optimize_imm8 && isbyte(i)) {
1448 gbeauche 1.2 emit_byte(0x83);
1449     emit_byte(0xe0+d);
1450     emit_byte(i);
1451 gbeauche 1.1 }
1452     else {
1453 gbeauche 1.2 if (optimize_accum && isaccum(d))
1454     emit_byte(0x25);
1455     else {
1456     emit_byte(0x81);
1457     emit_byte(0xe0+d);
1458     }
1459     emit_long(i);
1460 gbeauche 1.1 }
1461     }
1462     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1463    
1464     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1465     {
1466 gbeauche 1.2 emit_byte(0x66);
1467     if (optimize_imm8 && isbyte(i)) {
1468     emit_byte(0x83);
1469     emit_byte(0xe0+d);
1470     emit_byte(i);
1471     }
1472     else {
1473     if (optimize_accum && isaccum(d))
1474     emit_byte(0x25);
1475     else {
1476     emit_byte(0x81);
1477     emit_byte(0xe0+d);
1478     }
1479     emit_word(i);
1480     }
1481 gbeauche 1.1 }
1482     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1483    
1484     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1485     {
1486     emit_byte(0x21);
1487     emit_byte(0xc0+8*s+d);
1488     }
1489     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1490    
1491     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1492     {
1493     emit_byte(0x66);
1494     emit_byte(0x21);
1495     emit_byte(0xc0+8*s+d);
1496     }
1497     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1498    
1499     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1500     {
1501     emit_byte(0x20);
1502     emit_byte(0xc0+8*s+d);
1503     }
1504     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1505    
1506     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1507     {
1508     if (optimize_imm8 && isbyte(i)) {
1509     emit_byte(0x83);
1510     emit_byte(0xc8+d);
1511     emit_byte(i);
1512     }
1513     else {
1514 gbeauche 1.2 if (optimize_accum && isaccum(d))
1515     emit_byte(0x0d);
1516     else {
1517 gbeauche 1.1 emit_byte(0x81);
1518     emit_byte(0xc8+d);
1519 gbeauche 1.2 }
1520 gbeauche 1.1 emit_long(i);
1521     }
1522     }
1523     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1524    
1525     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1526     {
1527     emit_byte(0x09);
1528     emit_byte(0xc0+8*s+d);
1529     }
1530     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1531    
1532     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1533     {
1534     emit_byte(0x66);
1535     emit_byte(0x09);
1536     emit_byte(0xc0+8*s+d);
1537     }
1538     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1539    
1540     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1541     {
1542     emit_byte(0x08);
1543     emit_byte(0xc0+8*s+d);
1544     }
1545     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1546    
1547     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1548     {
1549     emit_byte(0x11);
1550     emit_byte(0xc0+8*s+d);
1551     }
1552     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1553    
1554     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1555     {
1556     emit_byte(0x66);
1557     emit_byte(0x11);
1558     emit_byte(0xc0+8*s+d);
1559     }
1560     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1561    
1562     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1563     {
1564     emit_byte(0x10);
1565     emit_byte(0xc0+8*s+d);
1566     }
1567     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1568    
1569     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1570     {
1571     emit_byte(0x01);
1572     emit_byte(0xc0+8*s+d);
1573     }
1574     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1575    
1576     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1577     {
1578     emit_byte(0x66);
1579     emit_byte(0x01);
1580     emit_byte(0xc0+8*s+d);
1581     }
1582     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1583    
1584     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1585     {
1586     emit_byte(0x00);
1587     emit_byte(0xc0+8*s+d);
1588     }
1589     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1590    
1591     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1592     {
1593     if (isbyte(i)) {
1594     emit_byte(0x83);
1595     emit_byte(0xe8+d);
1596     emit_byte(i);
1597     }
1598     else {
1599 gbeauche 1.2 if (optimize_accum && isaccum(d))
1600     emit_byte(0x2d);
1601     else {
1602 gbeauche 1.1 emit_byte(0x81);
1603     emit_byte(0xe8+d);
1604 gbeauche 1.2 }
1605 gbeauche 1.1 emit_long(i);
1606     }
1607     }
1608     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1609    
1610     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1611     {
1612 gbeauche 1.2 if (optimize_accum && isaccum(d))
1613     emit_byte(0x2c);
1614     else {
1615 gbeauche 1.1 emit_byte(0x80);
1616     emit_byte(0xe8+d);
1617 gbeauche 1.2 }
1618 gbeauche 1.1 emit_byte(i);
1619     }
1620     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1621    
1622     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1623     {
1624     if (isbyte(i)) {
1625     emit_byte(0x83);
1626     emit_byte(0xc0+d);
1627     emit_byte(i);
1628     }
1629     else {
1630 gbeauche 1.2 if (optimize_accum && isaccum(d))
1631     emit_byte(0x05);
1632     else {
1633 gbeauche 1.1 emit_byte(0x81);
1634     emit_byte(0xc0+d);
1635 gbeauche 1.2 }
1636 gbeauche 1.1 emit_long(i);
1637     }
1638     }
1639     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1640    
1641     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1642     {
1643 gbeauche 1.2 emit_byte(0x66);
1644 gbeauche 1.1 if (isbyte(i)) {
1645     emit_byte(0x83);
1646     emit_byte(0xc0+d);
1647     emit_byte(i);
1648     }
1649     else {
1650 gbeauche 1.2 if (optimize_accum && isaccum(d))
1651     emit_byte(0x05);
1652     else {
1653 gbeauche 1.1 emit_byte(0x81);
1654     emit_byte(0xc0+d);
1655 gbeauche 1.2 }
1656 gbeauche 1.1 emit_word(i);
1657     }
1658     }
1659     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1660    
1661     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1662     {
1663 gbeauche 1.2 if (optimize_accum && isaccum(d))
1664     emit_byte(0x04);
1665     else {
1666     emit_byte(0x80);
1667     emit_byte(0xc0+d);
1668     }
1669 gbeauche 1.1 emit_byte(i);
1670     }
1671     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1672    
1673     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1674     {
1675     emit_byte(0x19);
1676     emit_byte(0xc0+8*s+d);
1677     }
1678     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1679    
1680     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1681     {
1682     emit_byte(0x66);
1683     emit_byte(0x19);
1684     emit_byte(0xc0+8*s+d);
1685     }
1686     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1687    
1688     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1689     {
1690     emit_byte(0x18);
1691     emit_byte(0xc0+8*s+d);
1692     }
1693     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1694    
1695     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1696     {
1697     emit_byte(0x29);
1698     emit_byte(0xc0+8*s+d);
1699     }
1700     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1701    
1702     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1703     {
1704     emit_byte(0x66);
1705     emit_byte(0x29);
1706     emit_byte(0xc0+8*s+d);
1707     }
1708     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1709    
1710     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1711     {
1712     emit_byte(0x28);
1713     emit_byte(0xc0+8*s+d);
1714     }
1715     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1716    
1717     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1718     {
1719     emit_byte(0x39);
1720     emit_byte(0xc0+8*s+d);
1721     }
1722     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1723    
1724     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1725     {
1726     if (optimize_imm8 && isbyte(i)) {
1727     emit_byte(0x83);
1728     emit_byte(0xf8+r);
1729     emit_byte(i);
1730     }
1731     else {
1732 gbeauche 1.2 if (optimize_accum && isaccum(r))
1733     emit_byte(0x3d);
1734     else {
1735 gbeauche 1.1 emit_byte(0x81);
1736     emit_byte(0xf8+r);
1737 gbeauche 1.2 }
1738 gbeauche 1.1 emit_long(i);
1739     }
1740     }
1741     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1742    
1743     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1744     {
1745     emit_byte(0x66);
1746     emit_byte(0x39);
1747     emit_byte(0xc0+8*s+d);
1748     }
1749     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1750    
1751 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1752     {
1753     emit_byte(0x80);
1754     emit_byte(0x3d);
1755     emit_long(d);
1756     emit_byte(s);
1757     }
1758     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1759    
1760 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1761     {
1762 gbeauche 1.2 if (optimize_accum && isaccum(d))
1763     emit_byte(0x3c);
1764     else {
1765 gbeauche 1.1 emit_byte(0x80);
1766     emit_byte(0xf8+d);
1767 gbeauche 1.2 }
1768 gbeauche 1.1 emit_byte(i);
1769     }
1770     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1771    
1772     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1773     {
1774     emit_byte(0x38);
1775     emit_byte(0xc0+8*s+d);
1776     }
1777     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1778    
1779     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1780     {
1781     int fi;
1782    
1783     switch(factor) {
1784     case 1: fi=0; break;
1785     case 2: fi=1; break;
1786     case 4: fi=2; break;
1787     case 8: fi=3; break;
1788     default: abort();
1789     }
1790     emit_byte(0x39);
1791     emit_byte(0x04+8*d);
1792     emit_byte(5+8*index+0x40*fi);
1793     emit_long(offset);
1794     }
1795     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1796    
1797     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1798     {
1799     emit_byte(0x31);
1800     emit_byte(0xc0+8*s+d);
1801     }
1802     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1803    
1804     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1805     {
1806     emit_byte(0x66);
1807     emit_byte(0x31);
1808     emit_byte(0xc0+8*s+d);
1809     }
1810     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1811    
1812     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1813     {
1814     emit_byte(0x30);
1815     emit_byte(0xc0+8*s+d);
1816     }
1817     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1818    
1819     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1820     {
1821     if (optimize_imm8 && isbyte(s)) {
1822     emit_byte(0x83);
1823     emit_byte(0x2d);
1824     emit_long(d);
1825     emit_byte(s);
1826     }
1827     else {
1828     emit_byte(0x81);
1829     emit_byte(0x2d);
1830     emit_long(d);
1831     emit_long(s);
1832     }
1833     }
1834     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1835    
1836     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1837     {
1838     if (optimize_imm8 && isbyte(s)) {
1839     emit_byte(0x83);
1840     emit_byte(0x3d);
1841     emit_long(d);
1842     emit_byte(s);
1843     }
1844     else {
1845     emit_byte(0x81);
1846     emit_byte(0x3d);
1847     emit_long(d);
1848     emit_long(s);
1849     }
1850     }
1851     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1852    
1853     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1854     {
1855     emit_byte(0x87);
1856     emit_byte(0xc0+8*r1+r2);
1857     }
1858     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1859    
1860     /*************************************************************************
1861     * FIXME: string-related instructions *
1862     *************************************************************************/
1863    
1864     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1865     {
1866     emit_byte(0xfc);
1867     }
1868     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1869    
1870     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1871     {
1872     emit_byte(0xfd);
1873     }
1874     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1875    
1876     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1877     {
1878     emit_byte(0xa4);
1879     }
1880     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1881    
1882     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1883     {
1884     emit_byte(0xa5);
1885     }
1886     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1887    
1888     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1889     {
1890     emit_byte(0xf3);
1891     }
1892     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1893    
1894     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1895     {
1896     raw_rep();
1897     raw_movs_b();
1898     }
1899     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1900    
1901     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1902     {
1903     raw_rep();
1904     raw_movs_l();
1905     }
1906     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1907    
1908     /*************************************************************************
1909     * FIXME: mem access modes probably wrong *
1910     *************************************************************************/
1911    
1912     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1913     {
1914     emit_byte(0x9c);
1915     }
1916     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1917    
1918     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1919     {
1920     emit_byte(0x9d);
1921     }
1922     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1923    
1924     /*************************************************************************
1925     * Unoptimizable stuff --- jump *
1926     *************************************************************************/
1927    
1928     static __inline__ void raw_call_r(R4 r)
1929     {
1930     emit_byte(0xff);
1931     emit_byte(0xd0+r);
1932 gbeauche 1.5 }
1933    
1934     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1935     {
1936     int mu;
1937     switch(m) {
1938     case 1: mu=0; break;
1939     case 2: mu=1; break;
1940     case 4: mu=2; break;
1941     case 8: mu=3; break;
1942     default: abort();
1943     }
1944     emit_byte(0xff);
1945     emit_byte(0x14);
1946     emit_byte(0x05+8*r+0x40*mu);
1947     emit_long(base);
1948 gbeauche 1.1 }
1949    
1950     static __inline__ void raw_jmp_r(R4 r)
1951     {
1952     emit_byte(0xff);
1953     emit_byte(0xe0+r);
1954     }
1955    
1956     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1957     {
1958     int mu;
1959     switch(m) {
1960     case 1: mu=0; break;
1961     case 2: mu=1; break;
1962     case 4: mu=2; break;
1963     case 8: mu=3; break;
1964     default: abort();
1965     }
1966     emit_byte(0xff);
1967     emit_byte(0x24);
1968     emit_byte(0x05+8*r+0x40*mu);
1969     emit_long(base);
1970     }
1971    
1972     static __inline__ void raw_jmp_m(uae_u32 base)
1973     {
1974     emit_byte(0xff);
1975     emit_byte(0x25);
1976     emit_long(base);
1977     }
1978    
1979    
1980     static __inline__ void raw_call(uae_u32 t)
1981     {
1982     emit_byte(0xe8);
1983     emit_long(t-(uae_u32)target-4);
1984     }
1985    
1986     static __inline__ void raw_jmp(uae_u32 t)
1987     {
1988     emit_byte(0xe9);
1989     emit_long(t-(uae_u32)target-4);
1990     }
1991    
1992     static __inline__ void raw_jl(uae_u32 t)
1993     {
1994     emit_byte(0x0f);
1995     emit_byte(0x8c);
1996     emit_long(t-(uae_u32)target-4);
1997     }
1998    
1999     static __inline__ void raw_jz(uae_u32 t)
2000     {
2001     emit_byte(0x0f);
2002     emit_byte(0x84);
2003     emit_long(t-(uae_u32)target-4);
2004     }
2005    
2006     static __inline__ void raw_jnz(uae_u32 t)
2007     {
2008     emit_byte(0x0f);
2009     emit_byte(0x85);
2010     emit_long(t-(uae_u32)target-4);
2011     }
2012    
/* Emit only the two opcode bytes of "jnz rel32"; the caller supplies the
   4-byte displacement afterwards. */
static __inline__ void raw_jnz_l_oponly(void)
{
    emit_byte(0x0f);
    emit_byte(0x85);    /* JNZ rel32 */
}
2018    
/* Emit only the two opcode bytes of "jcc rel32" for x86 condition code cc;
   the caller supplies the 4-byte displacement afterwards. */
static __inline__ void raw_jcc_l_oponly(int cc)
{
    emit_byte(0x0f);
    emit_byte(0x80+cc); /* Jcc rel32: 0F 80+cc */
}
2024    
/* Emit only the opcode byte of "jnz rel8"; the caller supplies the
   displacement byte afterwards. */
static __inline__ void raw_jnz_b_oponly(void)
{
    emit_byte(0x75);    /* JNZ rel8 */
}
2029    
/* Emit only the opcode byte of "jz rel8"; the caller supplies the
   displacement byte afterwards. */
static __inline__ void raw_jz_b_oponly(void)
{
    emit_byte(0x74);    /* JZ rel8 */
}
2034    
/* Emit only the opcode byte of "jcc rel8" for x86 condition code cc; the
   caller supplies the displacement byte afterwards. */
static __inline__ void raw_jcc_b_oponly(int cc)
{
    emit_byte(0x70+cc); /* Jcc rel8: 70+cc */
}
2039    
/* Emit only the opcode byte of "jmp rel32"; the caller supplies the
   4-byte displacement afterwards. */
static __inline__ void raw_jmp_l_oponly(void)
{
    emit_byte(0xe9);    /* JMP rel32 */
}
2044    
/* Emit only the opcode byte of "jmp rel8"; the caller supplies the
   displacement byte afterwards. */
static __inline__ void raw_jmp_b_oponly(void)
{
    emit_byte(0xeb);    /* JMP rel8 */
}
2049    
/* Emit "ret" (near return). */
static __inline__ void raw_ret(void)
{
    emit_byte(0xc3);    /* RET */
}
2054    
/* Emit a one-byte "nop". */
static __inline__ void raw_nop(void)
{
    emit_byte(0x90);    /* NOP */
}
2059    
2060    
2061     /*************************************************************************
2062     * Flag handling, to and fro UAE flag register *
2063     *************************************************************************/
2064    
2065     #ifdef SAHF_SETO_PROFITABLE
2066    
2067     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2068    
2069     static __inline__ void raw_flags_to_reg(int r)
2070     {
2071     raw_lahf(0); /* Most flags in AH */
2072     //raw_setcc(r,0); /* V flag in AL */
2073     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2074    
2075     #if 1 /* Let's avoid those nasty partial register stalls */
2076     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2077     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2078     //live.state[FLAGTMP].status=CLEAN;
2079     live.state[FLAGTMP].status=INMEM;
2080     live.state[FLAGTMP].realreg=-1;
2081     /* We just "evicted" FLAGTMP. */
2082     if (live.nat[r].nholds!=1) {
2083     /* Huh? */
2084     abort();
2085     }
2086     live.nat[r].nholds=0;
2087     #endif
2088     }
2089    
2090     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
/* SAHF/SETO variant: reload the host flags from register r.
   Comparing the low byte with -127 overflows only when that byte is 1,
   which recreates V (presumably from the 0/1 value saved by the SETO
   store in raw_flags_to_reg -- verify); SAHF then restores the remaining
   flags from AH. */
static __inline__ void raw_reg_to_flags(int r)
{
    raw_cmp_b_ri(r, -127);  /* set V */
    raw_sahf(0);
}
2096    
2097     #else
2098    
2099     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2100     static __inline__ void raw_flags_to_reg(int r)
2101     {
2102     raw_pushfl();
2103     raw_pop_l_r(r);
2104     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2105     // live.state[FLAGTMP].status=CLEAN;
2106     live.state[FLAGTMP].status=INMEM;
2107     live.state[FLAGTMP].realreg=-1;
2108     /* We just "evicted" FLAGTMP. */
2109     if (live.nat[r].nholds!=1) {
2110     /* Huh? */
2111     abort();
2112     }
2113     live.nat[r].nholds=0;
2114     }
2115    
2116     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
/* PUSHF/POP variant: load EFLAGS from register r by pushing it and
   popping the value into the flags register. */
static __inline__ void raw_reg_to_flags(int r)
{
    raw_push_l_r(r);
    raw_popfl();
}
2122    
2123     #endif
2124    
2125     /* Apparently, there are enough instructions between flag store and
2126     flag reload to avoid the partial memory stall */
2127     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2128     {
2129     #if 1
2130     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2131     #else
2132     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2133     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2134     #endif
2135     }
2136    
2137     /* FLAGX is byte sized, and we *do* write it at that size */
2138     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2139     {
2140     if (live.nat[target].canbyte)
2141     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2142     else if (live.nat[target].canword)
2143     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2144     else
2145     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2146     }
2147    
2148    
2149     static __inline__ void raw_inc_sp(int off)
2150     {
2151 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2152 gbeauche 1.1 }
2153    
2154     /*************************************************************************
2155     * Handling mistaken direct memory access *
2156     *************************************************************************/
2157    
2158     // gb-- I don't need that part for JIT Basilisk II
2159     #if defined(NATMEM_OFFSET) && 0
2160     #include <asm/sigcontext.h>
2161     #include <signal.h>
2162    
2163     #define SIG_READ 1
2164     #define SIG_WRITE 2
2165    
2166     static int in_handler=0;
2167     static uae_u8 veccode[256];
2168    
2169     static void vec(int x, struct sigcontext sc)
2170     {
2171     uae_u8* i=(uae_u8*)sc.eip;
2172     uae_u32 addr=sc.cr2;
2173     int r=-1;
2174     int size=4;
2175     int dir=-1;
2176     int len=0;
2177     int j;
2178    
2179     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2180     if (!canbang)
2181     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2182     if (in_handler)
2183     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2184    
2185     if (canbang && i>=compiled_code && i<=current_compile_p) {
2186     if (*i==0x66) {
2187     i++;
2188     size=2;
2189     len++;
2190     }
2191    
2192     switch(i[0]) {
2193     case 0x8a:
2194     if ((i[1]&0xc0)==0x80) {
2195     r=(i[1]>>3)&7;
2196     dir=SIG_READ;
2197     size=1;
2198     len+=6;
2199     break;
2200     }
2201     break;
2202     case 0x88:
2203     if ((i[1]&0xc0)==0x80) {
2204     r=(i[1]>>3)&7;
2205     dir=SIG_WRITE;
2206     size=1;
2207     len+=6;
2208     break;
2209     }
2210     break;
2211     case 0x8b:
2212     if ((i[1]&0xc0)==0x80) {
2213     r=(i[1]>>3)&7;
2214     dir=SIG_READ;
2215     len+=6;
2216     break;
2217     }
2218     if ((i[1]&0xc0)==0x40) {
2219     r=(i[1]>>3)&7;
2220     dir=SIG_READ;
2221     len+=3;
2222     break;
2223     }
2224     break;
2225     case 0x89:
2226     if ((i[1]&0xc0)==0x80) {
2227     r=(i[1]>>3)&7;
2228     dir=SIG_WRITE;
2229     len+=6;
2230     break;
2231     }
2232     if ((i[1]&0xc0)==0x40) {
2233     r=(i[1]>>3)&7;
2234     dir=SIG_WRITE;
2235     len+=3;
2236     break;
2237     }
2238     break;
2239     }
2240     }
2241    
2242     if (r!=-1) {
2243     void* pr=NULL;
2244     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2245    
2246     switch(r) {
2247     case 0: pr=&(sc.eax); break;
2248     case 1: pr=&(sc.ecx); break;
2249     case 2: pr=&(sc.edx); break;
2250     case 3: pr=&(sc.ebx); break;
2251     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2252     case 5: pr=(size>1)?
2253     (void*)(&(sc.ebp)):
2254     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2255     case 6: pr=(size>1)?
2256     (void*)(&(sc.esi)):
2257     (void*)(((uae_u8*)&(sc.edx))+1); break;
2258     case 7: pr=(size>1)?
2259     (void*)(&(sc.edi)):
2260     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2261     default: abort();
2262     }
2263     if (pr) {
2264     blockinfo* bi;
2265    
2266     if (currprefs.comp_oldsegv) {
2267     addr-=NATMEM_OFFSET;
2268    
2269     if ((addr>=0x10000000 && addr<0x40000000) ||
2270     (addr>=0x50000000)) {
2271     write_log("Suspicious address in %x SEGV handler.\n",addr);
2272     }
2273     if (dir==SIG_READ) {
2274     switch(size) {
2275     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2276     case 2: *((uae_u16*)pr)=get_word(addr); break;
2277     case 4: *((uae_u32*)pr)=get_long(addr); break;
2278     default: abort();
2279     }
2280     }
2281     else { /* write */
2282     switch(size) {
2283     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2284     case 2: put_word(addr,*((uae_u16*)pr)); break;
2285     case 4: put_long(addr,*((uae_u32*)pr)); break;
2286     default: abort();
2287     }
2288     }
2289     write_log("Handled one access!\n");
2290     fflush(stdout);
2291     segvcount++;
2292     sc.eip+=len;
2293     }
2294     else {
2295     void* tmp=target;
2296     int i;
2297     uae_u8 vecbuf[5];
2298    
2299     addr-=NATMEM_OFFSET;
2300    
2301     if ((addr>=0x10000000 && addr<0x40000000) ||
2302     (addr>=0x50000000)) {
2303     write_log("Suspicious address in %x SEGV handler.\n",addr);
2304     }
2305    
2306     target=(uae_u8*)sc.eip;
2307     for (i=0;i<5;i++)
2308     vecbuf[i]=target[i];
2309     emit_byte(0xe9);
2310     emit_long((uae_u32)veccode-(uae_u32)target-4);
2311     write_log("Create jump to %p\n",veccode);
2312    
2313     write_log("Handled one access!\n");
2314     fflush(stdout);
2315     segvcount++;
2316    
2317     target=veccode;
2318    
2319     if (dir==SIG_READ) {
2320     switch(size) {
2321     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2322     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2323     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2324     default: abort();
2325     }
2326     }
2327     else { /* write */
2328     switch(size) {
2329     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2330     case 2: put_word(addr,*((uae_u16*)pr)); break;
2331     case 4: put_long(addr,*((uae_u32*)pr)); break;
2332     default: abort();
2333     }
2334     }
2335     for (i=0;i<5;i++)
2336     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2337     raw_mov_l_mi((uae_u32)&in_handler,0);
2338     emit_byte(0xe9);
2339     emit_long(sc.eip+len-(uae_u32)target-4);
2340     in_handler=1;
2341     target=tmp;
2342     }
2343     bi=active;
2344     while (bi) {
2345     if (bi->handler &&
2346     (uae_u8*)bi->direct_handler<=i &&
2347     (uae_u8*)bi->nexthandler>i) {
2348     write_log("deleted trigger (%p<%p<%p) %p\n",
2349     bi->handler,
2350     i,
2351     bi->nexthandler,
2352     bi->pc_p);
2353     invalidate_block(bi);
2354     raise_in_cl_list(bi);
2355     set_special(0);
2356     return;
2357     }
2358     bi=bi->next;
2359     }
2360     /* Not found in the active list. Might be a rom routine that
2361     is in the dormant list */
2362     bi=dormant;
2363     while (bi) {
2364     if (bi->handler &&
2365     (uae_u8*)bi->direct_handler<=i &&
2366     (uae_u8*)bi->nexthandler>i) {
2367     write_log("deleted trigger (%p<%p<%p) %p\n",
2368     bi->handler,
2369     i,
2370     bi->nexthandler,
2371     bi->pc_p);
2372     invalidate_block(bi);
2373     raise_in_cl_list(bi);
2374     set_special(0);
2375     return;
2376     }
2377     bi=bi->next;
2378     }
2379     write_log("Huh? Could not find trigger!\n");
2380     return;
2381     }
2382     }
2383     write_log("Can't handle access!\n");
2384     for (j=0;j<10;j++) {
2385     write_log("instruction byte %2d is %02x\n",j,i[j]);
2386     }
2387     write_log("Please send the above info (starting at \"fault address\") to\n"
2388     "bmeyer@csse.monash.edu.au\n"
2389     "This shouldn't happen ;-)\n");
2390     fflush(stdout);
2391     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2392     }
2393     #endif
2394    
2395    
2396     /*************************************************************************
2397     * Checking for CPU features *
2398     *************************************************************************/
2399    
2400 gbeauche 1.3 struct cpuinfo_x86 {
2401     uae_u8 x86; // CPU family
2402     uae_u8 x86_vendor; // CPU vendor
2403     uae_u8 x86_processor; // CPU canonical processor type
2404     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2405     uae_u32 x86_hwcap;
2406     uae_u8 x86_model;
2407     uae_u8 x86_mask;
2408     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2409     char x86_vendor_id[16];
2410     };
2411     struct cpuinfo_x86 cpuinfo;
2412    
/* Values stored in cpuinfo_x86.x86_vendor.  The known vendors are numbered
   consecutively from 0; 0xff stands for anything unrecognized. */
enum {
	X86_VENDOR_INTEL = 0,
	X86_VENDOR_CYRIX,		/* 1 */
	X86_VENDOR_AMD,			/* 2 */
	X86_VENDOR_UMC,			/* 3 */
	X86_VENDOR_NEXGEN,		/* 4 */
	X86_VENDOR_CENTAUR,		/* 5 */
	X86_VENDOR_RISE,		/* 6 */
	X86_VENDOR_TRANSMETA,	/* 7 */
	X86_VENDOR_NSC,			/* 8 */
	X86_VENDOR_UNKNOWN = 0xff
};
2425    
/* Canonical processor types stored in cpuinfo_x86.x86_processor.  They
   index x86_processor_string_table[] and x86_alignments[], so the order
   must match those tables.  X86_PROCESSOR_max is the number of entries
   and doubles as the "not classified" marker. */
enum {
	X86_PROCESSOR_I386 = 0,		/* 80386 */
	X86_PROCESSOR_I486 = 1,		/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM = 2,
	X86_PROCESSOR_PENTIUMPRO = 3,
	X86_PROCESSOR_K6 = 4,
	X86_PROCESSOR_ATHLON = 5,
	X86_PROCESSOR_PENTIUM4 = 6,
	X86_PROCESSOR_max = 7
};
2436    
2437     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2438     "80386",
2439     "80486",
2440     "Pentium",
2441     "PentiumPro",
2442     "K6",
2443     "Athlon",
2444     "Pentium4"
2445     };
2446    
2447     static struct ptt {
2448     const int align_loop;
2449     const int align_loop_max_skip;
2450     const int align_jump;
2451     const int align_jump_max_skip;
2452     const int align_func;
2453     }
2454     x86_alignments[X86_PROCESSOR_max] = {
2455     { 4, 3, 4, 3, 4 },
2456     { 16, 15, 16, 15, 16 },
2457     { 16, 7, 16, 7, 16 },
2458     { 16, 15, 16, 7, 16 },
2459     { 32, 7, 32, 7, 32 },
2460 gbeauche 1.4 { 16, 7, 16, 7, 16 },
2461 gbeauche 1.3 { 0, 0, 0, 0, 0 }
2462     };
2463 gbeauche 1.1
2464 gbeauche 1.3 static void
2465     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2466 gbeauche 1.1 {
2467 gbeauche 1.3 char *v = c->x86_vendor_id;
2468    
2469     if (!strcmp(v, "GenuineIntel"))
2470     c->x86_vendor = X86_VENDOR_INTEL;
2471     else if (!strcmp(v, "AuthenticAMD"))
2472     c->x86_vendor = X86_VENDOR_AMD;
2473     else if (!strcmp(v, "CyrixInstead"))
2474     c->x86_vendor = X86_VENDOR_CYRIX;
2475     else if (!strcmp(v, "Geode by NSC"))
2476     c->x86_vendor = X86_VENDOR_NSC;
2477     else if (!strcmp(v, "UMC UMC UMC "))
2478     c->x86_vendor = X86_VENDOR_UMC;
2479     else if (!strcmp(v, "CentaurHauls"))
2480     c->x86_vendor = X86_VENDOR_CENTAUR;
2481     else if (!strcmp(v, "NexGenDriven"))
2482     c->x86_vendor = X86_VENDOR_NEXGEN;
2483     else if (!strcmp(v, "RiseRiseRise"))
2484     c->x86_vendor = X86_VENDOR_RISE;
2485     else if (!strcmp(v, "GenuineTMx86") ||
2486     !strcmp(v, "TransmetaCPU"))
2487     c->x86_vendor = X86_VENDOR_TRANSMETA;
2488     else
2489     c->x86_vendor = X86_VENDOR_UNKNOWN;
2490     }
2491 gbeauche 1.1
2492 gbeauche 1.3 static void
2493     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2494     {
2495     static uae_u8 cpuid_space[256];
2496     uae_u8* tmp=get_target();
2497 gbeauche 1.1
2498 gbeauche 1.3 set_target(cpuid_space);
2499     raw_push_l_r(0); /* eax */
2500     raw_push_l_r(1); /* ecx */
2501     raw_push_l_r(2); /* edx */
2502     raw_push_l_r(3); /* ebx */
2503     raw_mov_l_rm(0,(uae_u32)&op);
2504     raw_cpuid(0);
2505     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2506     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2507     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2508     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2509     raw_pop_l_r(3);
2510     raw_pop_l_r(2);
2511     raw_pop_l_r(1);
2512     raw_pop_l_r(0);
2513     raw_ret();
2514     set_target(tmp);
2515 gbeauche 1.1
2516 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
2517 gbeauche 1.1 }
2518    
2519 gbeauche 1.3 static void
2520     raw_init_cpu(void)
2521 gbeauche 1.1 {
2522 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
2523    
2524     /* Defaults */
2525     c->x86_vendor = X86_VENDOR_UNKNOWN;
2526     c->cpuid_level = -1; /* CPUID not detected */
2527     c->x86_model = c->x86_mask = 0; /* So far unknown... */
2528     c->x86_vendor_id[0] = '\0'; /* Unset */
2529     c->x86_hwcap = 0;
2530    
2531     /* Get vendor name */
2532     c->x86_vendor_id[12] = '\0';
2533     cpuid(0x00000000,
2534     (uae_u32 *)&c->cpuid_level,
2535     (uae_u32 *)&c->x86_vendor_id[0],
2536     (uae_u32 *)&c->x86_vendor_id[8],
2537     (uae_u32 *)&c->x86_vendor_id[4]);
2538     x86_get_cpu_vendor(c);
2539    
2540     /* Intel-defined flags: level 0x00000001 */
2541     c->x86_brand_id = 0;
2542     if ( c->cpuid_level >= 0x00000001 ) {
2543     uae_u32 tfms, brand_id;
2544     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2545     c->x86 = (tfms >> 8) & 15;
2546     c->x86_model = (tfms >> 4) & 15;
2547     c->x86_brand_id = brand_id & 0xff;
2548     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2549     (c->x86 == 0xf)) {
2550     /* AMD Extended Family and Model Values */
2551     c->x86 += (tfms >> 20) & 0xff;
2552     c->x86_model += (tfms >> 12) & 0xf0;
2553     }
2554     c->x86_mask = tfms & 15;
2555     } else {
2556     /* Have CPUID level 0 only - unheard of */
2557     c->x86 = 4;
2558     }
2559    
2560     /* Canonicalize processor ID */
2561     c->x86_processor = X86_PROCESSOR_max;
2562     switch (c->x86) {
2563     case 3:
2564     c->x86_processor = X86_PROCESSOR_I386;
2565     break;
2566     case 4:
2567     c->x86_processor = X86_PROCESSOR_I486;
2568     break;
2569     case 5:
2570     if (c->x86_vendor == X86_VENDOR_AMD)
2571     c->x86_processor = X86_PROCESSOR_K6;
2572     else
2573     c->x86_processor = X86_PROCESSOR_PENTIUM;
2574     break;
2575     case 6:
2576     if (c->x86_vendor == X86_VENDOR_AMD)
2577     c->x86_processor = X86_PROCESSOR_ATHLON;
2578     else
2579     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2580     break;
2581     case 15:
2582     if (c->x86_vendor == X86_VENDOR_INTEL) {
2583     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2584     if (c->x86_brand_id >= 8)
2585     c->x86_processor = X86_PROCESSOR_PENTIUM4;
2586     }
2587     break;
2588     }
2589     if (c->x86_processor == X86_PROCESSOR_max) {
2590     fprintf(stderr, "Error: unknown processor type\n");
2591     fprintf(stderr, " Family : %d\n", c->x86);
2592     fprintf(stderr, " Model : %d\n", c->x86_model);
2593     fprintf(stderr, " Mask : %d\n", c->x86_mask);
2594     if (c->x86_brand_id)
2595     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2596     abort();
2597     }
2598    
2599     /* Have CMOV support? */
2600     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2601    
2602     /* Can the host CPU suffer from partial register stalls? */
2603     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2604     #if 1
2605     /* It appears that partial register writes are a bad idea even on
2606 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
2607     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2608 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2609     have_rat_stall = true;
2610 gbeauche 1.1 #endif
2611 gbeauche 1.3
2612     /* Alignments */
2613     if (tune_alignment) {
2614     align_loops = x86_alignments[c->x86_processor].align_loop;
2615     align_jumps = x86_alignments[c->x86_processor].align_jump;
2616     }
2617    
2618     write_log("Max CPUID level=%d Processor is %s [%s]\n",
2619     c->cpuid_level, c->x86_vendor_id,
2620     x86_processor_string_table[c->x86_processor]);
2621 gbeauche 1.1 }
2622    
2623    
2624     /*************************************************************************
2625     * FPU stuff *
2626     *************************************************************************/
2627    
2628    
2629     static __inline__ void raw_fp_init(void)
2630     {
2631     int i;
2632    
2633     for (i=0;i<N_FREGS;i++)
2634     live.spos[i]=-2;
2635     live.tos=-1; /* Stack is empty */
2636     }
2637    
2638     static __inline__ void raw_fp_cleanup_drop(void)
2639     {
2640     #if 0
2641     /* using FINIT instead of popping all the entries.
2642     Seems to have side effects --- there is display corruption in
2643     Quake when this is used */
2644     if (live.tos>1) {
2645     emit_byte(0x9b);
2646     emit_byte(0xdb);
2647     emit_byte(0xe3);
2648     live.tos=-1;
2649     }
2650     #endif
2651     while (live.tos>=1) {
2652     emit_byte(0xde);
2653     emit_byte(0xd9);
2654     live.tos-=2;
2655     }
2656     while (live.tos>=0) {
2657     emit_byte(0xdd);
2658     emit_byte(0xd8);
2659     live.tos--;
2660     }
2661     raw_fp_init();
2662     }
2663    
2664     static __inline__ void make_tos(int r)
2665     {
2666     int p,q;
2667    
2668     if (live.spos[r]<0) { /* Register not yet on stack */
2669     emit_byte(0xd9);
2670     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2671     live.tos++;
2672     live.spos[r]=live.tos;
2673     live.onstack[live.tos]=r;
2674     return;
2675     }
2676     /* Register is on stack */
2677     if (live.tos==live.spos[r])
2678     return;
2679     p=live.spos[r];
2680     q=live.onstack[live.tos];
2681    
2682     emit_byte(0xd9);
2683     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2684     live.onstack[live.tos]=r;
2685     live.spos[r]=live.tos;
2686     live.onstack[p]=q;
2687     live.spos[q]=p;
2688     }
2689    
2690     static __inline__ void make_tos2(int r, int r2)
2691     {
2692     int q;
2693    
2694     make_tos(r2); /* Put the reg that's supposed to end up in position2
2695     on top */
2696    
2697     if (live.spos[r]<0) { /* Register not yet on stack */
2698     make_tos(r); /* This will extend the stack */
2699     return;
2700     }
2701     /* Register is on stack */
2702     emit_byte(0xd9);
2703     emit_byte(0xc9); /* Move r2 into position 2 */
2704    
2705     q=live.onstack[live.tos-1];
2706     live.onstack[live.tos]=q;
2707     live.spos[q]=live.tos;
2708     live.onstack[live.tos-1]=r2;
2709     live.spos[r2]=live.tos-1;
2710    
2711     make_tos(r); /* And r into 1 */
2712     }
2713    
2714     static __inline__ int stackpos(int r)
2715     {
2716     if (live.spos[r]<0)
2717     abort();
2718     if (live.tos<live.spos[r]) {
2719     printf("Looking for spos for fnreg %d\n",r);
2720     abort();
2721     }
2722     return live.tos-live.spos[r];
2723     }
2724    
2725     static __inline__ void usereg(int r)
2726     {
2727     if (live.spos[r]<0)
2728     make_tos(r);
2729     }
2730    
2731     /* This is called with one FP value in a reg *above* tos, which it will
2732     pop off the stack if necessary */
2733     static __inline__ void tos_make(int r)
2734     {
2735     if (live.spos[r]<0) {
2736     live.tos++;
2737     live.spos[r]=live.tos;
2738     live.onstack[live.tos]=r;
2739     return;
2740     }
2741     emit_byte(0xdd);
2742     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2743     and pop it*/
2744     }
2745    
2746    
2747     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2748     {
2749     make_tos(r);
2750     emit_byte(0xdd);
2751     emit_byte(0x15);
2752     emit_long(m);
2753     }
2754     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2755    
2756     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2757     {
2758     make_tos(r);
2759     emit_byte(0xdd);
2760     emit_byte(0x1d);
2761     emit_long(m);
2762     live.onstack[live.tos]=-1;
2763     live.tos--;
2764     live.spos[r]=-2;
2765     }
2766     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2767    
2768     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2769     {
2770     emit_byte(0xdd);
2771     emit_byte(0x05);
2772     emit_long(m);
2773     tos_make(r);
2774     }
2775     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2776    
2777     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2778     {
2779     emit_byte(0xdb);
2780     emit_byte(0x05);
2781     emit_long(m);
2782     tos_make(r);
2783     }
2784     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2785    
2786     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2787     {
2788     make_tos(r);
2789     emit_byte(0xdb);
2790     emit_byte(0x15);
2791     emit_long(m);
2792     }
2793     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2794    
2795     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2796     {
2797     emit_byte(0xd9);
2798     emit_byte(0x05);
2799     emit_long(m);
2800     tos_make(r);
2801     }
2802     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2803    
2804     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2805     {
2806     make_tos(r);
2807     emit_byte(0xd9);
2808     emit_byte(0x15);
2809     emit_long(m);
2810     }
2811     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2812    
2813     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2814     {
2815     int rs;
2816    
2817     /* Stupid x87 can't write a long double to mem without popping the
2818     stack! */
2819     usereg(r);
2820     rs=stackpos(r);
2821     emit_byte(0xd9); /* Get a copy to the top of stack */
2822     emit_byte(0xc0+rs);
2823    
2824     emit_byte(0xdb); /* store and pop it */
2825     emit_byte(0x3d);
2826     emit_long(m);
2827     }
2828     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2829    
2830     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2831     {
2832     int rs;
2833    
2834     make_tos(r);
2835     emit_byte(0xdb); /* store and pop it */
2836     emit_byte(0x3d);
2837     emit_long(m);
2838     live.onstack[live.tos]=-1;
2839     live.tos--;
2840     live.spos[r]=-2;
2841     }
2842     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2843    
2844     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2845     {
2846     emit_byte(0xdb);
2847     emit_byte(0x2d);
2848     emit_long(m);
2849     tos_make(r);
2850     }
2851     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2852    
2853     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2854     {
2855     emit_byte(0xd9);
2856     emit_byte(0xeb);
2857     tos_make(r);
2858     }
2859     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2860    
2861     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2862     {
2863     emit_byte(0xd9);
2864     emit_byte(0xec);
2865     tos_make(r);
2866     }
2867     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2868    
2869     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2870     {
2871     emit_byte(0xd9);
2872     emit_byte(0xea);
2873     tos_make(r);
2874     }
2875     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2876    
2877     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2878     {
2879     emit_byte(0xd9);
2880     emit_byte(0xed);
2881     tos_make(r);
2882     }
2883     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2884    
2885     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2886     {
2887     emit_byte(0xd9);
2888     emit_byte(0xe8);
2889     tos_make(r);
2890     }
2891     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2892    
2893     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2894     {
2895     emit_byte(0xd9);
2896     emit_byte(0xee);
2897     tos_make(r);
2898     }
2899     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2900    
2901     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2902     {
2903     int ds;
2904    
2905     usereg(s);
2906     ds=stackpos(s);
2907     if (ds==0 && live.spos[d]>=0) {
2908     /* source is on top of stack, and we already have the dest */
2909     int dd=stackpos(d);
2910     emit_byte(0xdd);
2911     emit_byte(0xd0+dd);
2912     }
2913     else {
2914     emit_byte(0xd9);
2915     emit_byte(0xc0+ds); /* duplicate source on tos */
2916     tos_make(d); /* store to destination, pop if necessary */
2917     }
2918     }
2919     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2920    
2921     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2922     {
2923     emit_byte(0xd9);
2924     emit_byte(0xa8+index);
2925     emit_long(base);
2926     }
2927     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2928    
2929    
2930     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2931     {
2932     int ds;
2933    
2934     if (d!=s) {
2935     usereg(s);
2936     ds=stackpos(s);
2937     emit_byte(0xd9);
2938     emit_byte(0xc0+ds); /* duplicate source */
2939     emit_byte(0xd9);
2940     emit_byte(0xfa); /* take square root */
2941     tos_make(d); /* store to destination */
2942     }
2943     else {
2944     make_tos(d);
2945     emit_byte(0xd9);
2946     emit_byte(0xfa); /* take square root */
2947     }
2948     }
2949     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2950    
2951     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2952     {
2953     int ds;
2954    
2955     if (d!=s) {
2956     usereg(s);
2957     ds=stackpos(s);
2958     emit_byte(0xd9);
2959     emit_byte(0xc0+ds); /* duplicate source */
2960     emit_byte(0xd9);
2961     emit_byte(0xe1); /* take fabs */
2962     tos_make(d); /* store to destination */
2963     }
2964     else {
2965     make_tos(d);
2966     emit_byte(0xd9);
2967     emit_byte(0xe1); /* take fabs */
2968     }
2969     }
2970     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2971    
2972     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2973     {
2974     int ds;
2975    
2976     if (d!=s) {
2977     usereg(s);
2978     ds=stackpos(s);
2979     emit_byte(0xd9);
2980     emit_byte(0xc0+ds); /* duplicate source */
2981     emit_byte(0xd9);
2982     emit_byte(0xfc); /* take frndint */
2983     tos_make(d); /* store to destination */
2984     }
2985     else {
2986     make_tos(d);
2987     emit_byte(0xd9);
2988     emit_byte(0xfc); /* take frndint */
2989     }
2990     }
2991     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2992    
2993     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2994     {
2995     int ds;
2996    
2997     if (d!=s) {
2998     usereg(s);
2999     ds=stackpos(s);
3000     emit_byte(0xd9);
3001     emit_byte(0xc0+ds); /* duplicate source */
3002     emit_byte(0xd9);
3003     emit_byte(0xff); /* take cos */
3004     tos_make(d); /* store to destination */
3005     }
3006     else {
3007     make_tos(d);
3008     emit_byte(0xd9);
3009     emit_byte(0xff); /* take cos */
3010     }
3011     }
3012     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3013    
3014     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3015     {
3016     int ds;
3017    
3018     if (d!=s) {
3019     usereg(s);
3020     ds=stackpos(s);
3021     emit_byte(0xd9);
3022     emit_byte(0xc0+ds); /* duplicate source */
3023     emit_byte(0xd9);
3024     emit_byte(0xfe); /* take sin */
3025     tos_make(d); /* store to destination */
3026     }
3027     else {
3028     make_tos(d);
3029     emit_byte(0xd9);
3030     emit_byte(0xfe); /* take sin */
3031     }
3032     }
3033     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3034    
3035     double one=1;
3036     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3037     {
3038     int ds;
3039    
3040     usereg(s);
3041     ds=stackpos(s);
3042     emit_byte(0xd9);
3043     emit_byte(0xc0+ds); /* duplicate source */
3044    
3045     emit_byte(0xd9);
3046     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3047     emit_byte(0xd9);
3048     emit_byte(0xfc); /* rndint */
3049     emit_byte(0xd9);
3050     emit_byte(0xc9); /* swap top two elements */
3051     emit_byte(0xd8);
3052     emit_byte(0xe1); /* subtract rounded from original */
3053     emit_byte(0xd9);
3054     emit_byte(0xf0); /* f2xm1 */
3055     emit_byte(0xdc);
3056     emit_byte(0x05);
3057     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3058     emit_byte(0xd9);
3059     emit_byte(0xfd); /* and scale it */
3060     emit_byte(0xdd);
3061     emit_byte(0xd9); /* take he rounded value off */
3062     tos_make(d); /* store to destination */
3063     }
3064     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3065    
3066     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3067     {
3068     int ds;
3069    
3070     usereg(s);
3071     ds=stackpos(s);
3072     emit_byte(0xd9);
3073     emit_byte(0xc0+ds); /* duplicate source */
3074     emit_byte(0xd9);
3075     emit_byte(0xea); /* fldl2e */
3076     emit_byte(0xde);
3077     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3078    
3079     emit_byte(0xd9);
3080     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3081     emit_byte(0xd9);
3082     emit_byte(0xfc); /* rndint */
3083     emit_byte(0xd9);
3084     emit_byte(0xc9); /* swap top two elements */
3085     emit_byte(0xd8);
3086     emit_byte(0xe1); /* subtract rounded from original */
3087     emit_byte(0xd9);
3088     emit_byte(0xf0); /* f2xm1 */
3089     emit_byte(0xdc);
3090     emit_byte(0x05);
3091     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3092     emit_byte(0xd9);
3093     emit_byte(0xfd); /* and scale it */
3094     emit_byte(0xdd);
3095     emit_byte(0xd9); /* take he rounded value off */
3096     tos_make(d); /* store to destination */
3097     }
3098     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3099    
3100     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3101     {
3102     int ds;
3103    
3104     usereg(s);
3105     ds=stackpos(s);
3106     emit_byte(0xd9);
3107     emit_byte(0xc0+ds); /* duplicate source */
3108     emit_byte(0xd9);
3109     emit_byte(0xe8); /* push '1' */
3110     emit_byte(0xd9);
3111     emit_byte(0xc9); /* swap top two */
3112     emit_byte(0xd9);
3113     emit_byte(0xf1); /* take 1*log2(x) */
3114     tos_make(d); /* store to destination */
3115     }
3116     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3117    
3118    
3119     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3120     {
3121     int ds;
3122    
3123     if (d!=s) {
3124     usereg(s);
3125     ds=stackpos(s);
3126     emit_byte(0xd9);
3127     emit_byte(0xc0+ds); /* duplicate source */
3128     emit_byte(0xd9);
3129     emit_byte(0xe0); /* take fchs */
3130     tos_make(d); /* store to destination */
3131     }
3132     else {
3133     make_tos(d);
3134     emit_byte(0xd9);
3135     emit_byte(0xe0); /* take fchs */
3136     }
3137     }
3138     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3139    
3140     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3141     {
3142     int ds;
3143    
3144     usereg(s);
3145     usereg(d);
3146    
3147     if (live.spos[s]==live.tos) {
3148     /* Source is on top of stack */
3149     ds=stackpos(d);
3150     emit_byte(0xdc);
3151     emit_byte(0xc0+ds); /* add source to dest*/
3152     }
3153     else {
3154     make_tos(d);
3155     ds=stackpos(s);
3156    
3157     emit_byte(0xd8);
3158     emit_byte(0xc0+ds); /* add source to dest*/
3159     }
3160     }
3161     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3162    
3163     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3164     {
3165     int ds;
3166    
3167     usereg(s);
3168     usereg(d);
3169    
3170     if (live.spos[s]==live.tos) {
3171     /* Source is on top of stack */
3172     ds=stackpos(d);
3173     emit_byte(0xdc);
3174     emit_byte(0xe8+ds); /* sub source from dest*/
3175     }
3176     else {
3177     make_tos(d);
3178     ds=stackpos(s);
3179    
3180     emit_byte(0xd8);
3181     emit_byte(0xe0+ds); /* sub src from dest */
3182     }
3183     }
3184     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3185    
3186     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3187     {
3188     int ds;
3189    
3190     usereg(s);
3191     usereg(d);
3192    
3193     make_tos(d);
3194     ds=stackpos(s);
3195    
3196     emit_byte(0xdd);
3197     emit_byte(0xe0+ds); /* cmp dest with source*/
3198     }
3199     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3200    
3201     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3202     {
3203     int ds;
3204    
3205     usereg(s);
3206     usereg(d);
3207    
3208     if (live.spos[s]==live.tos) {
3209     /* Source is on top of stack */
3210     ds=stackpos(d);
3211     emit_byte(0xdc);
3212     emit_byte(0xc8+ds); /* mul dest by source*/
3213     }
3214     else {
3215     make_tos(d);
3216     ds=stackpos(s);
3217    
3218     emit_byte(0xd8);
3219     emit_byte(0xc8+ds); /* mul dest by source*/
3220     }
3221     }
3222     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3223    
3224     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3225     {
3226     int ds;
3227    
3228     usereg(s);
3229     usereg(d);
3230    
3231     if (live.spos[s]==live.tos) {
3232     /* Source is on top of stack */
3233     ds=stackpos(d);
3234     emit_byte(0xdc);
3235     emit_byte(0xf8+ds); /* div dest by source */
3236     }
3237     else {
3238     make_tos(d);
3239     ds=stackpos(s);
3240    
3241     emit_byte(0xd8);
3242     emit_byte(0xf0+ds); /* div dest by source*/
3243     }
3244     }
3245     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3246    
3247     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3248     {
3249     int ds;
3250    
3251     usereg(s);
3252     usereg(d);
3253    
3254     make_tos2(d,s);
3255     ds=stackpos(s);
3256    
3257     if (ds!=1) {
3258     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3259     abort();
3260     }
3261     emit_byte(0xd9);
3262     emit_byte(0xf8); /* take rem from dest by source */
3263     }
3264     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3265    
3266     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3267     {
3268     int ds;
3269    
3270     usereg(s);
3271     usereg(d);
3272    
3273     make_tos2(d,s);
3274     ds=stackpos(s);
3275    
3276     if (ds!=1) {
3277     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3278     abort();
3279     }
3280     emit_byte(0xd9);
3281     emit_byte(0xf5); /* take rem1 from dest by source */
3282     }
3283     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3284    
3285    
3286     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3287     {
3288     make_tos(r);
3289     emit_byte(0xd9); /* ftst */
3290     emit_byte(0xe4);
3291     }
3292     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3293    
3294     /* %eax register is clobbered if target processor doesn't support fucomi */
3295     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
3296     #define FFLAG_NREG EAX_INDEX
3297    
3298     static __inline__ void raw_fflags_into_flags(int r)
3299     {
3300     int p;
3301    
3302     usereg(r);
3303     p=stackpos(r);
3304    
3305     emit_byte(0xd9);
3306     emit_byte(0xee); /* Push 0 */
3307     emit_byte(0xd9);
3308     emit_byte(0xc9+p); /* swap top two around */
3309     if (have_cmov) {
3310     // gb-- fucomi is for P6 cores only, not K6-2 then...
3311     emit_byte(0xdb);
3312     emit_byte(0xe9+p); /* fucomi them */
3313     }
3314     else {
3315     emit_byte(0xdd);
3316     emit_byte(0xe1+p); /* fucom them */
3317     emit_byte(0x9b);
3318     emit_byte(0xdf);
3319     emit_byte(0xe0); /* fstsw ax */
3320     raw_sahf(0); /* sahf */
3321     }
3322     emit_byte(0xdd);
3323     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3324     }