ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.6
Committed: 2002-10-03T16:13:46Z (22 years ago) by gbeauche
Branch: MAIN
Changes since 1.5: +25 -0 lines
Log Message:
JIT: add copyright notices to make it clear that this is real derivative
work of GPL code (UAE-JIT). Additions and improvements are from B2
developers.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10     *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
27     only target, and it's easier this way... */
28    
29 gbeauche 1.5 #include "flags_x86.h"
30    
31 gbeauche 1.1 /*************************************************************************
32     * Some basic information about the target CPU *
33     *************************************************************************/
34    
/* Numeric ids of the eight 32-bit general purpose registers.  The emitters
   in this file add these numbers directly into opcode and ModRM bytes. */
#define EAX_INDEX 0
#define ECX_INDEX 1
#define EDX_INDEX 2
#define EBX_INDEX 3
#define ESP_INDEX 4
#define EBP_INDEX 5
#define ESI_INDEX 6
#define EDI_INDEX 7

/* The register in which subroutines return an integer return value */
#define REG_RESULT EAX_INDEX

/* The registers subroutines take their first and second argument in */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* Handle the _fastcall parameters of ECX and EDX */
#define REG_PAR1 ECX_INDEX
#define REG_PAR2 EDX_INDEX
#else
#define REG_PAR1 EAX_INDEX
#define REG_PAR2 EDX_INDEX
#endif

/* Three registers that are not used for any of the above */
#define REG_NOPAR1 ESI_INDEX
#define REG_NOPAR2 EBP_INDEX
#define REG_NOPAR3 EBX_INDEX

/* The register we use for preloading regs.pc_p */
#define REG_PC_PRE EAX_INDEX
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): on this branch REG_PC_TMP is the same register as
   REG_PC_PRE -- confirm that this is intended. */
#define REG_PC_TMP EAX_INDEX
#else
/* Another register that is not the above */
#define REG_PC_TMP ECX_INDEX
#endif

/* Register that can be used for shiftcount. -1 if any reg will do */
#define SHIFTCOUNT_NREG ECX_INDEX
/* %eax will hold the low 32 bits after a 32x32 mul */
#define MUL_NREG1 EAX_INDEX
/* %edx will hold the high 32 bits */
#define MUL_NREG2 EDX_INDEX
73    
74     uae_s8 always_used[]={4,-1};
75     uae_s8 can_byte[]={0,1,2,3,-1};
76     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
77    
78     /* cpuopti mutate instruction handlers to assume registers are saved
79     by the caller */
80     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
81    
82     /* This *should* be the same as call_saved. But:
83     - We might not really know which registers are saved, and which aren't,
84     so we need to preserve some, but don't want to rely on everyone else
85     also saving those registers
86     - Special registers (such like the stack pointer) should not be "preserved"
87     by pushing, even though they are "saved" across function calls
88     */
89     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
90    
/* Whether classes of instructions do or don't clobber the native flags */

/* Classes that expand to nothing */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW32
#define CLOBBER_SETCC

/* Arithmetic and logic classes: expand to a clobber_flags() call */
#define CLOBBER_SUB  clobber_flags()
#define CLOBBER_SBB  clobber_flags()
#define CLOBBER_CMP  clobber_flags()
#define CLOBBER_ADD  clobber_flags()
#define CLOBBER_ADC  clobber_flags()
#define CLOBBER_AND  clobber_flags()
#define CLOBBER_OR   clobber_flags()
#define CLOBBER_XOR  clobber_flags()
#define CLOBBER_TEST clobber_flags()
#define CLOBBER_MUL  clobber_flags()

/* Rotate, shift and word-swap classes: expand to a clobber_flags() call */
#define CLOBBER_ROL  clobber_flags()
#define CLOBBER_ROR  clobber_flags()
#define CLOBBER_SHLL clobber_flags()
#define CLOBBER_SHRL clobber_flags()
#define CLOBBER_SHRA clobber_flags()
#define CLOBBER_SW16 clobber_flags()

/* Bit test / bit scan classes: expand to a clobber_flags() call */
#define CLOBBER_BT   clobber_flags()
#define CLOBBER_BSF  clobber_flags()
124    
/* Compile-time switches for shorter encodings. */

/* Allows the accumulator-only opcode forms (see raw_sub_w_ri). */
const bool optimize_accum = true;

/* Not referenced in this section of the file.
   NOTE(review): presumably gates sign-extended imm8 forms -- confirm. */
const bool optimize_imm8 = true;

/* Allows the D0/D1 "by one" forms for shifts and rotates with count 1. */
const bool optimize_shift_once = true;
128    
129     /*************************************************************************
130     * Actual encoding of the instructions on the target CPU *
131     *************************************************************************/
132    
133 gbeauche 1.2 static __inline__ int isaccum(int r)
134     {
135     return (r == EAX_INDEX);
136     }
137    
138 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
139     {
140     return (x>=-128 && x<=127);
141     }
142    
143     static __inline__ int isword(uae_s32 x)
144     {
145     return (x>=-32768 && x<=32767);
146     }
147    
148     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
149     {
150     emit_byte(0x50+r);
151     }
152     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
153    
154     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
155     {
156     emit_byte(0x58+r);
157     }
158     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
159    
160     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
161     {
162     emit_byte(0x0f);
163     emit_byte(0xba);
164     emit_byte(0xe0+r);
165     emit_byte(i);
166     }
167     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
168    
169     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
170     {
171     emit_byte(0x0f);
172     emit_byte(0xa3);
173     emit_byte(0xc0+8*b+r);
174     }
175     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
176    
177     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
178     {
179     emit_byte(0x0f);
180     emit_byte(0xba);
181     emit_byte(0xf8+r);
182     emit_byte(i);
183     }
184     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
185    
186     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
187     {
188     emit_byte(0x0f);
189     emit_byte(0xbb);
190     emit_byte(0xc0+8*b+r);
191     }
192     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
193    
194    
195     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
196     {
197     emit_byte(0x0f);
198     emit_byte(0xba);
199     emit_byte(0xf0+r);
200     emit_byte(i);
201     }
202     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
203    
204     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
205     {
206     emit_byte(0x0f);
207     emit_byte(0xb3);
208     emit_byte(0xc0+8*b+r);
209     }
210     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
211    
212     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
213     {
214     emit_byte(0x0f);
215     emit_byte(0xba);
216     emit_byte(0xe8+r);
217     emit_byte(i);
218     }
219     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
220    
221     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
222     {
223     emit_byte(0x0f);
224     emit_byte(0xab);
225     emit_byte(0xc0+8*b+r);
226     }
227     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
228    
229     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
230     {
231     emit_byte(0x66);
232     if (isbyte(i)) {
233     emit_byte(0x83);
234     emit_byte(0xe8+d);
235     emit_byte(i);
236     }
237     else {
238 gbeauche 1.2 if (optimize_accum && isaccum(d))
239     emit_byte(0x2d);
240     else {
241 gbeauche 1.1 emit_byte(0x81);
242     emit_byte(0xe8+d);
243 gbeauche 1.2 }
244 gbeauche 1.1 emit_word(i);
245     }
246     }
247     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
248    
249    
250     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
251     {
252     emit_byte(0x8b);
253     emit_byte(0x05+8*d);
254     emit_long(s);
255     }
256     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
257    
258     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
259     {
260     emit_byte(0xc7);
261     emit_byte(0x05);
262     emit_long(d);
263     emit_long(s);
264     }
265     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
266    
267     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
268     {
269     emit_byte(0x66);
270     emit_byte(0xc7);
271     emit_byte(0x05);
272     emit_long(d);
273     emit_word(s);
274     }
275     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
276    
277     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
278     {
279     emit_byte(0xc6);
280     emit_byte(0x05);
281     emit_long(d);
282     emit_byte(s);
283     }
284     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
285    
286     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
287     {
288     if (optimize_shift_once && (i == 1)) {
289     emit_byte(0xd0);
290     emit_byte(0x05);
291     emit_long(d);
292     }
293     else {
294     emit_byte(0xc0);
295     emit_byte(0x05);
296     emit_long(d);
297     emit_byte(i);
298     }
299     }
300     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
301    
302     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
303     {
304     if (optimize_shift_once && (i == 1)) {
305     emit_byte(0xd0);
306     emit_byte(0xc0+r);
307     }
308     else {
309     emit_byte(0xc0);
310     emit_byte(0xc0+r);
311     emit_byte(i);
312     }
313     }
314     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
315    
316     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
317     {
318     emit_byte(0x66);
319     emit_byte(0xc1);
320     emit_byte(0xc0+r);
321     emit_byte(i);
322     }
323     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
324    
325     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
326     {
327     if (optimize_shift_once && (i == 1)) {
328     emit_byte(0xd1);
329     emit_byte(0xc0+r);
330     }
331     else {
332     emit_byte(0xc1);
333     emit_byte(0xc0+r);
334     emit_byte(i);
335     }
336     }
337     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
338    
339     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
340     {
341     emit_byte(0xd3);
342     emit_byte(0xc0+d);
343     }
344     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
345    
346     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
347     {
348     emit_byte(0x66);
349     emit_byte(0xd3);
350     emit_byte(0xc0+d);
351     }
352     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
353    
354     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
355     {
356     emit_byte(0xd2);
357     emit_byte(0xc0+d);
358     }
359     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
360    
361     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
362     {
363     emit_byte(0xd3);
364     emit_byte(0xe0+d);
365     }
366     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
367    
368     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
369     {
370     emit_byte(0x66);
371     emit_byte(0xd3);
372     emit_byte(0xe0+d);
373     }
374     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
375    
376     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
377     {
378     emit_byte(0xd2);
379     emit_byte(0xe0+d);
380     }
381     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
382    
383     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
384     {
385     if (optimize_shift_once && (i == 1)) {
386     emit_byte(0xd0);
387     emit_byte(0xc8+r);
388     }
389     else {
390     emit_byte(0xc0);
391     emit_byte(0xc8+r);
392     emit_byte(i);
393     }
394     }
395     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
396    
397     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
398     {
399     emit_byte(0x66);
400     emit_byte(0xc1);
401     emit_byte(0xc8+r);
402     emit_byte(i);
403     }
404     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
405    
406     // gb-- used for making an fpcr value in compemu_fpp.cpp
407     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
408     {
409     emit_byte(0x0b);
410     emit_byte(0x05+8*d);
411     emit_long(s);
412     }
413     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
414    
415     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
416     {
417     if (optimize_shift_once && (i == 1)) {
418     emit_byte(0xd1);
419     emit_byte(0xc8+r);
420     }
421     else {
422     emit_byte(0xc1);
423     emit_byte(0xc8+r);
424     emit_byte(i);
425     }
426     }
427     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
428    
429     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
430     {
431     emit_byte(0xd3);
432     emit_byte(0xc8+d);
433     }
434     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
435    
436     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
437     {
438     emit_byte(0x66);
439     emit_byte(0xd3);
440     emit_byte(0xc8+d);
441     }
442     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
443    
444     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
445     {
446     emit_byte(0xd2);
447     emit_byte(0xc8+d);
448     }
449     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
450    
451     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
452     {
453     emit_byte(0xd3);
454     emit_byte(0xe8+d);
455     }
456     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
457    
458     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
459     {
460     emit_byte(0x66);
461     emit_byte(0xd3);
462     emit_byte(0xe8+d);
463     }
464     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
465    
466     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
467     {
468     emit_byte(0xd2);
469     emit_byte(0xe8+d);
470     }
471     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
472    
473     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
474     {
475     emit_byte(0xd3);
476     emit_byte(0xf8+d);
477     }
478     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
479    
480     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
481     {
482     emit_byte(0x66);
483     emit_byte(0xd3);
484     emit_byte(0xf8+d);
485     }
486     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
487    
488     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
489     {
490     emit_byte(0xd2);
491     emit_byte(0xf8+d);
492     }
493     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
494    
495     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
496     {
497     if (optimize_shift_once && (i == 1)) {
498     emit_byte(0xd1);
499     emit_byte(0xe0+r);
500     }
501     else {
502     emit_byte(0xc1);
503     emit_byte(0xe0+r);
504     emit_byte(i);
505     }
506     }
507     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
508    
509     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
510     {
511     emit_byte(0x66);
512     emit_byte(0xc1);
513     emit_byte(0xe0+r);
514     emit_byte(i);
515     }
516     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
517    
518     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
519     {
520     if (optimize_shift_once && (i == 1)) {
521     emit_byte(0xd0);
522     emit_byte(0xe0+r);
523     }
524     else {
525     emit_byte(0xc0);
526     emit_byte(0xe0+r);
527     emit_byte(i);
528     }
529     }
530     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
531    
532     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
533     {
534     if (optimize_shift_once && (i == 1)) {
535     emit_byte(0xd1);
536     emit_byte(0xe8+r);
537     }
538     else {
539     emit_byte(0xc1);
540     emit_byte(0xe8+r);
541     emit_byte(i);
542     }
543     }
544     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
545    
546     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
547     {
548     emit_byte(0x66);
549     emit_byte(0xc1);
550     emit_byte(0xe8+r);
551     emit_byte(i);
552     }
553     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
554    
555     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
556     {
557     if (optimize_shift_once && (i == 1)) {
558     emit_byte(0xd0);
559     emit_byte(0xe8+r);
560     }
561     else {
562     emit_byte(0xc0);
563     emit_byte(0xe8+r);
564     emit_byte(i);
565     }
566     }
567     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
568    
569     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
570     {
571     if (optimize_shift_once && (i == 1)) {
572     emit_byte(0xd1);
573     emit_byte(0xf8+r);
574     }
575     else {
576     emit_byte(0xc1);
577     emit_byte(0xf8+r);
578     emit_byte(i);
579     }
580     }
581     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
582    
583     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
584     {
585     emit_byte(0x66);
586     emit_byte(0xc1);
587     emit_byte(0xf8+r);
588     emit_byte(i);
589     }
590     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
591    
592     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
593     {
594     if (optimize_shift_once && (i == 1)) {
595     emit_byte(0xd0);
596     emit_byte(0xf8+r);
597     }
598     else {
599     emit_byte(0xc0);
600     emit_byte(0xf8+r);
601     emit_byte(i);
602     }
603     }
604     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
605    
606     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
607     {
608     emit_byte(0x9e);
609     }
610     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
611    
612     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
613     {
614     emit_byte(0x0f);
615     emit_byte(0xa2);
616     }
617     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
618    
619     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
620     {
621     emit_byte(0x9f);
622     }
623     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
624    
625     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
626     {
627     emit_byte(0x0f);
628     emit_byte(0x90+cc);
629     emit_byte(0xc0+d);
630     }
631     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
632    
633     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
634     {
635     emit_byte(0x0f);
636     emit_byte(0x90+cc);
637     emit_byte(0x05);
638     emit_long(d);
639     }
640     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
641    
642     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
643     {
644     if (have_cmov) {
645     emit_byte(0x0f);
646     emit_byte(0x40+cc);
647     emit_byte(0xc0+8*d+s);
648     }
649     else { /* replacement using branch and mov */
650     int uncc=(cc^1);
651     emit_byte(0x70+uncc);
652     emit_byte(2); /* skip next 2 bytes if not cc=true */
653     emit_byte(0x89);
654     emit_byte(0xc0+8*s+d);
655     }
656     }
657     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
658    
659     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
660     {
661     emit_byte(0x0f);
662     emit_byte(0xbc);
663     emit_byte(0xc0+8*d+s);
664     }
665     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
666    
667     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
668     {
669     emit_byte(0x0f);
670     emit_byte(0xbf);
671     emit_byte(0xc0+8*d+s);
672     }
673     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
674    
675     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
676     {
677     emit_byte(0x0f);
678     emit_byte(0xbe);
679     emit_byte(0xc0+8*d+s);
680     }
681     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
682    
683     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
684     {
685     emit_byte(0x0f);
686     emit_byte(0xb7);
687     emit_byte(0xc0+8*d+s);
688     }
689     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
690    
691     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
692     {
693     emit_byte(0x0f);
694     emit_byte(0xb6);
695     emit_byte(0xc0+8*d+s);
696     }
697     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
698    
699     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
700     {
701     emit_byte(0x0f);
702     emit_byte(0xaf);
703     emit_byte(0xc0+8*d+s);
704     }
705     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
706    
707     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
708     {
709     if (d!=MUL_NREG1 || s!=MUL_NREG2)
710     abort();
711     emit_byte(0xf7);
712     emit_byte(0xea);
713     }
714     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
715    
716     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
717     {
718     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
719     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
720     abort();
721     }
722     emit_byte(0xf7);
723     emit_byte(0xe2);
724     }
725     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
726    
727     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
728     {
729     abort(); /* %^$&%^$%#^ x86! */
730     emit_byte(0x0f);
731     emit_byte(0xaf);
732     emit_byte(0xc0+8*d+s);
733     }
734     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
735    
736     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
737     {
738     emit_byte(0x88);
739     emit_byte(0xc0+8*s+d);
740     }
741     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
742    
743     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
744     {
745     emit_byte(0x66);
746     emit_byte(0x89);
747     emit_byte(0xc0+8*s+d);
748     }
749     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
750    
751     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
752     {
753     int isebp=(baser==5)?0x40:0;
754     int fi;
755    
756     switch(factor) {
757     case 1: fi=0; break;
758     case 2: fi=1; break;
759     case 4: fi=2; break;
760     case 8: fi=3; break;
761     default: abort();
762     }
763    
764    
765     emit_byte(0x8b);
766     emit_byte(0x04+8*d+isebp);
767     emit_byte(baser+8*index+0x40*fi);
768     if (isebp)
769     emit_byte(0x00);
770     }
771     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
772    
773     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
774     {
775     int fi;
776     int isebp;
777    
778     switch(factor) {
779     case 1: fi=0; break;
780     case 2: fi=1; break;
781     case 4: fi=2; break;
782     case 8: fi=3; break;
783     default: abort();
784     }
785     isebp=(baser==5)?0x40:0;
786    
787     emit_byte(0x66);
788     emit_byte(0x8b);
789     emit_byte(0x04+8*d+isebp);
790     emit_byte(baser+8*index+0x40*fi);
791     if (isebp)
792     emit_byte(0x00);
793     }
794     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
795    
796     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
797     {
798     int fi;
799     int isebp;
800    
801     switch(factor) {
802     case 1: fi=0; break;
803     case 2: fi=1; break;
804     case 4: fi=2; break;
805     case 8: fi=3; break;
806     default: abort();
807     }
808     isebp=(baser==5)?0x40:0;
809    
810     emit_byte(0x8a);
811     emit_byte(0x04+8*d+isebp);
812     emit_byte(baser+8*index+0x40*fi);
813     if (isebp)
814     emit_byte(0x00);
815     }
816     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
817    
818     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
819     {
820     int fi;
821     int isebp;
822    
823     switch(factor) {
824     case 1: fi=0; break;
825     case 2: fi=1; break;
826     case 4: fi=2; break;
827     case 8: fi=3; break;
828     default: abort();
829     }
830    
831    
832     isebp=(baser==5)?0x40:0;
833    
834     emit_byte(0x89);
835     emit_byte(0x04+8*s+isebp);
836     emit_byte(baser+8*index+0x40*fi);
837     if (isebp)
838     emit_byte(0x00);
839     }
840     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
841    
842     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
843     {
844     int fi;
845     int isebp;
846    
847     switch(factor) {
848     case 1: fi=0; break;
849     case 2: fi=1; break;
850     case 4: fi=2; break;
851     case 8: fi=3; break;
852     default: abort();
853     }
854     isebp=(baser==5)?0x40:0;
855    
856     emit_byte(0x66);
857     emit_byte(0x89);
858     emit_byte(0x04+8*s+isebp);
859     emit_byte(baser+8*index+0x40*fi);
860     if (isebp)
861     emit_byte(0x00);
862     }
863     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
864    
865     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
866     {
867     int fi;
868     int isebp;
869    
870     switch(factor) {
871     case 1: fi=0; break;
872     case 2: fi=1; break;
873     case 4: fi=2; break;
874     case 8: fi=3; break;
875     default: abort();
876     }
877     isebp=(baser==5)?0x40:0;
878    
879     emit_byte(0x88);
880     emit_byte(0x04+8*s+isebp);
881     emit_byte(baser+8*index+0x40*fi);
882     if (isebp)
883     emit_byte(0x00);
884     }
885     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
886    
887     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
888     {
889     int fi;
890    
891     switch(factor) {
892     case 1: fi=0; break;
893     case 2: fi=1; break;
894     case 4: fi=2; break;
895     case 8: fi=3; break;
896     default: abort();
897     }
898    
899     emit_byte(0x89);
900     emit_byte(0x84+8*s);
901     emit_byte(baser+8*index+0x40*fi);
902     emit_long(base);
903     }
904     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
905    
906     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
907     {
908     int fi;
909    
910     switch(factor) {
911     case 1: fi=0; break;
912     case 2: fi=1; break;
913     case 4: fi=2; break;
914     case 8: fi=3; break;
915     default: abort();
916     }
917    
918     emit_byte(0x66);
919     emit_byte(0x89);
920     emit_byte(0x84+8*s);
921     emit_byte(baser+8*index+0x40*fi);
922     emit_long(base);
923     }
924     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
925    
926     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
927     {
928     int fi;
929    
930     switch(factor) {
931     case 1: fi=0; break;
932     case 2: fi=1; break;
933     case 4: fi=2; break;
934     case 8: fi=3; break;
935     default: abort();
936     }
937    
938     emit_byte(0x88);
939     emit_byte(0x84+8*s);
940     emit_byte(baser+8*index+0x40*fi);
941     emit_long(base);
942     }
943     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
944    
945     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
946     {
947     int fi;
948    
949     switch(factor) {
950     case 1: fi=0; break;
951     case 2: fi=1; break;
952     case 4: fi=2; break;
953     case 8: fi=3; break;
954     default: abort();
955     }
956    
957     emit_byte(0x8b);
958     emit_byte(0x84+8*d);
959     emit_byte(baser+8*index+0x40*fi);
960     emit_long(base);
961     }
962     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
963    
964     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
965     {
966     int fi;
967    
968     switch(factor) {
969     case 1: fi=0; break;
970     case 2: fi=1; break;
971     case 4: fi=2; break;
972     case 8: fi=3; break;
973     default: abort();
974     }
975    
976     emit_byte(0x66);
977     emit_byte(0x8b);
978     emit_byte(0x84+8*d);
979     emit_byte(baser+8*index+0x40*fi);
980     emit_long(base);
981     }
982     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
983    
984     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
985     {
986     int fi;
987    
988     switch(factor) {
989     case 1: fi=0; break;
990     case 2: fi=1; break;
991     case 4: fi=2; break;
992     case 8: fi=3; break;
993     default: abort();
994     }
995    
996     emit_byte(0x8a);
997     emit_byte(0x84+8*d);
998     emit_byte(baser+8*index+0x40*fi);
999     emit_long(base);
1000     }
1001     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1002    
1003     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1004     {
1005     int fi;
1006     switch(factor) {
1007     case 1: fi=0; break;
1008     case 2: fi=1; break;
1009     case 4: fi=2; break;
1010     case 8: fi=3; break;
1011     default:
1012     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1013     abort();
1014     }
1015     emit_byte(0x8b);
1016     emit_byte(0x04+8*d);
1017     emit_byte(0x05+8*index+64*fi);
1018     emit_long(base);
1019     }
1020     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1021    
1022     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1023     {
1024     int fi;
1025     switch(factor) {
1026     case 1: fi=0; break;
1027     case 2: fi=1; break;
1028     case 4: fi=2; break;
1029     case 8: fi=3; break;
1030     default:
1031     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1032     abort();
1033     }
1034     if (have_cmov) {
1035     emit_byte(0x0f);
1036     emit_byte(0x40+cond);
1037     emit_byte(0x04+8*d);
1038     emit_byte(0x05+8*index+64*fi);
1039     emit_long(base);
1040     }
1041     else { /* replacement using branch and mov */
1042     int uncc=(cond^1);
1043     emit_byte(0x70+uncc);
1044     emit_byte(7); /* skip next 7 bytes if not cc=true */
1045     emit_byte(0x8b);
1046     emit_byte(0x04+8*d);
1047     emit_byte(0x05+8*index+64*fi);
1048     emit_long(base);
1049     }
1050     }
1051     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1052    
1053     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1054     {
1055     if (have_cmov) {
1056     emit_byte(0x0f);
1057     emit_byte(0x40+cond);
1058     emit_byte(0x05+8*d);
1059     emit_long(mem);
1060     }
1061     else { /* replacement using branch and mov */
1062     int uncc=(cond^1);
1063     emit_byte(0x70+uncc);
1064     emit_byte(6); /* skip next 6 bytes if not cc=true */
1065     emit_byte(0x8b);
1066     emit_byte(0x05+8*d);
1067     emit_long(mem);
1068     }
1069     }
1070     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1071    
1072     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1073     {
1074     emit_byte(0x8b);
1075     emit_byte(0x40+8*d+s);
1076     emit_byte(offset);
1077     }
1078     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1079    
1080     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1081     {
1082     emit_byte(0x66);
1083     emit_byte(0x8b);
1084     emit_byte(0x40+8*d+s);
1085     emit_byte(offset);
1086     }
1087     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1088    
1089     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1090     {
1091     emit_byte(0x8a);
1092     emit_byte(0x40+8*d+s);
1093     emit_byte(offset);
1094     }
1095     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1096    
1097     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1098     {
1099     emit_byte(0x8b);
1100     emit_byte(0x80+8*d+s);
1101     emit_long(offset);
1102     }
1103     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1104    
1105     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1106     {
1107     emit_byte(0x66);
1108     emit_byte(0x8b);
1109     emit_byte(0x80+8*d+s);
1110     emit_long(offset);
1111     }
1112     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1113    
1114     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1115     {
1116     emit_byte(0x8a);
1117     emit_byte(0x80+8*d+s);
1118     emit_long(offset);
1119     }
1120     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1121    
1122     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1123     {
1124     emit_byte(0xc7);
1125     emit_byte(0x40+d);
1126     emit_byte(offset);
1127     emit_long(i);
1128     }
1129     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1130    
1131     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1132     {
1133     emit_byte(0x66);
1134     emit_byte(0xc7);
1135     emit_byte(0x40+d);
1136     emit_byte(offset);
1137     emit_word(i);
1138     }
1139     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1140    
1141     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1142     {
1143     emit_byte(0xc6);
1144     emit_byte(0x40+d);
1145     emit_byte(offset);
1146     emit_byte(i);
1147     }
1148     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1149    
1150     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1151     {
1152     emit_byte(0x89);
1153     emit_byte(0x40+8*s+d);
1154     emit_byte(offset);
1155     }
1156     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1157    
1158     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1159     {
1160     emit_byte(0x66);
1161     emit_byte(0x89);
1162     emit_byte(0x40+8*s+d);
1163     emit_byte(offset);
1164     }
1165     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1166    
1167     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1168     {
1169     emit_byte(0x88);
1170     emit_byte(0x40+8*s+d);
1171     emit_byte(offset);
1172     }
1173     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1174    
1175     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1176     {
1177     if (optimize_imm8 && isbyte(offset)) {
1178     emit_byte(0x8d);
1179     emit_byte(0x40+8*d+s);
1180     emit_byte(offset);
1181     }
1182     else {
1183     emit_byte(0x8d);
1184     emit_byte(0x80+8*d+s);
1185     emit_long(offset);
1186     }
1187     }
1188     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1189    
1190     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1191     {
1192     int fi;
1193    
1194     switch(factor) {
1195     case 1: fi=0; break;
1196     case 2: fi=1; break;
1197     case 4: fi=2; break;
1198     case 8: fi=3; break;
1199     default: abort();
1200     }
1201    
1202     if (optimize_imm8 && isbyte(offset)) {
1203     emit_byte(0x8d);
1204     emit_byte(0x44+8*d);
1205     emit_byte(0x40*fi+8*index+s);
1206     emit_byte(offset);
1207     }
1208     else {
1209     emit_byte(0x8d);
1210     emit_byte(0x84+8*d);
1211     emit_byte(0x40*fi+8*index+s);
1212     emit_long(offset);
1213     }
1214     }
1215     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1216    
1217     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1218     {
1219     int isebp=(s==5)?0x40:0;
1220     int fi;
1221    
1222     switch(factor) {
1223     case 1: fi=0; break;
1224     case 2: fi=1; break;
1225     case 4: fi=2; break;
1226     case 8: fi=3; break;
1227     default: abort();
1228     }
1229    
1230     emit_byte(0x8d);
1231     emit_byte(0x04+8*d+isebp);
1232     emit_byte(0x40*fi+8*index+s);
1233     if (isebp)
1234     emit_byte(0);
1235     }
1236     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1237    
1238     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1239     {
1240     if (optimize_imm8 && isbyte(offset)) {
1241     emit_byte(0x89);
1242     emit_byte(0x40+8*s+d);
1243     emit_byte(offset);
1244     }
1245     else {
1246     emit_byte(0x89);
1247     emit_byte(0x80+8*s+d);
1248     emit_long(offset);
1249     }
1250     }
1251     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1252    
1253     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1254     {
1255     emit_byte(0x66);
1256     emit_byte(0x89);
1257     emit_byte(0x80+8*s+d);
1258     emit_long(offset);
1259     }
1260     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1261    
1262     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1263     {
1264     if (optimize_imm8 && isbyte(offset)) {
1265     emit_byte(0x88);
1266     emit_byte(0x40+8*s+d);
1267     emit_byte(offset);
1268     }
1269     else {
1270     emit_byte(0x88);
1271     emit_byte(0x80+8*s+d);
1272     emit_long(offset);
1273     }
1274     }
1275     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1276    
1277     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1278     {
1279     emit_byte(0x0f);
1280     emit_byte(0xc8+r);
1281     }
1282     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1283    
1284     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1285     {
1286     emit_byte(0x66);
1287     emit_byte(0xc1);
1288     emit_byte(0xc0+r);
1289     emit_byte(0x08);
1290     }
1291     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1292    
1293     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1294     {
1295     emit_byte(0x89);
1296     emit_byte(0xc0+8*s+d);
1297     }
1298     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1299    
1300     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1301     {
1302     emit_byte(0x89);
1303     emit_byte(0x05+8*s);
1304     emit_long(d);
1305     }
1306     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1307    
1308     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1309     {
1310     emit_byte(0x66);
1311     emit_byte(0x89);
1312     emit_byte(0x05+8*s);
1313     emit_long(d);
1314     }
1315     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1316    
1317     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1318     {
1319     emit_byte(0x66);
1320     emit_byte(0x8b);
1321     emit_byte(0x05+8*d);
1322     emit_long(s);
1323     }
1324     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1325    
1326     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1327     {
1328     emit_byte(0x88);
1329     emit_byte(0x05+8*s);
1330     emit_long(d);
1331     }
1332     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1333    
1334     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1335     {
1336     emit_byte(0x8a);
1337     emit_byte(0x05+8*d);
1338     emit_long(s);
1339     }
1340     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1341    
1342     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1343     {
1344     emit_byte(0xb8+d);
1345     emit_long(s);
1346     }
1347     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1348    
1349     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1350     {
1351     emit_byte(0x66);
1352     emit_byte(0xb8+d);
1353     emit_word(s);
1354     }
1355     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1356    
1357     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1358     {
1359     emit_byte(0xb0+d);
1360     emit_byte(s);
1361     }
1362     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1363    
1364     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1365     {
1366     emit_byte(0x81);
1367     emit_byte(0x15);
1368     emit_long(d);
1369     emit_long(s);
1370     }
1371     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1372    
1373     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1374     {
1375     if (optimize_imm8 && isbyte(s)) {
1376     emit_byte(0x83);
1377     emit_byte(0x05);
1378     emit_long(d);
1379     emit_byte(s);
1380     }
1381     else {
1382     emit_byte(0x81);
1383     emit_byte(0x05);
1384     emit_long(d);
1385     emit_long(s);
1386     }
1387     }
1388     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1389    
1390     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1391     {
1392     emit_byte(0x66);
1393     emit_byte(0x81);
1394     emit_byte(0x05);
1395     emit_long(d);
1396     emit_word(s);
1397     }
1398     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1399    
1400     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1401     {
1402     emit_byte(0x80);
1403     emit_byte(0x05);
1404     emit_long(d);
1405     emit_byte(s);
1406     }
1407     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1408    
1409     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1410     {
1411 gbeauche 1.2 if (optimize_accum && isaccum(d))
1412     emit_byte(0xa9);
1413     else {
1414 gbeauche 1.1 emit_byte(0xf7);
1415     emit_byte(0xc0+d);
1416 gbeauche 1.2 }
1417 gbeauche 1.1 emit_long(i);
1418     }
1419     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1420    
1421     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1422     {
1423     emit_byte(0x85);
1424     emit_byte(0xc0+8*s+d);
1425     }
1426     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1427    
1428     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1429     {
1430     emit_byte(0x66);
1431     emit_byte(0x85);
1432     emit_byte(0xc0+8*s+d);
1433     }
1434     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1435    
1436     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1437     {
1438     emit_byte(0x84);
1439     emit_byte(0xc0+8*s+d);
1440     }
1441     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1442    
1443     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1444     {
1445     if (optimize_imm8 && isbyte(i)) {
1446 gbeauche 1.2 emit_byte(0x83);
1447     emit_byte(0xe0+d);
1448     emit_byte(i);
1449 gbeauche 1.1 }
1450     else {
1451 gbeauche 1.2 if (optimize_accum && isaccum(d))
1452     emit_byte(0x25);
1453     else {
1454     emit_byte(0x81);
1455     emit_byte(0xe0+d);
1456     }
1457     emit_long(i);
1458 gbeauche 1.1 }
1459     }
1460     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1461    
1462     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1463     {
1464 gbeauche 1.2 emit_byte(0x66);
1465     if (optimize_imm8 && isbyte(i)) {
1466     emit_byte(0x83);
1467     emit_byte(0xe0+d);
1468     emit_byte(i);
1469     }
1470     else {
1471     if (optimize_accum && isaccum(d))
1472     emit_byte(0x25);
1473     else {
1474     emit_byte(0x81);
1475     emit_byte(0xe0+d);
1476     }
1477     emit_word(i);
1478     }
1479 gbeauche 1.1 }
1480     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1481    
1482     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1483     {
1484     emit_byte(0x21);
1485     emit_byte(0xc0+8*s+d);
1486     }
1487     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1488    
1489     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1490     {
1491     emit_byte(0x66);
1492     emit_byte(0x21);
1493     emit_byte(0xc0+8*s+d);
1494     }
1495     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1496    
1497     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1498     {
1499     emit_byte(0x20);
1500     emit_byte(0xc0+8*s+d);
1501     }
1502     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1503    
1504     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1505     {
1506     if (optimize_imm8 && isbyte(i)) {
1507     emit_byte(0x83);
1508     emit_byte(0xc8+d);
1509     emit_byte(i);
1510     }
1511     else {
1512 gbeauche 1.2 if (optimize_accum && isaccum(d))
1513     emit_byte(0x0d);
1514     else {
1515 gbeauche 1.1 emit_byte(0x81);
1516     emit_byte(0xc8+d);
1517 gbeauche 1.2 }
1518 gbeauche 1.1 emit_long(i);
1519     }
1520     }
1521     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1522    
1523     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1524     {
1525     emit_byte(0x09);
1526     emit_byte(0xc0+8*s+d);
1527     }
1528     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1529    
1530     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1531     {
1532     emit_byte(0x66);
1533     emit_byte(0x09);
1534     emit_byte(0xc0+8*s+d);
1535     }
1536     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1537    
1538     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1539     {
1540     emit_byte(0x08);
1541     emit_byte(0xc0+8*s+d);
1542     }
1543     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1544    
1545     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1546     {
1547     emit_byte(0x11);
1548     emit_byte(0xc0+8*s+d);
1549     }
1550     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1551    
1552     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1553     {
1554     emit_byte(0x66);
1555     emit_byte(0x11);
1556     emit_byte(0xc0+8*s+d);
1557     }
1558     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1559    
1560     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1561     {
1562     emit_byte(0x10);
1563     emit_byte(0xc0+8*s+d);
1564     }
1565     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1566    
1567     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1568     {
1569     emit_byte(0x01);
1570     emit_byte(0xc0+8*s+d);
1571     }
1572     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1573    
1574     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1575     {
1576     emit_byte(0x66);
1577     emit_byte(0x01);
1578     emit_byte(0xc0+8*s+d);
1579     }
1580     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1581    
1582     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1583     {
1584     emit_byte(0x00);
1585     emit_byte(0xc0+8*s+d);
1586     }
1587     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1588    
1589     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1590     {
1591     if (isbyte(i)) {
1592     emit_byte(0x83);
1593     emit_byte(0xe8+d);
1594     emit_byte(i);
1595     }
1596     else {
1597 gbeauche 1.2 if (optimize_accum && isaccum(d))
1598     emit_byte(0x2d);
1599     else {
1600 gbeauche 1.1 emit_byte(0x81);
1601     emit_byte(0xe8+d);
1602 gbeauche 1.2 }
1603 gbeauche 1.1 emit_long(i);
1604     }
1605     }
1606     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1607    
1608     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1609     {
1610 gbeauche 1.2 if (optimize_accum && isaccum(d))
1611     emit_byte(0x2c);
1612     else {
1613 gbeauche 1.1 emit_byte(0x80);
1614     emit_byte(0xe8+d);
1615 gbeauche 1.2 }
1616 gbeauche 1.1 emit_byte(i);
1617     }
1618     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1619    
1620     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1621     {
1622     if (isbyte(i)) {
1623     emit_byte(0x83);
1624     emit_byte(0xc0+d);
1625     emit_byte(i);
1626     }
1627     else {
1628 gbeauche 1.2 if (optimize_accum && isaccum(d))
1629     emit_byte(0x05);
1630     else {
1631 gbeauche 1.1 emit_byte(0x81);
1632     emit_byte(0xc0+d);
1633 gbeauche 1.2 }
1634 gbeauche 1.1 emit_long(i);
1635     }
1636     }
1637     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1638    
1639     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1640     {
1641 gbeauche 1.2 emit_byte(0x66);
1642 gbeauche 1.1 if (isbyte(i)) {
1643     emit_byte(0x83);
1644     emit_byte(0xc0+d);
1645     emit_byte(i);
1646     }
1647     else {
1648 gbeauche 1.2 if (optimize_accum && isaccum(d))
1649     emit_byte(0x05);
1650     else {
1651 gbeauche 1.1 emit_byte(0x81);
1652     emit_byte(0xc0+d);
1653 gbeauche 1.2 }
1654 gbeauche 1.1 emit_word(i);
1655     }
1656     }
1657     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1658    
1659     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1660     {
1661 gbeauche 1.2 if (optimize_accum && isaccum(d))
1662     emit_byte(0x04);
1663     else {
1664     emit_byte(0x80);
1665     emit_byte(0xc0+d);
1666     }
1667 gbeauche 1.1 emit_byte(i);
1668     }
1669     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1670    
1671     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1672     {
1673     emit_byte(0x19);
1674     emit_byte(0xc0+8*s+d);
1675     }
1676     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1677    
1678     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1679     {
1680     emit_byte(0x66);
1681     emit_byte(0x19);
1682     emit_byte(0xc0+8*s+d);
1683     }
1684     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1685    
1686     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1687     {
1688     emit_byte(0x18);
1689     emit_byte(0xc0+8*s+d);
1690     }
1691     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1692    
1693     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1694     {
1695     emit_byte(0x29);
1696     emit_byte(0xc0+8*s+d);
1697     }
1698     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1699    
1700     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1701     {
1702     emit_byte(0x66);
1703     emit_byte(0x29);
1704     emit_byte(0xc0+8*s+d);
1705     }
1706     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1707    
1708     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1709     {
1710     emit_byte(0x28);
1711     emit_byte(0xc0+8*s+d);
1712     }
1713     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1714    
1715     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1716     {
1717     emit_byte(0x39);
1718     emit_byte(0xc0+8*s+d);
1719     }
1720     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1721    
1722     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1723     {
1724     if (optimize_imm8 && isbyte(i)) {
1725     emit_byte(0x83);
1726     emit_byte(0xf8+r);
1727     emit_byte(i);
1728     }
1729     else {
1730 gbeauche 1.2 if (optimize_accum && isaccum(r))
1731     emit_byte(0x3d);
1732     else {
1733 gbeauche 1.1 emit_byte(0x81);
1734     emit_byte(0xf8+r);
1735 gbeauche 1.2 }
1736 gbeauche 1.1 emit_long(i);
1737     }
1738     }
1739     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1740    
1741     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1742     {
1743     emit_byte(0x66);
1744     emit_byte(0x39);
1745     emit_byte(0xc0+8*s+d);
1746     }
1747     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1748    
1749 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1750     {
1751     emit_byte(0x80);
1752     emit_byte(0x3d);
1753     emit_long(d);
1754     emit_byte(s);
1755     }
1756     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1757    
1758 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1759     {
1760 gbeauche 1.2 if (optimize_accum && isaccum(d))
1761     emit_byte(0x3c);
1762     else {
1763 gbeauche 1.1 emit_byte(0x80);
1764     emit_byte(0xf8+d);
1765 gbeauche 1.2 }
1766 gbeauche 1.1 emit_byte(i);
1767     }
1768     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1769    
1770     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1771     {
1772     emit_byte(0x38);
1773     emit_byte(0xc0+8*s+d);
1774     }
1775     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1776    
1777     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1778     {
1779     int fi;
1780    
1781     switch(factor) {
1782     case 1: fi=0; break;
1783     case 2: fi=1; break;
1784     case 4: fi=2; break;
1785     case 8: fi=3; break;
1786     default: abort();
1787     }
1788     emit_byte(0x39);
1789     emit_byte(0x04+8*d);
1790     emit_byte(5+8*index+0x40*fi);
1791     emit_long(offset);
1792     }
1793     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1794    
1795     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1796     {
1797     emit_byte(0x31);
1798     emit_byte(0xc0+8*s+d);
1799     }
1800     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1801    
1802     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1803     {
1804     emit_byte(0x66);
1805     emit_byte(0x31);
1806     emit_byte(0xc0+8*s+d);
1807     }
1808     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1809    
1810     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1811     {
1812     emit_byte(0x30);
1813     emit_byte(0xc0+8*s+d);
1814     }
1815     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1816    
1817     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1818     {
1819     if (optimize_imm8 && isbyte(s)) {
1820     emit_byte(0x83);
1821     emit_byte(0x2d);
1822     emit_long(d);
1823     emit_byte(s);
1824     }
1825     else {
1826     emit_byte(0x81);
1827     emit_byte(0x2d);
1828     emit_long(d);
1829     emit_long(s);
1830     }
1831     }
1832     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1833    
1834     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1835     {
1836     if (optimize_imm8 && isbyte(s)) {
1837     emit_byte(0x83);
1838     emit_byte(0x3d);
1839     emit_long(d);
1840     emit_byte(s);
1841     }
1842     else {
1843     emit_byte(0x81);
1844     emit_byte(0x3d);
1845     emit_long(d);
1846     emit_long(s);
1847     }
1848     }
1849     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1850    
1851     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1852     {
1853     emit_byte(0x87);
1854     emit_byte(0xc0+8*r1+r2);
1855     }
1856     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1857    
1858     /*************************************************************************
1859     * FIXME: string-related instructions *
1860     *************************************************************************/
1861    
1862     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1863     {
1864     emit_byte(0xfc);
1865     }
1866     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1867    
1868     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1869     {
1870     emit_byte(0xfd);
1871     }
1872     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1873    
1874     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1875     {
1876     emit_byte(0xa4);
1877     }
1878     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1879    
1880     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1881     {
1882     emit_byte(0xa5);
1883     }
1884     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1885    
1886     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1887     {
1888     emit_byte(0xf3);
1889     }
1890     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1891    
1892     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1893     {
1894     raw_rep();
1895     raw_movs_b();
1896     }
1897     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1898    
1899     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1900     {
1901     raw_rep();
1902     raw_movs_l();
1903     }
1904     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1905    
1906     /*************************************************************************
1907     * FIXME: mem access modes probably wrong *
1908     *************************************************************************/
1909    
1910     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1911     {
1912     emit_byte(0x9c);
1913     }
1914     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1915    
1916     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1917     {
1918     emit_byte(0x9d);
1919     }
1920     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1921    
1922     /*************************************************************************
1923     * Unoptimizable stuff --- jump *
1924     *************************************************************************/
1925    
1926     static __inline__ void raw_call_r(R4 r)
1927     {
1928     emit_byte(0xff);
1929     emit_byte(0xd0+r);
1930 gbeauche 1.5 }
1931    
1932     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1933     {
1934     int mu;
1935     switch(m) {
1936     case 1: mu=0; break;
1937     case 2: mu=1; break;
1938     case 4: mu=2; break;
1939     case 8: mu=3; break;
1940     default: abort();
1941     }
1942     emit_byte(0xff);
1943     emit_byte(0x14);
1944     emit_byte(0x05+8*r+0x40*mu);
1945     emit_long(base);
1946 gbeauche 1.1 }
1947    
1948     static __inline__ void raw_jmp_r(R4 r)
1949     {
1950     emit_byte(0xff);
1951     emit_byte(0xe0+r);
1952     }
1953    
1954     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1955     {
1956     int mu;
1957     switch(m) {
1958     case 1: mu=0; break;
1959     case 2: mu=1; break;
1960     case 4: mu=2; break;
1961     case 8: mu=3; break;
1962     default: abort();
1963     }
1964     emit_byte(0xff);
1965     emit_byte(0x24);
1966     emit_byte(0x05+8*r+0x40*mu);
1967     emit_long(base);
1968     }
1969    
1970     static __inline__ void raw_jmp_m(uae_u32 base)
1971     {
1972     emit_byte(0xff);
1973     emit_byte(0x25);
1974     emit_long(base);
1975     }
1976    
1977    
1978     static __inline__ void raw_call(uae_u32 t)
1979     {
1980     emit_byte(0xe8);
1981     emit_long(t-(uae_u32)target-4);
1982     }
1983    
1984     static __inline__ void raw_jmp(uae_u32 t)
1985     {
1986     emit_byte(0xe9);
1987     emit_long(t-(uae_u32)target-4);
1988     }
1989    
1990     static __inline__ void raw_jl(uae_u32 t)
1991     {
1992     emit_byte(0x0f);
1993     emit_byte(0x8c);
1994     emit_long(t-(uae_u32)target-4);
1995     }
1996    
1997     static __inline__ void raw_jz(uae_u32 t)
1998     {
1999     emit_byte(0x0f);
2000     emit_byte(0x84);
2001     emit_long(t-(uae_u32)target-4);
2002     }
2003    
2004     static __inline__ void raw_jnz(uae_u32 t)
2005     {
2006     emit_byte(0x0f);
2007     emit_byte(0x85);
2008     emit_long(t-(uae_u32)target-4);
2009     }
2010    
2011     static __inline__ void raw_jnz_l_oponly(void)
2012     {
2013     emit_byte(0x0f);
2014     emit_byte(0x85);
2015     }
2016    
2017     static __inline__ void raw_jcc_l_oponly(int cc)
2018     {
2019     emit_byte(0x0f);
2020     emit_byte(0x80+cc);
2021     }
2022    
2023     static __inline__ void raw_jnz_b_oponly(void)
2024     {
2025     emit_byte(0x75);
2026     }
2027    
2028     static __inline__ void raw_jz_b_oponly(void)
2029     {
2030     emit_byte(0x74);
2031     }
2032    
2033     static __inline__ void raw_jcc_b_oponly(int cc)
2034     {
2035     emit_byte(0x70+cc);
2036     }
2037    
2038     static __inline__ void raw_jmp_l_oponly(void)
2039     {
2040     emit_byte(0xe9);
2041     }
2042    
2043     static __inline__ void raw_jmp_b_oponly(void)
2044     {
2045     emit_byte(0xeb);
2046     }
2047    
2048     static __inline__ void raw_ret(void)
2049     {
2050     emit_byte(0xc3);
2051     }
2052    
2053     static __inline__ void raw_nop(void)
2054     {
2055     emit_byte(0x90);
2056     }
2057    
2058    
2059     /*************************************************************************
2060     * Flag handling, to and fro UAE flag register *
2061     *************************************************************************/
2062    
2063     #ifdef SAHF_SETO_PROFITABLE
2064    
2065     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2066    
2067     static __inline__ void raw_flags_to_reg(int r)
2068     {
2069     raw_lahf(0); /* Most flags in AH */
2070     //raw_setcc(r,0); /* V flag in AL */
2071     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2072    
2073     #if 1 /* Let's avoid those nasty partial register stalls */
2074     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2075     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2076     //live.state[FLAGTMP].status=CLEAN;
2077     live.state[FLAGTMP].status=INMEM;
2078     live.state[FLAGTMP].realreg=-1;
2079     /* We just "evicted" FLAGTMP. */
2080     if (live.nat[r].nholds!=1) {
2081     /* Huh? */
2082     abort();
2083     }
2084     live.nat[r].nholds=0;
2085     #endif
2086     }
2087    
2088     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2089     static __inline__ void raw_reg_to_flags(int r)
2090     {
2091     raw_cmp_b_ri(r,-127); /* set V */
2092     raw_sahf(0);
2093     }
2094    
2095     #else
2096    
2097     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2098     static __inline__ void raw_flags_to_reg(int r)
2099     {
2100     raw_pushfl();
2101     raw_pop_l_r(r);
2102     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2103     // live.state[FLAGTMP].status=CLEAN;
2104     live.state[FLAGTMP].status=INMEM;
2105     live.state[FLAGTMP].realreg=-1;
2106     /* We just "evicted" FLAGTMP. */
2107     if (live.nat[r].nholds!=1) {
2108     /* Huh? */
2109     abort();
2110     }
2111     live.nat[r].nholds=0;
2112     }
2113    
2114     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2115     static __inline__ void raw_reg_to_flags(int r)
2116     {
2117     raw_push_l_r(r);
2118     raw_popfl();
2119     }
2120    
2121     #endif
2122    
2123     /* Apparently, there are enough instructions between flag store and
2124     flag reload to avoid the partial memory stall */
2125     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2126     {
2127     #if 1
2128     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2129     #else
2130     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2131     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2132     #endif
2133     }
2134    
2135     /* FLAGX is byte sized, and we *do* write it at that size */
2136     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2137     {
2138     if (live.nat[target].canbyte)
2139     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2140     else if (live.nat[target].canword)
2141     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2142     else
2143     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2144     }
2145    
2146    
2147     static __inline__ void raw_inc_sp(int off)
2148     {
2149 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2150 gbeauche 1.1 }
2151    
2152     /*************************************************************************
2153     * Handling mistaken direct memory access *
2154     *************************************************************************/
2155    
2156     // gb-- I don't need that part for JIT Basilisk II
2157     #if defined(NATMEM_OFFSET) && 0
2158     #include <asm/sigcontext.h>
2159     #include <signal.h>
2160    
2161     #define SIG_READ 1
2162     #define SIG_WRITE 2
2163    
2164     static int in_handler=0;
2165     static uae_u8 veccode[256];
2166    
2167     static void vec(int x, struct sigcontext sc)
2168     {
2169     uae_u8* i=(uae_u8*)sc.eip;
2170     uae_u32 addr=sc.cr2;
2171     int r=-1;
2172     int size=4;
2173     int dir=-1;
2174     int len=0;
2175     int j;
2176    
2177     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2178     if (!canbang)
2179     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2180     if (in_handler)
2181     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2182    
2183     if (canbang && i>=compiled_code && i<=current_compile_p) {
2184     if (*i==0x66) {
2185     i++;
2186     size=2;
2187     len++;
2188     }
2189    
2190     switch(i[0]) {
2191     case 0x8a:
2192     if ((i[1]&0xc0)==0x80) {
2193     r=(i[1]>>3)&7;
2194     dir=SIG_READ;
2195     size=1;
2196     len+=6;
2197     break;
2198     }
2199     break;
2200     case 0x88:
2201     if ((i[1]&0xc0)==0x80) {
2202     r=(i[1]>>3)&7;
2203     dir=SIG_WRITE;
2204     size=1;
2205     len+=6;
2206     break;
2207     }
2208     break;
2209     case 0x8b:
2210     if ((i[1]&0xc0)==0x80) {
2211     r=(i[1]>>3)&7;
2212     dir=SIG_READ;
2213     len+=6;
2214     break;
2215     }
2216     if ((i[1]&0xc0)==0x40) {
2217     r=(i[1]>>3)&7;
2218     dir=SIG_READ;
2219     len+=3;
2220     break;
2221     }
2222     break;
2223     case 0x89:
2224     if ((i[1]&0xc0)==0x80) {
2225     r=(i[1]>>3)&7;
2226     dir=SIG_WRITE;
2227     len+=6;
2228     break;
2229     }
2230     if ((i[1]&0xc0)==0x40) {
2231     r=(i[1]>>3)&7;
2232     dir=SIG_WRITE;
2233     len+=3;
2234     break;
2235     }
2236     break;
2237     }
2238     }
2239    
2240     if (r!=-1) {
2241     void* pr=NULL;
2242     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2243    
2244     switch(r) {
2245     case 0: pr=&(sc.eax); break;
2246     case 1: pr=&(sc.ecx); break;
2247     case 2: pr=&(sc.edx); break;
2248     case 3: pr=&(sc.ebx); break;
2249     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2250     case 5: pr=(size>1)?
2251     (void*)(&(sc.ebp)):
2252     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2253     case 6: pr=(size>1)?
2254     (void*)(&(sc.esi)):
2255     (void*)(((uae_u8*)&(sc.edx))+1); break;
2256     case 7: pr=(size>1)?
2257     (void*)(&(sc.edi)):
2258     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2259     default: abort();
2260     }
2261     if (pr) {
2262     blockinfo* bi;
2263    
2264     if (currprefs.comp_oldsegv) {
2265     addr-=NATMEM_OFFSET;
2266    
2267     if ((addr>=0x10000000 && addr<0x40000000) ||
2268     (addr>=0x50000000)) {
2269     write_log("Suspicious address in %x SEGV handler.\n",addr);
2270     }
2271     if (dir==SIG_READ) {
2272     switch(size) {
2273     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2274     case 2: *((uae_u16*)pr)=get_word(addr); break;
2275     case 4: *((uae_u32*)pr)=get_long(addr); break;
2276     default: abort();
2277     }
2278     }
2279     else { /* write */
2280     switch(size) {
2281     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2282     case 2: put_word(addr,*((uae_u16*)pr)); break;
2283     case 4: put_long(addr,*((uae_u32*)pr)); break;
2284     default: abort();
2285     }
2286     }
2287     write_log("Handled one access!\n");
2288     fflush(stdout);
2289     segvcount++;
2290     sc.eip+=len;
2291     }
2292     else {
2293     void* tmp=target;
2294     int i;
2295     uae_u8 vecbuf[5];
2296    
2297     addr-=NATMEM_OFFSET;
2298    
2299     if ((addr>=0x10000000 && addr<0x40000000) ||
2300     (addr>=0x50000000)) {
2301     write_log("Suspicious address in %x SEGV handler.\n",addr);
2302     }
2303    
2304     target=(uae_u8*)sc.eip;
2305     for (i=0;i<5;i++)
2306     vecbuf[i]=target[i];
2307     emit_byte(0xe9);
2308     emit_long((uae_u32)veccode-(uae_u32)target-4);
2309     write_log("Create jump to %p\n",veccode);
2310    
2311     write_log("Handled one access!\n");
2312     fflush(stdout);
2313     segvcount++;
2314    
2315     target=veccode;
2316    
2317     if (dir==SIG_READ) {
2318     switch(size) {
2319     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2320     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2321     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2322     default: abort();
2323     }
2324     }
2325     else { /* write */
2326     switch(size) {
2327     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2328     case 2: put_word(addr,*((uae_u16*)pr)); break;
2329     case 4: put_long(addr,*((uae_u32*)pr)); break;
2330     default: abort();
2331     }
2332     }
2333     for (i=0;i<5;i++)
2334     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2335     raw_mov_l_mi((uae_u32)&in_handler,0);
2336     emit_byte(0xe9);
2337     emit_long(sc.eip+len-(uae_u32)target-4);
2338     in_handler=1;
2339     target=tmp;
2340     }
2341     bi=active;
2342     while (bi) {
2343     if (bi->handler &&
2344     (uae_u8*)bi->direct_handler<=i &&
2345     (uae_u8*)bi->nexthandler>i) {
2346     write_log("deleted trigger (%p<%p<%p) %p\n",
2347     bi->handler,
2348     i,
2349     bi->nexthandler,
2350     bi->pc_p);
2351     invalidate_block(bi);
2352     raise_in_cl_list(bi);
2353     set_special(0);
2354     return;
2355     }
2356     bi=bi->next;
2357     }
2358     /* Not found in the active list. Might be a rom routine that
2359     is in the dormant list */
2360     bi=dormant;
2361     while (bi) {
2362     if (bi->handler &&
2363     (uae_u8*)bi->direct_handler<=i &&
2364     (uae_u8*)bi->nexthandler>i) {
2365     write_log("deleted trigger (%p<%p<%p) %p\n",
2366     bi->handler,
2367     i,
2368     bi->nexthandler,
2369     bi->pc_p);
2370     invalidate_block(bi);
2371     raise_in_cl_list(bi);
2372     set_special(0);
2373     return;
2374     }
2375     bi=bi->next;
2376     }
2377     write_log("Huh? Could not find trigger!\n");
2378     return;
2379     }
2380     }
2381     write_log("Can't handle access!\n");
2382     for (j=0;j<10;j++) {
2383     write_log("instruction byte %2d is %02x\n",j,i[j]);
2384     }
2385     write_log("Please send the above info (starting at \"fault address\") to\n"
2386     "bmeyer@csse.monash.edu.au\n"
2387     "This shouldn't happen ;-)\n");
2388     fflush(stdout);
2389     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2390     }
2391     #endif
2392    
2393    
2394     /*************************************************************************
2395     * Checking for CPU features *
2396     *************************************************************************/
2397    
/* Identification data for the host CPU; filled in by raw_init_cpu() */
struct cpuinfo_x86 {
    uae_u8 x86; // CPU family
    uae_u8 x86_vendor; // CPU vendor
    uae_u8 x86_processor; // CPU canonical processor type
    uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
    uae_u32 x86_hwcap; // EDX value returned by CPUID level 1, 0 if that level is unavailable
    uae_u8 x86_model; // CPU model, from EAX of CPUID level 1
    uae_u8 x86_mask; // low 4 bits of EAX of CPUID level 1
    int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
    char x86_vendor_id[16]; // 12 character vendor string of CPUID level 0, NUL at [12]
};
struct cpuinfo_x86 cpuinfo; // the instance raw_init_cpu() fills in
2410    
/* Values of cpuinfo_x86.x86_vendor; x86_get_cpu_vendor() picks one by
   comparing the CPUID vendor string against the known names */
enum {
    X86_VENDOR_INTEL = 0,
    X86_VENDOR_CYRIX = 1,
    X86_VENDOR_AMD = 2,
    X86_VENDOR_UMC = 3,
    X86_VENDOR_NEXGEN = 4,
    X86_VENDOR_CENTAUR = 5,
    X86_VENDOR_RISE = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC = 8,
    X86_VENDOR_UNKNOWN = 0xff /* vendor string not recognised */
};
2423    
/* Canonical processor types stored in cpuinfo_x86.x86_processor.  The
   values index x86_processor_string_table[] and x86_alignments[] */
enum {
    X86_PROCESSOR_I386, /* 80386 */
    X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_max /* number of entries; raw_init_cpu() also uses it for "not identified" */
};
2434    
2435     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2436     "80386",
2437     "80486",
2438     "Pentium",
2439     "PentiumPro",
2440     "K6",
2441     "Athlon",
2442     "Pentium4"
2443     };
2444    
/* Code alignment tuning per canonical processor type, indexed by
   X86_PROCESSOR_xxx.  raw_init_cpu() copies align_loop and align_jump
   into align_loops and align_jumps when tune_alignment is set. */
static struct ptt {
    const int align_loop;
    const int align_loop_max_skip;
    const int align_jump;
    const int align_jump_max_skip;
    const int align_func;
}
x86_alignments[X86_PROCESSOR_max] = {
    { 4, 3, 4, 3, 4 },          /* X86_PROCESSOR_I386 */
    { 16, 15, 16, 15, 16 },     /* X86_PROCESSOR_I486 */
    { 16, 7, 16, 7, 16 },       /* X86_PROCESSOR_PENTIUM */
    { 16, 15, 16, 7, 16 },      /* X86_PROCESSOR_PENTIUMPRO */
    { 32, 7, 32, 7, 32 },       /* X86_PROCESSOR_K6 */
    { 16, 7, 16, 7, 16 },       /* X86_PROCESSOR_ATHLON */
    { 0, 0, 0, 0, 0 }           /* X86_PROCESSOR_PENTIUM4 */
};
2461 gbeauche 1.1
2462 gbeauche 1.3 static void
2463     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2464 gbeauche 1.1 {
2465 gbeauche 1.3 char *v = c->x86_vendor_id;
2466    
2467     if (!strcmp(v, "GenuineIntel"))
2468     c->x86_vendor = X86_VENDOR_INTEL;
2469     else if (!strcmp(v, "AuthenticAMD"))
2470     c->x86_vendor = X86_VENDOR_AMD;
2471     else if (!strcmp(v, "CyrixInstead"))
2472     c->x86_vendor = X86_VENDOR_CYRIX;
2473     else if (!strcmp(v, "Geode by NSC"))
2474     c->x86_vendor = X86_VENDOR_NSC;
2475     else if (!strcmp(v, "UMC UMC UMC "))
2476     c->x86_vendor = X86_VENDOR_UMC;
2477     else if (!strcmp(v, "CentaurHauls"))
2478     c->x86_vendor = X86_VENDOR_CENTAUR;
2479     else if (!strcmp(v, "NexGenDriven"))
2480     c->x86_vendor = X86_VENDOR_NEXGEN;
2481     else if (!strcmp(v, "RiseRiseRise"))
2482     c->x86_vendor = X86_VENDOR_RISE;
2483     else if (!strcmp(v, "GenuineTMx86") ||
2484     !strcmp(v, "TransmetaCPU"))
2485     c->x86_vendor = X86_VENDOR_TRANSMETA;
2486     else
2487     c->x86_vendor = X86_VENDOR_UNKNOWN;
2488     }
2489 gbeauche 1.1
2490 gbeauche 1.3 static void
2491     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2492     {
2493     static uae_u8 cpuid_space[256];
2494     uae_u8* tmp=get_target();
2495 gbeauche 1.1
2496 gbeauche 1.3 set_target(cpuid_space);
2497     raw_push_l_r(0); /* eax */
2498     raw_push_l_r(1); /* ecx */
2499     raw_push_l_r(2); /* edx */
2500     raw_push_l_r(3); /* ebx */
2501     raw_mov_l_rm(0,(uae_u32)&op);
2502     raw_cpuid(0);
2503     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2504     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2505     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2506     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2507     raw_pop_l_r(3);
2508     raw_pop_l_r(2);
2509     raw_pop_l_r(1);
2510     raw_pop_l_r(0);
2511     raw_ret();
2512     set_target(tmp);
2513 gbeauche 1.1
2514 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
2515 gbeauche 1.1 }
2516    
2517 gbeauche 1.3 static void
2518     raw_init_cpu(void)
2519 gbeauche 1.1 {
2520 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
2521    
2522     /* Defaults */
2523     c->x86_vendor = X86_VENDOR_UNKNOWN;
2524     c->cpuid_level = -1; /* CPUID not detected */
2525     c->x86_model = c->x86_mask = 0; /* So far unknown... */
2526     c->x86_vendor_id[0] = '\0'; /* Unset */
2527     c->x86_hwcap = 0;
2528    
2529     /* Get vendor name */
2530     c->x86_vendor_id[12] = '\0';
2531     cpuid(0x00000000,
2532     (uae_u32 *)&c->cpuid_level,
2533     (uae_u32 *)&c->x86_vendor_id[0],
2534     (uae_u32 *)&c->x86_vendor_id[8],
2535     (uae_u32 *)&c->x86_vendor_id[4]);
2536     x86_get_cpu_vendor(c);
2537    
2538     /* Intel-defined flags: level 0x00000001 */
2539     c->x86_brand_id = 0;
2540     if ( c->cpuid_level >= 0x00000001 ) {
2541     uae_u32 tfms, brand_id;
2542     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2543     c->x86 = (tfms >> 8) & 15;
2544     c->x86_model = (tfms >> 4) & 15;
2545     c->x86_brand_id = brand_id & 0xff;
2546     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2547     (c->x86 == 0xf)) {
2548     /* AMD Extended Family and Model Values */
2549     c->x86 += (tfms >> 20) & 0xff;
2550     c->x86_model += (tfms >> 12) & 0xf0;
2551     }
2552     c->x86_mask = tfms & 15;
2553     } else {
2554     /* Have CPUID level 0 only - unheard of */
2555     c->x86 = 4;
2556     }
2557    
2558     /* Canonicalize processor ID */
2559     c->x86_processor = X86_PROCESSOR_max;
2560     switch (c->x86) {
2561     case 3:
2562     c->x86_processor = X86_PROCESSOR_I386;
2563     break;
2564     case 4:
2565     c->x86_processor = X86_PROCESSOR_I486;
2566     break;
2567     case 5:
2568     if (c->x86_vendor == X86_VENDOR_AMD)
2569     c->x86_processor = X86_PROCESSOR_K6;
2570     else
2571     c->x86_processor = X86_PROCESSOR_PENTIUM;
2572     break;
2573     case 6:
2574     if (c->x86_vendor == X86_VENDOR_AMD)
2575     c->x86_processor = X86_PROCESSOR_ATHLON;
2576     else
2577     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2578     break;
2579     case 15:
2580     if (c->x86_vendor == X86_VENDOR_INTEL) {
2581     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2582     if (c->x86_brand_id >= 8)
2583     c->x86_processor = X86_PROCESSOR_PENTIUM4;
2584     }
2585     break;
2586     }
2587     if (c->x86_processor == X86_PROCESSOR_max) {
2588     fprintf(stderr, "Error: unknown processor type\n");
2589     fprintf(stderr, " Family : %d\n", c->x86);
2590     fprintf(stderr, " Model : %d\n", c->x86_model);
2591     fprintf(stderr, " Mask : %d\n", c->x86_mask);
2592     if (c->x86_brand_id)
2593     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2594     abort();
2595     }
2596    
2597     /* Have CMOV support? */
2598     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2599    
2600     /* Can the host CPU suffer from partial register stalls? */
2601     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2602     #if 1
2603     /* It appears that partial register writes are a bad idea even on
2604 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
2605     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2606 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2607     have_rat_stall = true;
2608 gbeauche 1.1 #endif
2609 gbeauche 1.3
2610     /* Alignments */
2611     if (tune_alignment) {
2612     align_loops = x86_alignments[c->x86_processor].align_loop;
2613     align_jumps = x86_alignments[c->x86_processor].align_jump;
2614     }
2615    
2616     write_log("Max CPUID level=%d Processor is %s [%s]\n",
2617     c->cpuid_level, c->x86_vendor_id,
2618     x86_processor_string_table[c->x86_processor]);
2619 gbeauche 1.1 }
2620    
2621    
2622     /*************************************************************************
2623     * FPU stuff *
2624     *************************************************************************/
2625    
2626    
2627     static __inline__ void raw_fp_init(void)
2628     {
2629     int i;
2630    
2631     for (i=0;i<N_FREGS;i++)
2632     live.spos[i]=-2;
2633     live.tos=-1; /* Stack is empty */
2634     }
2635    
2636     static __inline__ void raw_fp_cleanup_drop(void)
2637     {
2638     #if 0
2639     /* using FINIT instead of popping all the entries.
2640     Seems to have side effects --- there is display corruption in
2641     Quake when this is used */
2642     if (live.tos>1) {
2643     emit_byte(0x9b);
2644     emit_byte(0xdb);
2645     emit_byte(0xe3);
2646     live.tos=-1;
2647     }
2648     #endif
2649     while (live.tos>=1) {
2650     emit_byte(0xde);
2651     emit_byte(0xd9);
2652     live.tos-=2;
2653     }
2654     while (live.tos>=0) {
2655     emit_byte(0xdd);
2656     emit_byte(0xd8);
2657     live.tos--;
2658     }
2659     raw_fp_init();
2660     }
2661    
2662     static __inline__ void make_tos(int r)
2663     {
2664     int p,q;
2665    
2666     if (live.spos[r]<0) { /* Register not yet on stack */
2667     emit_byte(0xd9);
2668     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2669     live.tos++;
2670     live.spos[r]=live.tos;
2671     live.onstack[live.tos]=r;
2672     return;
2673     }
2674     /* Register is on stack */
2675     if (live.tos==live.spos[r])
2676     return;
2677     p=live.spos[r];
2678     q=live.onstack[live.tos];
2679    
2680     emit_byte(0xd9);
2681     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2682     live.onstack[live.tos]=r;
2683     live.spos[r]=live.tos;
2684     live.onstack[p]=q;
2685     live.spos[q]=p;
2686     }
2687    
2688     static __inline__ void make_tos2(int r, int r2)
2689     {
2690     int q;
2691    
2692     make_tos(r2); /* Put the reg that's supposed to end up in position2
2693     on top */
2694    
2695     if (live.spos[r]<0) { /* Register not yet on stack */
2696     make_tos(r); /* This will extend the stack */
2697     return;
2698     }
2699     /* Register is on stack */
2700     emit_byte(0xd9);
2701     emit_byte(0xc9); /* Move r2 into position 2 */
2702    
2703     q=live.onstack[live.tos-1];
2704     live.onstack[live.tos]=q;
2705     live.spos[q]=live.tos;
2706     live.onstack[live.tos-1]=r2;
2707     live.spos[r2]=live.tos-1;
2708    
2709     make_tos(r); /* And r into 1 */
2710     }
2711    
2712     static __inline__ int stackpos(int r)
2713     {
2714     if (live.spos[r]<0)
2715     abort();
2716     if (live.tos<live.spos[r]) {
2717     printf("Looking for spos for fnreg %d\n",r);
2718     abort();
2719     }
2720     return live.tos-live.spos[r];
2721     }
2722    
2723     static __inline__ void usereg(int r)
2724     {
2725     if (live.spos[r]<0)
2726     make_tos(r);
2727     }
2728    
2729     /* This is called with one FP value in a reg *above* tos, which it will
2730     pop off the stack if necessary */
2731     static __inline__ void tos_make(int r)
2732     {
2733     if (live.spos[r]<0) {
2734     live.tos++;
2735     live.spos[r]=live.tos;
2736     live.onstack[live.tos]=r;
2737     return;
2738     }
2739     emit_byte(0xdd);
2740     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2741     and pop it*/
2742     }
2743    
2744    
2745     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2746     {
2747     make_tos(r);
2748     emit_byte(0xdd);
2749     emit_byte(0x15);
2750     emit_long(m);
2751     }
2752     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2753    
2754     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2755     {
2756     make_tos(r);
2757     emit_byte(0xdd);
2758     emit_byte(0x1d);
2759     emit_long(m);
2760     live.onstack[live.tos]=-1;
2761     live.tos--;
2762     live.spos[r]=-2;
2763     }
2764     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2765    
2766     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2767     {
2768     emit_byte(0xdd);
2769     emit_byte(0x05);
2770     emit_long(m);
2771     tos_make(r);
2772     }
2773     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2774    
2775     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2776     {
2777     emit_byte(0xdb);
2778     emit_byte(0x05);
2779     emit_long(m);
2780     tos_make(r);
2781     }
2782     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2783    
2784     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2785     {
2786     make_tos(r);
2787     emit_byte(0xdb);
2788     emit_byte(0x15);
2789     emit_long(m);
2790     }
2791     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2792    
2793     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2794     {
2795     emit_byte(0xd9);
2796     emit_byte(0x05);
2797     emit_long(m);
2798     tos_make(r);
2799     }
2800     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2801    
2802     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2803     {
2804     make_tos(r);
2805     emit_byte(0xd9);
2806     emit_byte(0x15);
2807     emit_long(m);
2808     }
2809     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2810    
2811     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2812     {
2813     int rs;
2814    
2815     /* Stupid x87 can't write a long double to mem without popping the
2816     stack! */
2817     usereg(r);
2818     rs=stackpos(r);
2819     emit_byte(0xd9); /* Get a copy to the top of stack */
2820     emit_byte(0xc0+rs);
2821    
2822     emit_byte(0xdb); /* store and pop it */
2823     emit_byte(0x3d);
2824     emit_long(m);
2825     }
2826     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2827    
2828     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2829     {
2830     int rs;
2831    
2832     make_tos(r);
2833     emit_byte(0xdb); /* store and pop it */
2834     emit_byte(0x3d);
2835     emit_long(m);
2836     live.onstack[live.tos]=-1;
2837     live.tos--;
2838     live.spos[r]=-2;
2839     }
2840     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2841    
2842     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2843     {
2844     emit_byte(0xdb);
2845     emit_byte(0x2d);
2846     emit_long(m);
2847     tos_make(r);
2848     }
2849     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2850    
2851     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2852     {
2853     emit_byte(0xd9);
2854     emit_byte(0xeb);
2855     tos_make(r);
2856     }
2857     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2858    
2859     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2860     {
2861     emit_byte(0xd9);
2862     emit_byte(0xec);
2863     tos_make(r);
2864     }
2865     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2866    
2867     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2868     {
2869     emit_byte(0xd9);
2870     emit_byte(0xea);
2871     tos_make(r);
2872     }
2873     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2874    
2875     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2876     {
2877     emit_byte(0xd9);
2878     emit_byte(0xed);
2879     tos_make(r);
2880     }
2881     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2882    
2883     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2884     {
2885     emit_byte(0xd9);
2886     emit_byte(0xe8);
2887     tos_make(r);
2888     }
2889     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2890    
2891     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2892     {
2893     emit_byte(0xd9);
2894     emit_byte(0xee);
2895     tos_make(r);
2896     }
2897     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2898    
2899     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2900     {
2901     int ds;
2902    
2903     usereg(s);
2904     ds=stackpos(s);
2905     if (ds==0 && live.spos[d]>=0) {
2906     /* source is on top of stack, and we already have the dest */
2907     int dd=stackpos(d);
2908     emit_byte(0xdd);
2909     emit_byte(0xd0+dd);
2910     }
2911     else {
2912     emit_byte(0xd9);
2913     emit_byte(0xc0+ds); /* duplicate source on tos */
2914     tos_make(d); /* store to destination, pop if necessary */
2915     }
2916     }
2917     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2918    
2919     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2920     {
2921     emit_byte(0xd9);
2922     emit_byte(0xa8+index);
2923     emit_long(base);
2924     }
2925     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2926    
2927    
2928     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2929     {
2930     int ds;
2931    
2932     if (d!=s) {
2933     usereg(s);
2934     ds=stackpos(s);
2935     emit_byte(0xd9);
2936     emit_byte(0xc0+ds); /* duplicate source */
2937     emit_byte(0xd9);
2938     emit_byte(0xfa); /* take square root */
2939     tos_make(d); /* store to destination */
2940     }
2941     else {
2942     make_tos(d);
2943     emit_byte(0xd9);
2944     emit_byte(0xfa); /* take square root */
2945     }
2946     }
2947     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2948    
2949     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2950     {
2951     int ds;
2952    
2953     if (d!=s) {
2954     usereg(s);
2955     ds=stackpos(s);
2956     emit_byte(0xd9);
2957     emit_byte(0xc0+ds); /* duplicate source */
2958     emit_byte(0xd9);
2959     emit_byte(0xe1); /* take fabs */
2960     tos_make(d); /* store to destination */
2961     }
2962     else {
2963     make_tos(d);
2964     emit_byte(0xd9);
2965     emit_byte(0xe1); /* take fabs */
2966     }
2967     }
2968     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2969    
2970     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2971     {
2972     int ds;
2973    
2974     if (d!=s) {
2975     usereg(s);
2976     ds=stackpos(s);
2977     emit_byte(0xd9);
2978     emit_byte(0xc0+ds); /* duplicate source */
2979     emit_byte(0xd9);
2980     emit_byte(0xfc); /* take frndint */
2981     tos_make(d); /* store to destination */
2982     }
2983     else {
2984     make_tos(d);
2985     emit_byte(0xd9);
2986     emit_byte(0xfc); /* take frndint */
2987     }
2988     }
2989     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2990    
2991     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2992     {
2993     int ds;
2994    
2995     if (d!=s) {
2996     usereg(s);
2997     ds=stackpos(s);
2998     emit_byte(0xd9);
2999     emit_byte(0xc0+ds); /* duplicate source */
3000     emit_byte(0xd9);
3001     emit_byte(0xff); /* take cos */
3002     tos_make(d); /* store to destination */
3003     }
3004     else {
3005     make_tos(d);
3006     emit_byte(0xd9);
3007     emit_byte(0xff); /* take cos */
3008     }
3009     }
3010     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3011    
3012     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3013     {
3014     int ds;
3015    
3016     if (d!=s) {
3017     usereg(s);
3018     ds=stackpos(s);
3019     emit_byte(0xd9);
3020     emit_byte(0xc0+ds); /* duplicate source */
3021     emit_byte(0xd9);
3022     emit_byte(0xfe); /* take sin */
3023     tos_make(d); /* store to destination */
3024     }
3025     else {
3026     make_tos(d);
3027     emit_byte(0xd9);
3028     emit_byte(0xfe); /* take sin */
3029     }
3030     }
3031     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3032    
3033     double one=1;
3034     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3035     {
3036     int ds;
3037    
3038     usereg(s);
3039     ds=stackpos(s);
3040     emit_byte(0xd9);
3041     emit_byte(0xc0+ds); /* duplicate source */
3042    
3043     emit_byte(0xd9);
3044     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3045     emit_byte(0xd9);
3046     emit_byte(0xfc); /* rndint */
3047     emit_byte(0xd9);
3048     emit_byte(0xc9); /* swap top two elements */
3049     emit_byte(0xd8);
3050     emit_byte(0xe1); /* subtract rounded from original */
3051     emit_byte(0xd9);
3052     emit_byte(0xf0); /* f2xm1 */
3053     emit_byte(0xdc);
3054     emit_byte(0x05);
3055     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3056     emit_byte(0xd9);
3057     emit_byte(0xfd); /* and scale it */
3058     emit_byte(0xdd);
3059     emit_byte(0xd9); /* take he rounded value off */
3060     tos_make(d); /* store to destination */
3061     }
3062     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3063    
3064     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3065     {
3066     int ds;
3067    
3068     usereg(s);
3069     ds=stackpos(s);
3070     emit_byte(0xd9);
3071     emit_byte(0xc0+ds); /* duplicate source */
3072     emit_byte(0xd9);
3073     emit_byte(0xea); /* fldl2e */
3074     emit_byte(0xde);
3075     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3076    
3077     emit_byte(0xd9);
3078     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3079     emit_byte(0xd9);
3080     emit_byte(0xfc); /* rndint */
3081     emit_byte(0xd9);
3082     emit_byte(0xc9); /* swap top two elements */
3083     emit_byte(0xd8);
3084     emit_byte(0xe1); /* subtract rounded from original */
3085     emit_byte(0xd9);
3086     emit_byte(0xf0); /* f2xm1 */
3087     emit_byte(0xdc);
3088     emit_byte(0x05);
3089     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3090     emit_byte(0xd9);
3091     emit_byte(0xfd); /* and scale it */
3092     emit_byte(0xdd);
3093     emit_byte(0xd9); /* take he rounded value off */
3094     tos_make(d); /* store to destination */
3095     }
3096     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3097    
3098     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3099     {
3100     int ds;
3101    
3102     usereg(s);
3103     ds=stackpos(s);
3104     emit_byte(0xd9);
3105     emit_byte(0xc0+ds); /* duplicate source */
3106     emit_byte(0xd9);
3107     emit_byte(0xe8); /* push '1' */
3108     emit_byte(0xd9);
3109     emit_byte(0xc9); /* swap top two */
3110     emit_byte(0xd9);
3111     emit_byte(0xf1); /* take 1*log2(x) */
3112     tos_make(d); /* store to destination */
3113     }
3114     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3115    
3116    
3117     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3118     {
3119     int ds;
3120    
3121     if (d!=s) {
3122     usereg(s);
3123     ds=stackpos(s);
3124     emit_byte(0xd9);
3125     emit_byte(0xc0+ds); /* duplicate source */
3126     emit_byte(0xd9);
3127     emit_byte(0xe0); /* take fchs */
3128     tos_make(d); /* store to destination */
3129     }
3130     else {
3131     make_tos(d);
3132     emit_byte(0xd9);
3133     emit_byte(0xe0); /* take fchs */
3134     }
3135     }
3136     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3137    
3138     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3139     {
3140     int ds;
3141    
3142     usereg(s);
3143     usereg(d);
3144    
3145     if (live.spos[s]==live.tos) {
3146     /* Source is on top of stack */
3147     ds=stackpos(d);
3148     emit_byte(0xdc);
3149     emit_byte(0xc0+ds); /* add source to dest*/
3150     }
3151     else {
3152     make_tos(d);
3153     ds=stackpos(s);
3154    
3155     emit_byte(0xd8);
3156     emit_byte(0xc0+ds); /* add source to dest*/
3157     }
3158     }
3159     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3160    
3161     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3162     {
3163     int ds;
3164    
3165     usereg(s);
3166     usereg(d);
3167    
3168     if (live.spos[s]==live.tos) {
3169     /* Source is on top of stack */
3170     ds=stackpos(d);
3171     emit_byte(0xdc);
3172     emit_byte(0xe8+ds); /* sub source from dest*/
3173     }
3174     else {
3175     make_tos(d);
3176     ds=stackpos(s);
3177    
3178     emit_byte(0xd8);
3179     emit_byte(0xe0+ds); /* sub src from dest */
3180     }
3181     }
3182     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3183    
3184     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3185     {
3186     int ds;
3187    
3188     usereg(s);
3189     usereg(d);
3190    
3191     make_tos(d);
3192     ds=stackpos(s);
3193    
3194     emit_byte(0xdd);
3195     emit_byte(0xe0+ds); /* cmp dest with source*/
3196     }
3197     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3198    
3199     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3200     {
3201     int ds;
3202    
3203     usereg(s);
3204     usereg(d);
3205    
3206     if (live.spos[s]==live.tos) {
3207     /* Source is on top of stack */
3208     ds=stackpos(d);
3209     emit_byte(0xdc);
3210     emit_byte(0xc8+ds); /* mul dest by source*/
3211     }
3212     else {
3213     make_tos(d);
3214     ds=stackpos(s);
3215    
3216     emit_byte(0xd8);
3217     emit_byte(0xc8+ds); /* mul dest by source*/
3218     }
3219     }
3220     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3221    
3222     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3223     {
3224     int ds;
3225    
3226     usereg(s);
3227     usereg(d);
3228    
3229     if (live.spos[s]==live.tos) {
3230     /* Source is on top of stack */
3231     ds=stackpos(d);
3232     emit_byte(0xdc);
3233     emit_byte(0xf8+ds); /* div dest by source */
3234     }
3235     else {
3236     make_tos(d);
3237     ds=stackpos(s);
3238    
3239     emit_byte(0xd8);
3240     emit_byte(0xf0+ds); /* div dest by source*/
3241     }
3242     }
3243     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3244    
3245     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3246     {
3247     int ds;
3248    
3249     usereg(s);
3250     usereg(d);
3251    
3252     make_tos2(d,s);
3253     ds=stackpos(s);
3254    
3255     if (ds!=1) {
3256     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3257     abort();
3258     }
3259     emit_byte(0xd9);
3260     emit_byte(0xf8); /* take rem from dest by source */
3261     }
3262     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3263    
3264     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3265     {
3266     int ds;
3267    
3268     usereg(s);
3269     usereg(d);
3270    
3271     make_tos2(d,s);
3272     ds=stackpos(s);
3273    
3274     if (ds!=1) {
3275     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3276     abort();
3277     }
3278     emit_byte(0xd9);
3279     emit_byte(0xf5); /* take rem1 from dest by source */
3280     }
3281     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3282    
3283    
3284     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3285     {
3286     make_tos(r);
3287     emit_byte(0xd9); /* ftst */
3288     emit_byte(0xe4);
3289     }
3290     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3291    
/* %eax register is clobbered if target processor doesn't support fucomi.
   The condition is parenthesized so that it expands safely inside any
   surrounding expression. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
3295    
3296     static __inline__ void raw_fflags_into_flags(int r)
3297     {
3298     int p;
3299    
3300     usereg(r);
3301     p=stackpos(r);
3302    
3303     emit_byte(0xd9);
3304     emit_byte(0xee); /* Push 0 */
3305     emit_byte(0xd9);
3306     emit_byte(0xc9+p); /* swap top two around */
3307     if (have_cmov) {
3308     // gb-- fucomi is for P6 cores only, not K6-2 then...
3309     emit_byte(0xdb);
3310     emit_byte(0xe9+p); /* fucomi them */
3311     }
3312     else {
3313     emit_byte(0xdd);
3314     emit_byte(0xe1+p); /* fucom them */
3315     emit_byte(0x9b);
3316     emit_byte(0xdf);
3317     emit_byte(0xe0); /* fstsw ax */
3318     raw_sahf(0); /* sahf */
3319     }
3320     emit_byte(0xdd);
3321     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3322     }