ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.5
Committed: 2002-10-01T09:37:03Z (21 years, 11 months ago) by gbeauche
Branch: MAIN
Changes since 1.4: +27 -0 lines
Log Message:
- #include "flags_x86.h" here to get NATIVE_CC_?? helper macros
- Add raw_cmp_b_mi() and raw_call_m_indexed() for generated
  m68k_compile_execute() function

File Contents

# User Rev Content
1 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
2     only target, and it's easier this way... */
3    
4 gbeauche 1.5 #include "flags_x86.h"
5    
6 gbeauche 1.1 /*************************************************************************
7     * Some basic information about the target CPU *
8     *************************************************************************/
9    
/* x86 general-purpose register numbers (the values used in the
   instruction encodings emitted below) */
#define EAX_INDEX 0
#define ECX_INDEX 1
#define EDX_INDEX 2
#define EBX_INDEX 3
#define ESP_INDEX 4
#define EBP_INDEX 5
#define ESI_INDEX 6
#define EDI_INDEX 7

/* The register in which subroutines return an integer return value */
#define REG_RESULT EAX_INDEX

/* The registers subroutines take their first and second argument in */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* Handle the _fastcall parameters of ECX and EDX */
#define REG_PAR1 ECX_INDEX
#define REG_PAR2 EDX_INDEX
#else
#define REG_PAR1 EAX_INDEX
#define REG_PAR2 EDX_INDEX
#endif

/* Three registers that are not used for any of the above */
#define REG_NOPAR1 ESI_INDEX
#define REG_NOPAR2 EBP_INDEX
#define REG_NOPAR3 EBX_INDEX

/* The register we use for preloading regs.pc_p */
#define REG_PC_PRE EAX_INDEX

/* Another register that is not the above.
   NOTE(review): in the _MSC_VER fastcall configuration this is the same
   register as REG_PC_PRE -- confirm that is intended. */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
#define REG_PC_TMP EAX_INDEX
#else
#define REG_PC_TMP ECX_INDEX
#endif

/* Register that can be used for shiftcount. -1 if any reg will do */
#define SHIFTCOUNT_NREG ECX_INDEX

/* %eax will hold the low 32 bits after a 32x32 mul */
#define MUL_NREG1 EAX_INDEX
/* %edx will hold the high 32 bits */
#define MUL_NREG2 EDX_INDEX
48    
49     uae_s8 always_used[]={4,-1};
50     uae_s8 can_byte[]={0,1,2,3,-1};
51     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
52    
53     /* cpuopti mutate instruction handlers to assume registers are saved
54     by the caller */
55     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
56    
57     /* This *should* be the same as call_saved. But:
58     - We might not really know which registers are saved, and which aren't,
59     so we need to preserve some, but don't want to rely on everyone else
60     also saving those registers
61     - Special registers (such like the stack pointer) should not be "preserved"
62     by pushing, even though they are "saved" across function calls
63     */
64     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
65    
66     /* Whether classes of instructions do or don't clobber the native flags */
/* Instruction classes whose CLOBBER_ macro expands to nothing */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW32
#define CLOBBER_SETCC

/* Instruction classes whose CLOBBER_ macro calls clobber_flags() */
#define CLOBBER_SUB  clobber_flags()
#define CLOBBER_SBB  clobber_flags()
#define CLOBBER_CMP  clobber_flags()
#define CLOBBER_ADD  clobber_flags()
#define CLOBBER_ADC  clobber_flags()
#define CLOBBER_AND  clobber_flags()
#define CLOBBER_OR   clobber_flags()
#define CLOBBER_XOR  clobber_flags()
#define CLOBBER_ROL  clobber_flags()
#define CLOBBER_ROR  clobber_flags()
#define CLOBBER_SHLL clobber_flags()
#define CLOBBER_SHRL clobber_flags()
#define CLOBBER_SHRA clobber_flags()
#define CLOBBER_TEST clobber_flags()
#define CLOBBER_SW16 clobber_flags()
#define CLOBBER_MUL  clobber_flags()
#define CLOBBER_BT   clobber_flags()
#define CLOBBER_BSF  clobber_flags()
/* Encoding-size optimisation switches consulted by the emitters below */
const bool optimize_accum      = true; /* allow the short accumulator-only opcode forms */
const bool optimize_imm8       = true; /* allow 8-bit forms when a value passes isbyte() */
const bool optimize_shift_once = true; /* allow the shift/rotate-by-1 opcodes when the count is 1 */
103    
104     /*************************************************************************
105     * Actual encoding of the instructions on the target CPU *
106     *************************************************************************/
107    
108 gbeauche 1.2 static __inline__ int isaccum(int r)
109     {
110     return (r == EAX_INDEX);
111     }
112    
113 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
114     {
115     return (x>=-128 && x<=127);
116     }
117    
118     static __inline__ int isword(uae_s32 x)
119     {
120     return (x>=-32768 && x<=32767);
121     }
122    
123     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
124     {
125     emit_byte(0x50+r);
126     }
127     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
128    
129     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
130     {
131     emit_byte(0x58+r);
132     }
133     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
134    
135     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
136     {
137     emit_byte(0x0f);
138     emit_byte(0xba);
139     emit_byte(0xe0+r);
140     emit_byte(i);
141     }
142     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
143    
144     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
145     {
146     emit_byte(0x0f);
147     emit_byte(0xa3);
148     emit_byte(0xc0+8*b+r);
149     }
150     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
151    
152     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
153     {
154     emit_byte(0x0f);
155     emit_byte(0xba);
156     emit_byte(0xf8+r);
157     emit_byte(i);
158     }
159     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
160    
161     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
162     {
163     emit_byte(0x0f);
164     emit_byte(0xbb);
165     emit_byte(0xc0+8*b+r);
166     }
167     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
168    
169    
170     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
171     {
172     emit_byte(0x0f);
173     emit_byte(0xba);
174     emit_byte(0xf0+r);
175     emit_byte(i);
176     }
177     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
178    
179     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
180     {
181     emit_byte(0x0f);
182     emit_byte(0xb3);
183     emit_byte(0xc0+8*b+r);
184     }
185     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
186    
187     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
188     {
189     emit_byte(0x0f);
190     emit_byte(0xba);
191     emit_byte(0xe8+r);
192     emit_byte(i);
193     }
194     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
195    
196     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
197     {
198     emit_byte(0x0f);
199     emit_byte(0xab);
200     emit_byte(0xc0+8*b+r);
201     }
202     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
203    
204     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
205     {
206     emit_byte(0x66);
207     if (isbyte(i)) {
208     emit_byte(0x83);
209     emit_byte(0xe8+d);
210     emit_byte(i);
211     }
212     else {
213 gbeauche 1.2 if (optimize_accum && isaccum(d))
214     emit_byte(0x2d);
215     else {
216 gbeauche 1.1 emit_byte(0x81);
217     emit_byte(0xe8+d);
218 gbeauche 1.2 }
219 gbeauche 1.1 emit_word(i);
220     }
221     }
222     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
223    
224    
225     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
226     {
227     emit_byte(0x8b);
228     emit_byte(0x05+8*d);
229     emit_long(s);
230     }
231     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
232    
233     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
234     {
235     emit_byte(0xc7);
236     emit_byte(0x05);
237     emit_long(d);
238     emit_long(s);
239     }
240     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
241    
242     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
243     {
244     emit_byte(0x66);
245     emit_byte(0xc7);
246     emit_byte(0x05);
247     emit_long(d);
248     emit_word(s);
249     }
250     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
251    
252     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
253     {
254     emit_byte(0xc6);
255     emit_byte(0x05);
256     emit_long(d);
257     emit_byte(s);
258     }
259     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
260    
261     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
262     {
263     if (optimize_shift_once && (i == 1)) {
264     emit_byte(0xd0);
265     emit_byte(0x05);
266     emit_long(d);
267     }
268     else {
269     emit_byte(0xc0);
270     emit_byte(0x05);
271     emit_long(d);
272     emit_byte(i);
273     }
274     }
275     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
276    
277     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
278     {
279     if (optimize_shift_once && (i == 1)) {
280     emit_byte(0xd0);
281     emit_byte(0xc0+r);
282     }
283     else {
284     emit_byte(0xc0);
285     emit_byte(0xc0+r);
286     emit_byte(i);
287     }
288     }
289     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
290    
291     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
292     {
293     emit_byte(0x66);
294     emit_byte(0xc1);
295     emit_byte(0xc0+r);
296     emit_byte(i);
297     }
298     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
299    
300     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
301     {
302     if (optimize_shift_once && (i == 1)) {
303     emit_byte(0xd1);
304     emit_byte(0xc0+r);
305     }
306     else {
307     emit_byte(0xc1);
308     emit_byte(0xc0+r);
309     emit_byte(i);
310     }
311     }
312     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
313    
314     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
315     {
316     emit_byte(0xd3);
317     emit_byte(0xc0+d);
318     }
319     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320    
321     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
322     {
323     emit_byte(0x66);
324     emit_byte(0xd3);
325     emit_byte(0xc0+d);
326     }
327     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
328    
329     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
330     {
331     emit_byte(0xd2);
332     emit_byte(0xc0+d);
333     }
334     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
335    
336     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
337     {
338     emit_byte(0xd3);
339     emit_byte(0xe0+d);
340     }
341     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
342    
343     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344     {
345     emit_byte(0x66);
346     emit_byte(0xd3);
347     emit_byte(0xe0+d);
348     }
349     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
350    
351     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
352     {
353     emit_byte(0xd2);
354     emit_byte(0xe0+d);
355     }
356     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
357    
358     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
359     {
360     if (optimize_shift_once && (i == 1)) {
361     emit_byte(0xd0);
362     emit_byte(0xc8+r);
363     }
364     else {
365     emit_byte(0xc0);
366     emit_byte(0xc8+r);
367     emit_byte(i);
368     }
369     }
370     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
371    
372     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
373     {
374     emit_byte(0x66);
375     emit_byte(0xc1);
376     emit_byte(0xc8+r);
377     emit_byte(i);
378     }
379     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
380    
381     // gb-- used for making an fpcr value in compemu_fpp.cpp
382     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
383     {
384     emit_byte(0x0b);
385     emit_byte(0x05+8*d);
386     emit_long(s);
387     }
388     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
389    
390     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
391     {
392     if (optimize_shift_once && (i == 1)) {
393     emit_byte(0xd1);
394     emit_byte(0xc8+r);
395     }
396     else {
397     emit_byte(0xc1);
398     emit_byte(0xc8+r);
399     emit_byte(i);
400     }
401     }
402     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
403    
404     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
405     {
406     emit_byte(0xd3);
407     emit_byte(0xc8+d);
408     }
409     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
410    
411     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
412     {
413     emit_byte(0x66);
414     emit_byte(0xd3);
415     emit_byte(0xc8+d);
416     }
417     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
418    
419     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
420     {
421     emit_byte(0xd2);
422     emit_byte(0xc8+d);
423     }
424     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
425    
426     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
427     {
428     emit_byte(0xd3);
429     emit_byte(0xe8+d);
430     }
431     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
432    
433     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
434     {
435     emit_byte(0x66);
436     emit_byte(0xd3);
437     emit_byte(0xe8+d);
438     }
439     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
440    
441     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
442     {
443     emit_byte(0xd2);
444     emit_byte(0xe8+d);
445     }
446     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
447    
448     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
449     {
450     emit_byte(0xd3);
451     emit_byte(0xf8+d);
452     }
453     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
454    
455     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
456     {
457     emit_byte(0x66);
458     emit_byte(0xd3);
459     emit_byte(0xf8+d);
460     }
461     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
462    
463     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
464     {
465     emit_byte(0xd2);
466     emit_byte(0xf8+d);
467     }
468     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
469    
470     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
471     {
472     if (optimize_shift_once && (i == 1)) {
473     emit_byte(0xd1);
474     emit_byte(0xe0+r);
475     }
476     else {
477     emit_byte(0xc1);
478     emit_byte(0xe0+r);
479     emit_byte(i);
480     }
481     }
482     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
483    
484     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
485     {
486     emit_byte(0x66);
487     emit_byte(0xc1);
488     emit_byte(0xe0+r);
489     emit_byte(i);
490     }
491     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
492    
493     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
494     {
495     if (optimize_shift_once && (i == 1)) {
496     emit_byte(0xd0);
497     emit_byte(0xe0+r);
498     }
499     else {
500     emit_byte(0xc0);
501     emit_byte(0xe0+r);
502     emit_byte(i);
503     }
504     }
505     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
506    
507     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
508     {
509     if (optimize_shift_once && (i == 1)) {
510     emit_byte(0xd1);
511     emit_byte(0xe8+r);
512     }
513     else {
514     emit_byte(0xc1);
515     emit_byte(0xe8+r);
516     emit_byte(i);
517     }
518     }
519     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
520    
521     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
522     {
523     emit_byte(0x66);
524     emit_byte(0xc1);
525     emit_byte(0xe8+r);
526     emit_byte(i);
527     }
528     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
529    
530     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
531     {
532     if (optimize_shift_once && (i == 1)) {
533     emit_byte(0xd0);
534     emit_byte(0xe8+r);
535     }
536     else {
537     emit_byte(0xc0);
538     emit_byte(0xe8+r);
539     emit_byte(i);
540     }
541     }
542     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
543    
544     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
545     {
546     if (optimize_shift_once && (i == 1)) {
547     emit_byte(0xd1);
548     emit_byte(0xf8+r);
549     }
550     else {
551     emit_byte(0xc1);
552     emit_byte(0xf8+r);
553     emit_byte(i);
554     }
555     }
556     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
557    
558     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
559     {
560     emit_byte(0x66);
561     emit_byte(0xc1);
562     emit_byte(0xf8+r);
563     emit_byte(i);
564     }
565     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
566    
567     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
568     {
569     if (optimize_shift_once && (i == 1)) {
570     emit_byte(0xd0);
571     emit_byte(0xf8+r);
572     }
573     else {
574     emit_byte(0xc0);
575     emit_byte(0xf8+r);
576     emit_byte(i);
577     }
578     }
579     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
580    
581     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
582     {
583     emit_byte(0x9e);
584     }
585     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
586    
587     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
588     {
589     emit_byte(0x0f);
590     emit_byte(0xa2);
591     }
592     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
593    
594     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
595     {
596     emit_byte(0x9f);
597     }
598     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
599    
600     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
601     {
602     emit_byte(0x0f);
603     emit_byte(0x90+cc);
604     emit_byte(0xc0+d);
605     }
606     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
607    
608     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
609     {
610     emit_byte(0x0f);
611     emit_byte(0x90+cc);
612     emit_byte(0x05);
613     emit_long(d);
614     }
615     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
616    
617     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
618     {
619     if (have_cmov) {
620     emit_byte(0x0f);
621     emit_byte(0x40+cc);
622     emit_byte(0xc0+8*d+s);
623     }
624     else { /* replacement using branch and mov */
625     int uncc=(cc^1);
626     emit_byte(0x70+uncc);
627     emit_byte(2); /* skip next 2 bytes if not cc=true */
628     emit_byte(0x89);
629     emit_byte(0xc0+8*s+d);
630     }
631     }
632     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
633    
634     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
635     {
636     emit_byte(0x0f);
637     emit_byte(0xbc);
638     emit_byte(0xc0+8*d+s);
639     }
640     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
641    
642     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
643     {
644     emit_byte(0x0f);
645     emit_byte(0xbf);
646     emit_byte(0xc0+8*d+s);
647     }
648     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
649    
650     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
651     {
652     emit_byte(0x0f);
653     emit_byte(0xbe);
654     emit_byte(0xc0+8*d+s);
655     }
656     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
657    
658     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
659     {
660     emit_byte(0x0f);
661     emit_byte(0xb7);
662     emit_byte(0xc0+8*d+s);
663     }
664     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
665    
666     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
667     {
668     emit_byte(0x0f);
669     emit_byte(0xb6);
670     emit_byte(0xc0+8*d+s);
671     }
672     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
673    
674     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
675     {
676     emit_byte(0x0f);
677     emit_byte(0xaf);
678     emit_byte(0xc0+8*d+s);
679     }
680     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
681    
682     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
683     {
684     if (d!=MUL_NREG1 || s!=MUL_NREG2)
685     abort();
686     emit_byte(0xf7);
687     emit_byte(0xea);
688     }
689     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
690    
691     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
692     {
693     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
694     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
695     abort();
696     }
697     emit_byte(0xf7);
698     emit_byte(0xe2);
699     }
700     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
701    
702     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
703     {
704     abort(); /* %^$&%^$%#^ x86! */
705     emit_byte(0x0f);
706     emit_byte(0xaf);
707     emit_byte(0xc0+8*d+s);
708     }
709     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
710    
711     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
712     {
713     emit_byte(0x88);
714     emit_byte(0xc0+8*s+d);
715     }
716     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
717    
718     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
719     {
720     emit_byte(0x66);
721     emit_byte(0x89);
722     emit_byte(0xc0+8*s+d);
723     }
724     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
725    
726     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
727     {
728     int isebp=(baser==5)?0x40:0;
729     int fi;
730    
731     switch(factor) {
732     case 1: fi=0; break;
733     case 2: fi=1; break;
734     case 4: fi=2; break;
735     case 8: fi=3; break;
736     default: abort();
737     }
738    
739    
740     emit_byte(0x8b);
741     emit_byte(0x04+8*d+isebp);
742     emit_byte(baser+8*index+0x40*fi);
743     if (isebp)
744     emit_byte(0x00);
745     }
746     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
747    
748     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
749     {
750     int fi;
751     int isebp;
752    
753     switch(factor) {
754     case 1: fi=0; break;
755     case 2: fi=1; break;
756     case 4: fi=2; break;
757     case 8: fi=3; break;
758     default: abort();
759     }
760     isebp=(baser==5)?0x40:0;
761    
762     emit_byte(0x66);
763     emit_byte(0x8b);
764     emit_byte(0x04+8*d+isebp);
765     emit_byte(baser+8*index+0x40*fi);
766     if (isebp)
767     emit_byte(0x00);
768     }
769     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
770    
771     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
772     {
773     int fi;
774     int isebp;
775    
776     switch(factor) {
777     case 1: fi=0; break;
778     case 2: fi=1; break;
779     case 4: fi=2; break;
780     case 8: fi=3; break;
781     default: abort();
782     }
783     isebp=(baser==5)?0x40:0;
784    
785     emit_byte(0x8a);
786     emit_byte(0x04+8*d+isebp);
787     emit_byte(baser+8*index+0x40*fi);
788     if (isebp)
789     emit_byte(0x00);
790     }
791     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
792    
793     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
794     {
795     int fi;
796     int isebp;
797    
798     switch(factor) {
799     case 1: fi=0; break;
800     case 2: fi=1; break;
801     case 4: fi=2; break;
802     case 8: fi=3; break;
803     default: abort();
804     }
805    
806    
807     isebp=(baser==5)?0x40:0;
808    
809     emit_byte(0x89);
810     emit_byte(0x04+8*s+isebp);
811     emit_byte(baser+8*index+0x40*fi);
812     if (isebp)
813     emit_byte(0x00);
814     }
815     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
816    
817     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
818     {
819     int fi;
820     int isebp;
821    
822     switch(factor) {
823     case 1: fi=0; break;
824     case 2: fi=1; break;
825     case 4: fi=2; break;
826     case 8: fi=3; break;
827     default: abort();
828     }
829     isebp=(baser==5)?0x40:0;
830    
831     emit_byte(0x66);
832     emit_byte(0x89);
833     emit_byte(0x04+8*s+isebp);
834     emit_byte(baser+8*index+0x40*fi);
835     if (isebp)
836     emit_byte(0x00);
837     }
838     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
839    
840     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
841     {
842     int fi;
843     int isebp;
844    
845     switch(factor) {
846     case 1: fi=0; break;
847     case 2: fi=1; break;
848     case 4: fi=2; break;
849     case 8: fi=3; break;
850     default: abort();
851     }
852     isebp=(baser==5)?0x40:0;
853    
854     emit_byte(0x88);
855     emit_byte(0x04+8*s+isebp);
856     emit_byte(baser+8*index+0x40*fi);
857     if (isebp)
858     emit_byte(0x00);
859     }
860     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
861    
862     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
863     {
864     int fi;
865    
866     switch(factor) {
867     case 1: fi=0; break;
868     case 2: fi=1; break;
869     case 4: fi=2; break;
870     case 8: fi=3; break;
871     default: abort();
872     }
873    
874     emit_byte(0x89);
875     emit_byte(0x84+8*s);
876     emit_byte(baser+8*index+0x40*fi);
877     emit_long(base);
878     }
879     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
880    
881     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
882     {
883     int fi;
884    
885     switch(factor) {
886     case 1: fi=0; break;
887     case 2: fi=1; break;
888     case 4: fi=2; break;
889     case 8: fi=3; break;
890     default: abort();
891     }
892    
893     emit_byte(0x66);
894     emit_byte(0x89);
895     emit_byte(0x84+8*s);
896     emit_byte(baser+8*index+0x40*fi);
897     emit_long(base);
898     }
899     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
900    
901     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
902     {
903     int fi;
904    
905     switch(factor) {
906     case 1: fi=0; break;
907     case 2: fi=1; break;
908     case 4: fi=2; break;
909     case 8: fi=3; break;
910     default: abort();
911     }
912    
913     emit_byte(0x88);
914     emit_byte(0x84+8*s);
915     emit_byte(baser+8*index+0x40*fi);
916     emit_long(base);
917     }
918     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
919    
920     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
921     {
922     int fi;
923    
924     switch(factor) {
925     case 1: fi=0; break;
926     case 2: fi=1; break;
927     case 4: fi=2; break;
928     case 8: fi=3; break;
929     default: abort();
930     }
931    
932     emit_byte(0x8b);
933     emit_byte(0x84+8*d);
934     emit_byte(baser+8*index+0x40*fi);
935     emit_long(base);
936     }
937     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
938    
939     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
940     {
941     int fi;
942    
943     switch(factor) {
944     case 1: fi=0; break;
945     case 2: fi=1; break;
946     case 4: fi=2; break;
947     case 8: fi=3; break;
948     default: abort();
949     }
950    
951     emit_byte(0x66);
952     emit_byte(0x8b);
953     emit_byte(0x84+8*d);
954     emit_byte(baser+8*index+0x40*fi);
955     emit_long(base);
956     }
957     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
958    
959     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
960     {
961     int fi;
962    
963     switch(factor) {
964     case 1: fi=0; break;
965     case 2: fi=1; break;
966     case 4: fi=2; break;
967     case 8: fi=3; break;
968     default: abort();
969     }
970    
971     emit_byte(0x8a);
972     emit_byte(0x84+8*d);
973     emit_byte(baser+8*index+0x40*fi);
974     emit_long(base);
975     }
976     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
977    
978     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
979     {
980     int fi;
981     switch(factor) {
982     case 1: fi=0; break;
983     case 2: fi=1; break;
984     case 4: fi=2; break;
985     case 8: fi=3; break;
986     default:
987     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
988     abort();
989     }
990     emit_byte(0x8b);
991     emit_byte(0x04+8*d);
992     emit_byte(0x05+8*index+64*fi);
993     emit_long(base);
994     }
995     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
996    
997     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
998     {
999     int fi;
1000     switch(factor) {
1001     case 1: fi=0; break;
1002     case 2: fi=1; break;
1003     case 4: fi=2; break;
1004     case 8: fi=3; break;
1005     default:
1006     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1007     abort();
1008     }
1009     if (have_cmov) {
1010     emit_byte(0x0f);
1011     emit_byte(0x40+cond);
1012     emit_byte(0x04+8*d);
1013     emit_byte(0x05+8*index+64*fi);
1014     emit_long(base);
1015     }
1016     else { /* replacement using branch and mov */
1017     int uncc=(cond^1);
1018     emit_byte(0x70+uncc);
1019     emit_byte(7); /* skip next 7 bytes if not cc=true */
1020     emit_byte(0x8b);
1021     emit_byte(0x04+8*d);
1022     emit_byte(0x05+8*index+64*fi);
1023     emit_long(base);
1024     }
1025     }
1026     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1027    
1028     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1029     {
1030     if (have_cmov) {
1031     emit_byte(0x0f);
1032     emit_byte(0x40+cond);
1033     emit_byte(0x05+8*d);
1034     emit_long(mem);
1035     }
1036     else { /* replacement using branch and mov */
1037     int uncc=(cond^1);
1038     emit_byte(0x70+uncc);
1039     emit_byte(6); /* skip next 6 bytes if not cc=true */
1040     emit_byte(0x8b);
1041     emit_byte(0x05+8*d);
1042     emit_long(mem);
1043     }
1044     }
1045     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1046    
1047     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1048     {
1049     emit_byte(0x8b);
1050     emit_byte(0x40+8*d+s);
1051     emit_byte(offset);
1052     }
1053     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1054    
1055     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1056     {
1057     emit_byte(0x66);
1058     emit_byte(0x8b);
1059     emit_byte(0x40+8*d+s);
1060     emit_byte(offset);
1061     }
1062     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1063    
1064     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1065     {
1066     emit_byte(0x8a);
1067     emit_byte(0x40+8*d+s);
1068     emit_byte(offset);
1069     }
1070     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1071    
1072     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1073     {
1074     emit_byte(0x8b);
1075     emit_byte(0x80+8*d+s);
1076     emit_long(offset);
1077     }
1078     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1079    
1080     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1081     {
1082     emit_byte(0x66);
1083     emit_byte(0x8b);
1084     emit_byte(0x80+8*d+s);
1085     emit_long(offset);
1086     }
1087     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1088    
1089     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1090     {
1091     emit_byte(0x8a);
1092     emit_byte(0x80+8*d+s);
1093     emit_long(offset);
1094     }
1095     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1096    
1097     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1098     {
1099     emit_byte(0xc7);
1100     emit_byte(0x40+d);
1101     emit_byte(offset);
1102     emit_long(i);
1103     }
1104     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1105    
1106     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1107     {
1108     emit_byte(0x66);
1109     emit_byte(0xc7);
1110     emit_byte(0x40+d);
1111     emit_byte(offset);
1112     emit_word(i);
1113     }
1114     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1115    
1116     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1117     {
1118     emit_byte(0xc6);
1119     emit_byte(0x40+d);
1120     emit_byte(offset);
1121     emit_byte(i);
1122     }
1123     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1124    
1125     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1126     {
1127     emit_byte(0x89);
1128     emit_byte(0x40+8*s+d);
1129     emit_byte(offset);
1130     }
1131     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1132    
1133     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1134     {
1135     emit_byte(0x66);
1136     emit_byte(0x89);
1137     emit_byte(0x40+8*s+d);
1138     emit_byte(offset);
1139     }
1140     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1141    
1142     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1143     {
1144     emit_byte(0x88);
1145     emit_byte(0x40+8*s+d);
1146     emit_byte(offset);
1147     }
1148     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1149    
1150     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1151     {
1152     if (optimize_imm8 && isbyte(offset)) {
1153     emit_byte(0x8d);
1154     emit_byte(0x40+8*d+s);
1155     emit_byte(offset);
1156     }
1157     else {
1158     emit_byte(0x8d);
1159     emit_byte(0x80+8*d+s);
1160     emit_long(offset);
1161     }
1162     }
1163     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1164    
1165     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1166     {
1167     int fi;
1168    
1169     switch(factor) {
1170     case 1: fi=0; break;
1171     case 2: fi=1; break;
1172     case 4: fi=2; break;
1173     case 8: fi=3; break;
1174     default: abort();
1175     }
1176    
1177     if (optimize_imm8 && isbyte(offset)) {
1178     emit_byte(0x8d);
1179     emit_byte(0x44+8*d);
1180     emit_byte(0x40*fi+8*index+s);
1181     emit_byte(offset);
1182     }
1183     else {
1184     emit_byte(0x8d);
1185     emit_byte(0x84+8*d);
1186     emit_byte(0x40*fi+8*index+s);
1187     emit_long(offset);
1188     }
1189     }
1190     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1191    
1192     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1193     {
1194     int isebp=(s==5)?0x40:0;
1195     int fi;
1196    
1197     switch(factor) {
1198     case 1: fi=0; break;
1199     case 2: fi=1; break;
1200     case 4: fi=2; break;
1201     case 8: fi=3; break;
1202     default: abort();
1203     }
1204    
1205     emit_byte(0x8d);
1206     emit_byte(0x04+8*d+isebp);
1207     emit_byte(0x40*fi+8*index+s);
1208     if (isebp)
1209     emit_byte(0);
1210     }
1211     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1212    
1213     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1214     {
1215     if (optimize_imm8 && isbyte(offset)) {
1216     emit_byte(0x89);
1217     emit_byte(0x40+8*s+d);
1218     emit_byte(offset);
1219     }
1220     else {
1221     emit_byte(0x89);
1222     emit_byte(0x80+8*s+d);
1223     emit_long(offset);
1224     }
1225     }
1226     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1227    
1228     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1229     {
1230     emit_byte(0x66);
1231     emit_byte(0x89);
1232     emit_byte(0x80+8*s+d);
1233     emit_long(offset);
1234     }
1235     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1236    
1237     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1238     {
1239     if (optimize_imm8 && isbyte(offset)) {
1240     emit_byte(0x88);
1241     emit_byte(0x40+8*s+d);
1242     emit_byte(offset);
1243     }
1244     else {
1245     emit_byte(0x88);
1246     emit_byte(0x80+8*s+d);
1247     emit_long(offset);
1248     }
1249     }
1250     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1251    
1252     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1253     {
1254     emit_byte(0x0f);
1255     emit_byte(0xc8+r);
1256     }
1257     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1258    
1259     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1260     {
1261     emit_byte(0x66);
1262     emit_byte(0xc1);
1263     emit_byte(0xc0+r);
1264     emit_byte(0x08);
1265     }
1266     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1267    
1268     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1269     {
1270     emit_byte(0x89);
1271     emit_byte(0xc0+8*s+d);
1272     }
1273     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1274    
1275     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1276     {
1277     emit_byte(0x89);
1278     emit_byte(0x05+8*s);
1279     emit_long(d);
1280     }
1281     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1282    
1283     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1284     {
1285     emit_byte(0x66);
1286     emit_byte(0x89);
1287     emit_byte(0x05+8*s);
1288     emit_long(d);
1289     }
1290     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1291    
1292     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1293     {
1294     emit_byte(0x66);
1295     emit_byte(0x8b);
1296     emit_byte(0x05+8*d);
1297     emit_long(s);
1298     }
1299     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1300    
1301     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1302     {
1303     emit_byte(0x88);
1304     emit_byte(0x05+8*s);
1305     emit_long(d);
1306     }
1307     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1308    
1309     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1310     {
1311     emit_byte(0x8a);
1312     emit_byte(0x05+8*d);
1313     emit_long(s);
1314     }
1315     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1316    
1317     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1318     {
1319     emit_byte(0xb8+d);
1320     emit_long(s);
1321     }
1322     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1323    
1324     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1325     {
1326     emit_byte(0x66);
1327     emit_byte(0xb8+d);
1328     emit_word(s);
1329     }
1330     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1331    
1332     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1333     {
1334     emit_byte(0xb0+d);
1335     emit_byte(s);
1336     }
1337     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1338    
1339     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1340     {
1341     emit_byte(0x81);
1342     emit_byte(0x15);
1343     emit_long(d);
1344     emit_long(s);
1345     }
1346     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1347    
1348     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1349     {
1350     if (optimize_imm8 && isbyte(s)) {
1351     emit_byte(0x83);
1352     emit_byte(0x05);
1353     emit_long(d);
1354     emit_byte(s);
1355     }
1356     else {
1357     emit_byte(0x81);
1358     emit_byte(0x05);
1359     emit_long(d);
1360     emit_long(s);
1361     }
1362     }
1363     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1364    
1365     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1366     {
1367     emit_byte(0x66);
1368     emit_byte(0x81);
1369     emit_byte(0x05);
1370     emit_long(d);
1371     emit_word(s);
1372     }
1373     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1374    
1375     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1376     {
1377     emit_byte(0x80);
1378     emit_byte(0x05);
1379     emit_long(d);
1380     emit_byte(s);
1381     }
1382     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1383    
1384     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1385     {
1386 gbeauche 1.2 if (optimize_accum && isaccum(d))
1387     emit_byte(0xa9);
1388     else {
1389 gbeauche 1.1 emit_byte(0xf7);
1390     emit_byte(0xc0+d);
1391 gbeauche 1.2 }
1392 gbeauche 1.1 emit_long(i);
1393     }
1394     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1395    
1396     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1397     {
1398     emit_byte(0x85);
1399     emit_byte(0xc0+8*s+d);
1400     }
1401     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1402    
1403     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1404     {
1405     emit_byte(0x66);
1406     emit_byte(0x85);
1407     emit_byte(0xc0+8*s+d);
1408     }
1409     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1410    
1411     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1412     {
1413     emit_byte(0x84);
1414     emit_byte(0xc0+8*s+d);
1415     }
1416     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1417    
1418     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1419     {
1420     if (optimize_imm8 && isbyte(i)) {
1421 gbeauche 1.2 emit_byte(0x83);
1422     emit_byte(0xe0+d);
1423     emit_byte(i);
1424 gbeauche 1.1 }
1425     else {
1426 gbeauche 1.2 if (optimize_accum && isaccum(d))
1427     emit_byte(0x25);
1428     else {
1429     emit_byte(0x81);
1430     emit_byte(0xe0+d);
1431     }
1432     emit_long(i);
1433 gbeauche 1.1 }
1434     }
1435     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1436    
1437     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1438     {
1439 gbeauche 1.2 emit_byte(0x66);
1440     if (optimize_imm8 && isbyte(i)) {
1441     emit_byte(0x83);
1442     emit_byte(0xe0+d);
1443     emit_byte(i);
1444     }
1445     else {
1446     if (optimize_accum && isaccum(d))
1447     emit_byte(0x25);
1448     else {
1449     emit_byte(0x81);
1450     emit_byte(0xe0+d);
1451     }
1452     emit_word(i);
1453     }
1454 gbeauche 1.1 }
1455     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1456    
1457     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1458     {
1459     emit_byte(0x21);
1460     emit_byte(0xc0+8*s+d);
1461     }
1462     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1463    
1464     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1465     {
1466     emit_byte(0x66);
1467     emit_byte(0x21);
1468     emit_byte(0xc0+8*s+d);
1469     }
1470     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1471    
1472     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1473     {
1474     emit_byte(0x20);
1475     emit_byte(0xc0+8*s+d);
1476     }
1477     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1478    
1479     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1480     {
1481     if (optimize_imm8 && isbyte(i)) {
1482     emit_byte(0x83);
1483     emit_byte(0xc8+d);
1484     emit_byte(i);
1485     }
1486     else {
1487 gbeauche 1.2 if (optimize_accum && isaccum(d))
1488     emit_byte(0x0d);
1489     else {
1490 gbeauche 1.1 emit_byte(0x81);
1491     emit_byte(0xc8+d);
1492 gbeauche 1.2 }
1493 gbeauche 1.1 emit_long(i);
1494     }
1495     }
1496     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1497    
1498     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1499     {
1500     emit_byte(0x09);
1501     emit_byte(0xc0+8*s+d);
1502     }
1503     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1504    
1505     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1506     {
1507     emit_byte(0x66);
1508     emit_byte(0x09);
1509     emit_byte(0xc0+8*s+d);
1510     }
1511     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1512    
1513     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1514     {
1515     emit_byte(0x08);
1516     emit_byte(0xc0+8*s+d);
1517     }
1518     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1519    
1520     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1521     {
1522     emit_byte(0x11);
1523     emit_byte(0xc0+8*s+d);
1524     }
1525     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1526    
1527     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1528     {
1529     emit_byte(0x66);
1530     emit_byte(0x11);
1531     emit_byte(0xc0+8*s+d);
1532     }
1533     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1534    
1535     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1536     {
1537     emit_byte(0x10);
1538     emit_byte(0xc0+8*s+d);
1539     }
1540     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1541    
1542     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1543     {
1544     emit_byte(0x01);
1545     emit_byte(0xc0+8*s+d);
1546     }
1547     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1548    
1549     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1550     {
1551     emit_byte(0x66);
1552     emit_byte(0x01);
1553     emit_byte(0xc0+8*s+d);
1554     }
1555     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1556    
1557     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1558     {
1559     emit_byte(0x00);
1560     emit_byte(0xc0+8*s+d);
1561     }
1562     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1563    
1564     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1565     {
1566     if (isbyte(i)) {
1567     emit_byte(0x83);
1568     emit_byte(0xe8+d);
1569     emit_byte(i);
1570     }
1571     else {
1572 gbeauche 1.2 if (optimize_accum && isaccum(d))
1573     emit_byte(0x2d);
1574     else {
1575 gbeauche 1.1 emit_byte(0x81);
1576     emit_byte(0xe8+d);
1577 gbeauche 1.2 }
1578 gbeauche 1.1 emit_long(i);
1579     }
1580     }
1581     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1582    
1583     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1584     {
1585 gbeauche 1.2 if (optimize_accum && isaccum(d))
1586     emit_byte(0x2c);
1587     else {
1588 gbeauche 1.1 emit_byte(0x80);
1589     emit_byte(0xe8+d);
1590 gbeauche 1.2 }
1591 gbeauche 1.1 emit_byte(i);
1592     }
1593     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1594    
1595     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1596     {
1597     if (isbyte(i)) {
1598     emit_byte(0x83);
1599     emit_byte(0xc0+d);
1600     emit_byte(i);
1601     }
1602     else {
1603 gbeauche 1.2 if (optimize_accum && isaccum(d))
1604     emit_byte(0x05);
1605     else {
1606 gbeauche 1.1 emit_byte(0x81);
1607     emit_byte(0xc0+d);
1608 gbeauche 1.2 }
1609 gbeauche 1.1 emit_long(i);
1610     }
1611     }
1612     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1613    
1614     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1615     {
1616 gbeauche 1.2 emit_byte(0x66);
1617 gbeauche 1.1 if (isbyte(i)) {
1618     emit_byte(0x83);
1619     emit_byte(0xc0+d);
1620     emit_byte(i);
1621     }
1622     else {
1623 gbeauche 1.2 if (optimize_accum && isaccum(d))
1624     emit_byte(0x05);
1625     else {
1626 gbeauche 1.1 emit_byte(0x81);
1627     emit_byte(0xc0+d);
1628 gbeauche 1.2 }
1629 gbeauche 1.1 emit_word(i);
1630     }
1631     }
1632     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1633    
1634     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1635     {
1636 gbeauche 1.2 if (optimize_accum && isaccum(d))
1637     emit_byte(0x04);
1638     else {
1639     emit_byte(0x80);
1640     emit_byte(0xc0+d);
1641     }
1642 gbeauche 1.1 emit_byte(i);
1643     }
1644     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1645    
1646     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1647     {
1648     emit_byte(0x19);
1649     emit_byte(0xc0+8*s+d);
1650     }
1651     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1652    
1653     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1654     {
1655     emit_byte(0x66);
1656     emit_byte(0x19);
1657     emit_byte(0xc0+8*s+d);
1658     }
1659     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1660    
1661     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1662     {
1663     emit_byte(0x18);
1664     emit_byte(0xc0+8*s+d);
1665     }
1666     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1667    
1668     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1669     {
1670     emit_byte(0x29);
1671     emit_byte(0xc0+8*s+d);
1672     }
1673     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1674    
1675     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1676     {
1677     emit_byte(0x66);
1678     emit_byte(0x29);
1679     emit_byte(0xc0+8*s+d);
1680     }
1681     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1682    
1683     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1684     {
1685     emit_byte(0x28);
1686     emit_byte(0xc0+8*s+d);
1687     }
1688     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1689    
1690     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1691     {
1692     emit_byte(0x39);
1693     emit_byte(0xc0+8*s+d);
1694     }
1695     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1696    
1697     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1698     {
1699     if (optimize_imm8 && isbyte(i)) {
1700     emit_byte(0x83);
1701     emit_byte(0xf8+r);
1702     emit_byte(i);
1703     }
1704     else {
1705 gbeauche 1.2 if (optimize_accum && isaccum(r))
1706     emit_byte(0x3d);
1707     else {
1708 gbeauche 1.1 emit_byte(0x81);
1709     emit_byte(0xf8+r);
1710 gbeauche 1.2 }
1711 gbeauche 1.1 emit_long(i);
1712     }
1713     }
1714     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1715    
1716     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1717     {
1718     emit_byte(0x66);
1719     emit_byte(0x39);
1720     emit_byte(0xc0+8*s+d);
1721     }
1722     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1723    
1724 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1725     {
1726     emit_byte(0x80);
1727     emit_byte(0x3d);
1728     emit_long(d);
1729     emit_byte(s);
1730     }
1731     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1732    
1733 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1734     {
1735 gbeauche 1.2 if (optimize_accum && isaccum(d))
1736     emit_byte(0x3c);
1737     else {
1738 gbeauche 1.1 emit_byte(0x80);
1739     emit_byte(0xf8+d);
1740 gbeauche 1.2 }
1741 gbeauche 1.1 emit_byte(i);
1742     }
1743     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1744    
1745     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1746     {
1747     emit_byte(0x38);
1748     emit_byte(0xc0+8*s+d);
1749     }
1750     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1751    
1752     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1753     {
1754     int fi;
1755    
1756     switch(factor) {
1757     case 1: fi=0; break;
1758     case 2: fi=1; break;
1759     case 4: fi=2; break;
1760     case 8: fi=3; break;
1761     default: abort();
1762     }
1763     emit_byte(0x39);
1764     emit_byte(0x04+8*d);
1765     emit_byte(5+8*index+0x40*fi);
1766     emit_long(offset);
1767     }
1768     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1769    
1770     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1771     {
1772     emit_byte(0x31);
1773     emit_byte(0xc0+8*s+d);
1774     }
1775     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1776    
1777     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1778     {
1779     emit_byte(0x66);
1780     emit_byte(0x31);
1781     emit_byte(0xc0+8*s+d);
1782     }
1783     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1784    
1785     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1786     {
1787     emit_byte(0x30);
1788     emit_byte(0xc0+8*s+d);
1789     }
1790     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1791    
1792     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1793     {
1794     if (optimize_imm8 && isbyte(s)) {
1795     emit_byte(0x83);
1796     emit_byte(0x2d);
1797     emit_long(d);
1798     emit_byte(s);
1799     }
1800     else {
1801     emit_byte(0x81);
1802     emit_byte(0x2d);
1803     emit_long(d);
1804     emit_long(s);
1805     }
1806     }
1807     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1808    
1809     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1810     {
1811     if (optimize_imm8 && isbyte(s)) {
1812     emit_byte(0x83);
1813     emit_byte(0x3d);
1814     emit_long(d);
1815     emit_byte(s);
1816     }
1817     else {
1818     emit_byte(0x81);
1819     emit_byte(0x3d);
1820     emit_long(d);
1821     emit_long(s);
1822     }
1823     }
1824     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1825    
1826     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1827     {
1828     emit_byte(0x87);
1829     emit_byte(0xc0+8*r1+r2);
1830     }
1831     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1832    
1833     /*************************************************************************
1834     * FIXME: string-related instructions *
1835     *************************************************************************/
1836    
1837     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1838     {
1839     emit_byte(0xfc);
1840     }
1841     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1842    
1843     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1844     {
1845     emit_byte(0xfd);
1846     }
1847     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1848    
1849     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1850     {
1851     emit_byte(0xa4);
1852     }
1853     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1854    
1855     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1856     {
1857     emit_byte(0xa5);
1858     }
1859     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1860    
1861     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1862     {
1863     emit_byte(0xf3);
1864     }
1865     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1866    
1867     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1868     {
1869     raw_rep();
1870     raw_movs_b();
1871     }
1872     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1873    
1874     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1875     {
1876     raw_rep();
1877     raw_movs_l();
1878     }
1879     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1880    
1881     /*************************************************************************
1882     * FIXME: mem access modes probably wrong *
1883     *************************************************************************/
1884    
1885     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1886     {
1887     emit_byte(0x9c);
1888     }
1889     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1890    
1891     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1892     {
1893     emit_byte(0x9d);
1894     }
1895     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1896    
1897     /*************************************************************************
1898     * Unoptimizable stuff --- jump *
1899     *************************************************************************/
1900    
1901     static __inline__ void raw_call_r(R4 r)
1902     {
1903     emit_byte(0xff);
1904     emit_byte(0xd0+r);
1905 gbeauche 1.5 }
1906    
1907     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1908     {
1909     int mu;
1910     switch(m) {
1911     case 1: mu=0; break;
1912     case 2: mu=1; break;
1913     case 4: mu=2; break;
1914     case 8: mu=3; break;
1915     default: abort();
1916     }
1917     emit_byte(0xff);
1918     emit_byte(0x14);
1919     emit_byte(0x05+8*r+0x40*mu);
1920     emit_long(base);
1921 gbeauche 1.1 }
1922    
1923     static __inline__ void raw_jmp_r(R4 r)
1924     {
1925     emit_byte(0xff);
1926     emit_byte(0xe0+r);
1927     }
1928    
1929     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1930     {
1931     int mu;
1932     switch(m) {
1933     case 1: mu=0; break;
1934     case 2: mu=1; break;
1935     case 4: mu=2; break;
1936     case 8: mu=3; break;
1937     default: abort();
1938     }
1939     emit_byte(0xff);
1940     emit_byte(0x24);
1941     emit_byte(0x05+8*r+0x40*mu);
1942     emit_long(base);
1943     }
1944    
1945     static __inline__ void raw_jmp_m(uae_u32 base)
1946     {
1947     emit_byte(0xff);
1948     emit_byte(0x25);
1949     emit_long(base);
1950     }
1951    
1952    
1953     static __inline__ void raw_call(uae_u32 t)
1954     {
1955     emit_byte(0xe8);
1956     emit_long(t-(uae_u32)target-4);
1957     }
1958    
1959     static __inline__ void raw_jmp(uae_u32 t)
1960     {
1961     emit_byte(0xe9);
1962     emit_long(t-(uae_u32)target-4);
1963     }
1964    
1965     static __inline__ void raw_jl(uae_u32 t)
1966     {
1967     emit_byte(0x0f);
1968     emit_byte(0x8c);
1969     emit_long(t-(uae_u32)target-4);
1970     }
1971    
1972     static __inline__ void raw_jz(uae_u32 t)
1973     {
1974     emit_byte(0x0f);
1975     emit_byte(0x84);
1976     emit_long(t-(uae_u32)target-4);
1977     }
1978    
1979     static __inline__ void raw_jnz(uae_u32 t)
1980     {
1981     emit_byte(0x0f);
1982     emit_byte(0x85);
1983     emit_long(t-(uae_u32)target-4);
1984     }
1985    
/* Emit only the opcode of a near JNZ; the caller emits the 32-bit
 * displacement. */
static __inline__ void raw_jnz_l_oponly(void)
{
    emit_byte(0x0f);    /* two-byte opcode escape */
    emit_byte(0x85);    /* JNZ rel32 */
}
1991    
/* Emit only the opcode of a near Jcc for condition code cc; the caller emits
 * the 32-bit displacement. */
static __inline__ void raw_jcc_l_oponly(int cc)
{
    emit_byte(0x0f);        /* two-byte opcode escape */
    emit_byte(0x80 + cc);   /* Jcc rel32 */
}
1997    
/* Emit only the opcode of a short JNZ; the caller emits the 8-bit
 * displacement. */
static __inline__ void raw_jnz_b_oponly(void)
{
    emit_byte(0x75);    /* JNZ rel8 */
}
2002    
/* Emit only the opcode of a short JZ; the caller emits the 8-bit
 * displacement. */
static __inline__ void raw_jz_b_oponly(void)
{
    emit_byte(0x74);    /* JZ rel8 */
}
2007    
/* Emit only the opcode of a short Jcc for condition code cc; the caller
 * emits the 8-bit displacement. */
static __inline__ void raw_jcc_b_oponly(int cc)
{
    emit_byte(0x70 + cc);   /* Jcc rel8 */
}
2012    
/* Emit only the opcode of a near JMP; the caller emits the 32-bit
 * displacement. */
static __inline__ void raw_jmp_l_oponly(void)
{
    emit_byte(0xe9);    /* JMP rel32 */
}
2017    
/* Emit only the opcode of a short JMP; the caller emits the 8-bit
 * displacement. */
static __inline__ void raw_jmp_b_oponly(void)
{
    emit_byte(0xeb);    /* JMP rel8 */
}
2022    
/* Emit a near return. */
static __inline__ void raw_ret(void)
{
    emit_byte(0xc3);    /* RET */
}
2027    
/* Emit a one-byte no-op. */
static __inline__ void raw_nop(void)
{
    emit_byte(0x90);    /* NOP */
}
2032    
2033    
2034     /*************************************************************************
2035     * Flag handling, to and fro UAE flag register *
2036     *************************************************************************/
2037    
2038     #ifdef SAHF_SETO_PROFITABLE
2039    
2040     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2041    
2042     static __inline__ void raw_flags_to_reg(int r)
2043     {
2044     raw_lahf(0); /* Most flags in AH */
2045     //raw_setcc(r,0); /* V flag in AL */
2046     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2047    
2048     #if 1 /* Let's avoid those nasty partial register stalls */
2049     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2050     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2051     //live.state[FLAGTMP].status=CLEAN;
2052     live.state[FLAGTMP].status=INMEM;
2053     live.state[FLAGTMP].realreg=-1;
2054     /* We just "evicted" FLAGTMP. */
2055     if (live.nat[r].nholds!=1) {
2056     /* Huh? */
2057     abort();
2058     }
2059     live.nat[r].nholds=0;
2060     #endif
2061     }
2062    
2063     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
/* Restore the native flags from register r (SAHF variant).  The byte compare
 * against -127 is there to set V (overflow) from the low byte; raw_sahf(0)
 * then reloads the remaining flags. */
static __inline__ void raw_reg_to_flags(int r)
{
    raw_cmp_b_ri(r,-127);   /* set V */
    raw_sahf(0);            /* remaining flags */
}
2069    
2070     #else
2071    
2072     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2073     static __inline__ void raw_flags_to_reg(int r)
2074     {
2075     raw_pushfl();
2076     raw_pop_l_r(r);
2077     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2078     // live.state[FLAGTMP].status=CLEAN;
2079     live.state[FLAGTMP].status=INMEM;
2080     live.state[FLAGTMP].realreg=-1;
2081     /* We just "evicted" FLAGTMP. */
2082     if (live.nat[r].nholds!=1) {
2083     /* Huh? */
2084     abort();
2085     }
2086     live.nat[r].nholds=0;
2087     }
2088    
2089     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
/* Restore the native flags from register r (PUSHF variant): push r, then pop
 * it into the flags register. */
static __inline__ void raw_reg_to_flags(int r)
{
    raw_push_l_r(r);    /* value to become the flags */
    raw_popfl();        /* pop it into the flags register */
}
2095    
2096     #endif
2097    
2098     /* Apparently, there are enough instructions between flag store and
2099     flag reload to avoid the partial memory stall */
2100     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2101     {
2102     #if 1
2103     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2104     #else
2105     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2106     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2107     #endif
2108     }
2109    
2110     /* FLAGX is byte sized, and we *do* write it at that size */
2111     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2112     {
2113     if (live.nat[target].canbyte)
2114     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2115     else if (live.nat[target].canword)
2116     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2117     else
2118     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2119     }
2120    
2121    
2122     static __inline__ void raw_inc_sp(int off)
2123     {
2124 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2125 gbeauche 1.1 }
2126    
2127     /*************************************************************************
2128     * Handling mistaken direct memory access *
2129     *************************************************************************/
2130    
2131     // gb-- I don't need that part for JIT Basilisk II
2132     #if defined(NATMEM_OFFSET) && 0
2133     #include <asm/sigcontext.h>
2134     #include <signal.h>
2135    
2136     #define SIG_READ 1
2137     #define SIG_WRITE 2
2138    
2139     static int in_handler=0;
2140     static uae_u8 veccode[256];
2141    
2142     static void vec(int x, struct sigcontext sc)
2143     {
2144     uae_u8* i=(uae_u8*)sc.eip;
2145     uae_u32 addr=sc.cr2;
2146     int r=-1;
2147     int size=4;
2148     int dir=-1;
2149     int len=0;
2150     int j;
2151    
2152     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2153     if (!canbang)
2154     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2155     if (in_handler)
2156     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2157    
2158     if (canbang && i>=compiled_code && i<=current_compile_p) {
2159     if (*i==0x66) {
2160     i++;
2161     size=2;
2162     len++;
2163     }
2164    
2165     switch(i[0]) {
2166     case 0x8a:
2167     if ((i[1]&0xc0)==0x80) {
2168     r=(i[1]>>3)&7;
2169     dir=SIG_READ;
2170     size=1;
2171     len+=6;
2172     break;
2173     }
2174     break;
2175     case 0x88:
2176     if ((i[1]&0xc0)==0x80) {
2177     r=(i[1]>>3)&7;
2178     dir=SIG_WRITE;
2179     size=1;
2180     len+=6;
2181     break;
2182     }
2183     break;
2184     case 0x8b:
2185     if ((i[1]&0xc0)==0x80) {
2186     r=(i[1]>>3)&7;
2187     dir=SIG_READ;
2188     len+=6;
2189     break;
2190     }
2191     if ((i[1]&0xc0)==0x40) {
2192     r=(i[1]>>3)&7;
2193     dir=SIG_READ;
2194     len+=3;
2195     break;
2196     }
2197     break;
2198     case 0x89:
2199     if ((i[1]&0xc0)==0x80) {
2200     r=(i[1]>>3)&7;
2201     dir=SIG_WRITE;
2202     len+=6;
2203     break;
2204     }
2205     if ((i[1]&0xc0)==0x40) {
2206     r=(i[1]>>3)&7;
2207     dir=SIG_WRITE;
2208     len+=3;
2209     break;
2210     }
2211     break;
2212     }
2213     }
2214    
2215     if (r!=-1) {
2216     void* pr=NULL;
2217     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2218    
2219     switch(r) {
2220     case 0: pr=&(sc.eax); break;
2221     case 1: pr=&(sc.ecx); break;
2222     case 2: pr=&(sc.edx); break;
2223     case 3: pr=&(sc.ebx); break;
2224     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2225     case 5: pr=(size>1)?
2226     (void*)(&(sc.ebp)):
2227     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2228     case 6: pr=(size>1)?
2229     (void*)(&(sc.esi)):
2230     (void*)(((uae_u8*)&(sc.edx))+1); break;
2231     case 7: pr=(size>1)?
2232     (void*)(&(sc.edi)):
2233     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2234     default: abort();
2235     }
2236     if (pr) {
2237     blockinfo* bi;
2238    
2239     if (currprefs.comp_oldsegv) {
2240     addr-=NATMEM_OFFSET;
2241    
2242     if ((addr>=0x10000000 && addr<0x40000000) ||
2243     (addr>=0x50000000)) {
2244     write_log("Suspicious address in %x SEGV handler.\n",addr);
2245     }
2246     if (dir==SIG_READ) {
2247     switch(size) {
2248     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2249     case 2: *((uae_u16*)pr)=get_word(addr); break;
2250     case 4: *((uae_u32*)pr)=get_long(addr); break;
2251     default: abort();
2252     }
2253     }
2254     else { /* write */
2255     switch(size) {
2256     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2257     case 2: put_word(addr,*((uae_u16*)pr)); break;
2258     case 4: put_long(addr,*((uae_u32*)pr)); break;
2259     default: abort();
2260     }
2261     }
2262     write_log("Handled one access!\n");
2263     fflush(stdout);
2264     segvcount++;
2265     sc.eip+=len;
2266     }
2267     else {
2268     void* tmp=target;
2269     int i;
2270     uae_u8 vecbuf[5];
2271    
2272     addr-=NATMEM_OFFSET;
2273    
2274     if ((addr>=0x10000000 && addr<0x40000000) ||
2275     (addr>=0x50000000)) {
2276     write_log("Suspicious address in %x SEGV handler.\n",addr);
2277     }
2278    
2279     target=(uae_u8*)sc.eip;
2280     for (i=0;i<5;i++)
2281     vecbuf[i]=target[i];
2282     emit_byte(0xe9);
2283     emit_long((uae_u32)veccode-(uae_u32)target-4);
2284     write_log("Create jump to %p\n",veccode);
2285    
2286     write_log("Handled one access!\n");
2287     fflush(stdout);
2288     segvcount++;
2289    
2290     target=veccode;
2291    
2292     if (dir==SIG_READ) {
2293     switch(size) {
2294     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2295     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2296     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2297     default: abort();
2298     }
2299     }
2300     else { /* write */
2301     switch(size) {
2302     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2303     case 2: put_word(addr,*((uae_u16*)pr)); break;
2304     case 4: put_long(addr,*((uae_u32*)pr)); break;
2305     default: abort();
2306     }
2307     }
2308     for (i=0;i<5;i++)
2309     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2310     raw_mov_l_mi((uae_u32)&in_handler,0);
2311     emit_byte(0xe9);
2312     emit_long(sc.eip+len-(uae_u32)target-4);
2313     in_handler=1;
2314     target=tmp;
2315     }
2316     bi=active;
2317     while (bi) {
2318     if (bi->handler &&
2319     (uae_u8*)bi->direct_handler<=i &&
2320     (uae_u8*)bi->nexthandler>i) {
2321     write_log("deleted trigger (%p<%p<%p) %p\n",
2322     bi->handler,
2323     i,
2324     bi->nexthandler,
2325     bi->pc_p);
2326     invalidate_block(bi);
2327     raise_in_cl_list(bi);
2328     set_special(0);
2329     return;
2330     }
2331     bi=bi->next;
2332     }
2333     /* Not found in the active list. Might be a rom routine that
2334     is in the dormant list */
2335     bi=dormant;
2336     while (bi) {
2337     if (bi->handler &&
2338     (uae_u8*)bi->direct_handler<=i &&
2339     (uae_u8*)bi->nexthandler>i) {
2340     write_log("deleted trigger (%p<%p<%p) %p\n",
2341     bi->handler,
2342     i,
2343     bi->nexthandler,
2344     bi->pc_p);
2345     invalidate_block(bi);
2346     raise_in_cl_list(bi);
2347     set_special(0);
2348     return;
2349     }
2350     bi=bi->next;
2351     }
2352     write_log("Huh? Could not find trigger!\n");
2353     return;
2354     }
2355     }
2356     write_log("Can't handle access!\n");
2357     for (j=0;j<10;j++) {
2358     write_log("instruction byte %2d is %02x\n",j,i[j]);
2359     }
2360     write_log("Please send the above info (starting at \"fault address\") to\n"
2361     "bmeyer@csse.monash.edu.au\n"
2362     "This shouldn't happen ;-)\n");
2363     fflush(stdout);
2364     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2365     }
2366     #endif
2367    
2368    
2369     /*************************************************************************
2370     * Checking for CPU features *
2371     *************************************************************************/
2372    
2373 gbeauche 1.3 struct cpuinfo_x86 {
2374     uae_u8 x86; // CPU family
2375     uae_u8 x86_vendor; // CPU vendor
2376     uae_u8 x86_processor; // CPU canonical processor type
2377     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2378     uae_u32 x86_hwcap;
2379     uae_u8 x86_model;
2380     uae_u8 x86_mask;
2381     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2382     char x86_vendor_id[16];
2383     };
2384     struct cpuinfo_x86 cpuinfo;
2385    
/* Values stored in cpuinfo_x86::x86_vendor; the vendor string that
   x86_get_cpu_vendor() maps to each value is given alongside. */
enum {
    X86_VENDOR_INTEL     = 0,    /* "GenuineIntel" */
    X86_VENDOR_CYRIX     = 1,    /* "CyrixInstead" */
    X86_VENDOR_AMD       = 2,    /* "AuthenticAMD" */
    X86_VENDOR_UMC       = 3,    /* "UMC UMC UMC " */
    X86_VENDOR_NEXGEN    = 4,    /* "NexGenDriven" */
    X86_VENDOR_CENTAUR   = 5,    /* "CentaurHauls" */
    X86_VENDOR_RISE      = 6,    /* "RiseRiseRise" */
    X86_VENDOR_TRANSMETA = 7,    /* "GenuineTMx86" or "TransmetaCPU" */
    X86_VENDOR_NSC       = 8,    /* "Geode by NSC" */
    X86_VENDOR_UNKNOWN   = 0xff  /* anything else */
};
2398    
/* Canonical processor types. The values index
   x86_processor_string_table[] and x86_alignments[], so the order of the
   entries must stay in sync with both tables. */
enum {
    X86_PROCESSOR_I386,        /* 80386 */
    X86_PROCESSOR_I486,        /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_max          /* number of entries; also "not identified" */
};
2409    
2410     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2411     "80386",
2412     "80486",
2413     "Pentium",
2414     "PentiumPro",
2415     "K6",
2416     "Athlon",
2417     "Pentium4"
2418     };
2419    
2420     static struct ptt {
2421     const int align_loop;
2422     const int align_loop_max_skip;
2423     const int align_jump;
2424     const int align_jump_max_skip;
2425     const int align_func;
2426     }
2427     x86_alignments[X86_PROCESSOR_max] = {
2428     { 4, 3, 4, 3, 4 },
2429     { 16, 15, 16, 15, 16 },
2430     { 16, 7, 16, 7, 16 },
2431     { 16, 15, 16, 7, 16 },
2432     { 32, 7, 32, 7, 32 },
2433 gbeauche 1.4 { 16, 7, 16, 7, 16 },
2434 gbeauche 1.3 { 0, 0, 0, 0, 0 }
2435     };
2436 gbeauche 1.1
2437 gbeauche 1.3 static void
2438     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2439 gbeauche 1.1 {
2440 gbeauche 1.3 char *v = c->x86_vendor_id;
2441    
2442     if (!strcmp(v, "GenuineIntel"))
2443     c->x86_vendor = X86_VENDOR_INTEL;
2444     else if (!strcmp(v, "AuthenticAMD"))
2445     c->x86_vendor = X86_VENDOR_AMD;
2446     else if (!strcmp(v, "CyrixInstead"))
2447     c->x86_vendor = X86_VENDOR_CYRIX;
2448     else if (!strcmp(v, "Geode by NSC"))
2449     c->x86_vendor = X86_VENDOR_NSC;
2450     else if (!strcmp(v, "UMC UMC UMC "))
2451     c->x86_vendor = X86_VENDOR_UMC;
2452     else if (!strcmp(v, "CentaurHauls"))
2453     c->x86_vendor = X86_VENDOR_CENTAUR;
2454     else if (!strcmp(v, "NexGenDriven"))
2455     c->x86_vendor = X86_VENDOR_NEXGEN;
2456     else if (!strcmp(v, "RiseRiseRise"))
2457     c->x86_vendor = X86_VENDOR_RISE;
2458     else if (!strcmp(v, "GenuineTMx86") ||
2459     !strcmp(v, "TransmetaCPU"))
2460     c->x86_vendor = X86_VENDOR_TRANSMETA;
2461     else
2462     c->x86_vendor = X86_VENDOR_UNKNOWN;
2463     }
2464 gbeauche 1.1
2465 gbeauche 1.3 static void
2466     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2467     {
2468     static uae_u8 cpuid_space[256];
2469     uae_u8* tmp=get_target();
2470 gbeauche 1.1
2471 gbeauche 1.3 set_target(cpuid_space);
2472     raw_push_l_r(0); /* eax */
2473     raw_push_l_r(1); /* ecx */
2474     raw_push_l_r(2); /* edx */
2475     raw_push_l_r(3); /* ebx */
2476     raw_mov_l_rm(0,(uae_u32)&op);
2477     raw_cpuid(0);
2478     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2479     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2480     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2481     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2482     raw_pop_l_r(3);
2483     raw_pop_l_r(2);
2484     raw_pop_l_r(1);
2485     raw_pop_l_r(0);
2486     raw_ret();
2487     set_target(tmp);
2488 gbeauche 1.1
2489 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
2490 gbeauche 1.1 }
2491    
2492 gbeauche 1.3 static void
2493     raw_init_cpu(void)
2494 gbeauche 1.1 {
2495 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
2496    
2497     /* Defaults */
2498     c->x86_vendor = X86_VENDOR_UNKNOWN;
2499     c->cpuid_level = -1; /* CPUID not detected */
2500     c->x86_model = c->x86_mask = 0; /* So far unknown... */
2501     c->x86_vendor_id[0] = '\0'; /* Unset */
2502     c->x86_hwcap = 0;
2503    
2504     /* Get vendor name */
2505     c->x86_vendor_id[12] = '\0';
2506     cpuid(0x00000000,
2507     (uae_u32 *)&c->cpuid_level,
2508     (uae_u32 *)&c->x86_vendor_id[0],
2509     (uae_u32 *)&c->x86_vendor_id[8],
2510     (uae_u32 *)&c->x86_vendor_id[4]);
2511     x86_get_cpu_vendor(c);
2512    
2513     /* Intel-defined flags: level 0x00000001 */
2514     c->x86_brand_id = 0;
2515     if ( c->cpuid_level >= 0x00000001 ) {
2516     uae_u32 tfms, brand_id;
2517     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2518     c->x86 = (tfms >> 8) & 15;
2519     c->x86_model = (tfms >> 4) & 15;
2520     c->x86_brand_id = brand_id & 0xff;
2521     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2522     (c->x86 == 0xf)) {
2523     /* AMD Extended Family and Model Values */
2524     c->x86 += (tfms >> 20) & 0xff;
2525     c->x86_model += (tfms >> 12) & 0xf0;
2526     }
2527     c->x86_mask = tfms & 15;
2528     } else {
2529     /* Have CPUID level 0 only - unheard of */
2530     c->x86 = 4;
2531     }
2532    
2533     /* Canonicalize processor ID */
2534     c->x86_processor = X86_PROCESSOR_max;
2535     switch (c->x86) {
2536     case 3:
2537     c->x86_processor = X86_PROCESSOR_I386;
2538     break;
2539     case 4:
2540     c->x86_processor = X86_PROCESSOR_I486;
2541     break;
2542     case 5:
2543     if (c->x86_vendor == X86_VENDOR_AMD)
2544     c->x86_processor = X86_PROCESSOR_K6;
2545     else
2546     c->x86_processor = X86_PROCESSOR_PENTIUM;
2547     break;
2548     case 6:
2549     if (c->x86_vendor == X86_VENDOR_AMD)
2550     c->x86_processor = X86_PROCESSOR_ATHLON;
2551     else
2552     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2553     break;
2554     case 15:
2555     if (c->x86_vendor == X86_VENDOR_INTEL) {
2556     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2557     if (c->x86_brand_id >= 8)
2558     c->x86_processor = X86_PROCESSOR_PENTIUM4;
2559     }
2560     break;
2561     }
2562     if (c->x86_processor == X86_PROCESSOR_max) {
2563     fprintf(stderr, "Error: unknown processor type\n");
2564     fprintf(stderr, " Family : %d\n", c->x86);
2565     fprintf(stderr, " Model : %d\n", c->x86_model);
2566     fprintf(stderr, " Mask : %d\n", c->x86_mask);
2567     if (c->x86_brand_id)
2568     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2569     abort();
2570     }
2571    
2572     /* Have CMOV support? */
2573     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2574    
2575     /* Can the host CPU suffer from partial register stalls? */
2576     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2577     #if 1
2578     /* It appears that partial register writes are a bad idea even on
2579 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
2580     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2581 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2582     have_rat_stall = true;
2583 gbeauche 1.1 #endif
2584 gbeauche 1.3
2585     /* Alignments */
2586     if (tune_alignment) {
2587     align_loops = x86_alignments[c->x86_processor].align_loop;
2588     align_jumps = x86_alignments[c->x86_processor].align_jump;
2589     }
2590    
2591     write_log("Max CPUID level=%d Processor is %s [%s]\n",
2592     c->cpuid_level, c->x86_vendor_id,
2593     x86_processor_string_table[c->x86_processor]);
2594 gbeauche 1.1 }
2595    
2596    
2597     /*************************************************************************
2598     * FPU stuff *
2599     *************************************************************************/
2600    
2601    
2602     static __inline__ void raw_fp_init(void)
2603     {
2604     int i;
2605    
2606     for (i=0;i<N_FREGS;i++)
2607     live.spos[i]=-2;
2608     live.tos=-1; /* Stack is empty */
2609     }
2610    
2611     static __inline__ void raw_fp_cleanup_drop(void)
2612     {
2613     #if 0
2614     /* using FINIT instead of popping all the entries.
2615     Seems to have side effects --- there is display corruption in
2616     Quake when this is used */
2617     if (live.tos>1) {
2618     emit_byte(0x9b);
2619     emit_byte(0xdb);
2620     emit_byte(0xe3);
2621     live.tos=-1;
2622     }
2623     #endif
2624     while (live.tos>=1) {
2625     emit_byte(0xde);
2626     emit_byte(0xd9);
2627     live.tos-=2;
2628     }
2629     while (live.tos>=0) {
2630     emit_byte(0xdd);
2631     emit_byte(0xd8);
2632     live.tos--;
2633     }
2634     raw_fp_init();
2635     }
2636    
2637     static __inline__ void make_tos(int r)
2638     {
2639     int p,q;
2640    
2641     if (live.spos[r]<0) { /* Register not yet on stack */
2642     emit_byte(0xd9);
2643     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2644     live.tos++;
2645     live.spos[r]=live.tos;
2646     live.onstack[live.tos]=r;
2647     return;
2648     }
2649     /* Register is on stack */
2650     if (live.tos==live.spos[r])
2651     return;
2652     p=live.spos[r];
2653     q=live.onstack[live.tos];
2654    
2655     emit_byte(0xd9);
2656     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2657     live.onstack[live.tos]=r;
2658     live.spos[r]=live.tos;
2659     live.onstack[p]=q;
2660     live.spos[q]=p;
2661     }
2662    
2663     static __inline__ void make_tos2(int r, int r2)
2664     {
2665     int q;
2666    
2667     make_tos(r2); /* Put the reg that's supposed to end up in position2
2668     on top */
2669    
2670     if (live.spos[r]<0) { /* Register not yet on stack */
2671     make_tos(r); /* This will extend the stack */
2672     return;
2673     }
2674     /* Register is on stack */
2675     emit_byte(0xd9);
2676     emit_byte(0xc9); /* Move r2 into position 2 */
2677    
2678     q=live.onstack[live.tos-1];
2679     live.onstack[live.tos]=q;
2680     live.spos[q]=live.tos;
2681     live.onstack[live.tos-1]=r2;
2682     live.spos[r2]=live.tos-1;
2683    
2684     make_tos(r); /* And r into 1 */
2685     }
2686    
2687     static __inline__ int stackpos(int r)
2688     {
2689     if (live.spos[r]<0)
2690     abort();
2691     if (live.tos<live.spos[r]) {
2692     printf("Looking for spos for fnreg %d\n",r);
2693     abort();
2694     }
2695     return live.tos-live.spos[r];
2696     }
2697    
2698     static __inline__ void usereg(int r)
2699     {
2700     if (live.spos[r]<0)
2701     make_tos(r);
2702     }
2703    
2704     /* This is called with one FP value in a reg *above* tos, which it will
2705     pop off the stack if necessary */
2706     static __inline__ void tos_make(int r)
2707     {
2708     if (live.spos[r]<0) {
2709     live.tos++;
2710     live.spos[r]=live.tos;
2711     live.onstack[live.tos]=r;
2712     return;
2713     }
2714     emit_byte(0xdd);
2715     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2716     and pop it*/
2717     }
2718    
2719    
2720     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2721     {
2722     make_tos(r);
2723     emit_byte(0xdd);
2724     emit_byte(0x15);
2725     emit_long(m);
2726     }
2727     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2728    
2729     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2730     {
2731     make_tos(r);
2732     emit_byte(0xdd);
2733     emit_byte(0x1d);
2734     emit_long(m);
2735     live.onstack[live.tos]=-1;
2736     live.tos--;
2737     live.spos[r]=-2;
2738     }
2739     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2740    
2741     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2742     {
2743     emit_byte(0xdd);
2744     emit_byte(0x05);
2745     emit_long(m);
2746     tos_make(r);
2747     }
2748     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2749    
2750     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2751     {
2752     emit_byte(0xdb);
2753     emit_byte(0x05);
2754     emit_long(m);
2755     tos_make(r);
2756     }
2757     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2758    
2759     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2760     {
2761     make_tos(r);
2762     emit_byte(0xdb);
2763     emit_byte(0x15);
2764     emit_long(m);
2765     }
2766     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2767    
2768     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2769     {
2770     emit_byte(0xd9);
2771     emit_byte(0x05);
2772     emit_long(m);
2773     tos_make(r);
2774     }
2775     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2776    
2777     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2778     {
2779     make_tos(r);
2780     emit_byte(0xd9);
2781     emit_byte(0x15);
2782     emit_long(m);
2783     }
2784     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2785    
2786     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2787     {
2788     int rs;
2789    
2790     /* Stupid x87 can't write a long double to mem without popping the
2791     stack! */
2792     usereg(r);
2793     rs=stackpos(r);
2794     emit_byte(0xd9); /* Get a copy to the top of stack */
2795     emit_byte(0xc0+rs);
2796    
2797     emit_byte(0xdb); /* store and pop it */
2798     emit_byte(0x3d);
2799     emit_long(m);
2800     }
2801     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2802    
2803     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2804     {
2805     int rs;
2806    
2807     make_tos(r);
2808     emit_byte(0xdb); /* store and pop it */
2809     emit_byte(0x3d);
2810     emit_long(m);
2811     live.onstack[live.tos]=-1;
2812     live.tos--;
2813     live.spos[r]=-2;
2814     }
2815     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2816    
2817     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2818     {
2819     emit_byte(0xdb);
2820     emit_byte(0x2d);
2821     emit_long(m);
2822     tos_make(r);
2823     }
2824     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2825    
2826     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2827     {
2828     emit_byte(0xd9);
2829     emit_byte(0xeb);
2830     tos_make(r);
2831     }
2832     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2833    
2834     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2835     {
2836     emit_byte(0xd9);
2837     emit_byte(0xec);
2838     tos_make(r);
2839     }
2840     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2841    
2842     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2843     {
2844     emit_byte(0xd9);
2845     emit_byte(0xea);
2846     tos_make(r);
2847     }
2848     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2849    
2850     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2851     {
2852     emit_byte(0xd9);
2853     emit_byte(0xed);
2854     tos_make(r);
2855     }
2856     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2857    
2858     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2859     {
2860     emit_byte(0xd9);
2861     emit_byte(0xe8);
2862     tos_make(r);
2863     }
2864     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2865    
2866     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2867     {
2868     emit_byte(0xd9);
2869     emit_byte(0xee);
2870     tos_make(r);
2871     }
2872     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2873    
2874     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2875     {
2876     int ds;
2877    
2878     usereg(s);
2879     ds=stackpos(s);
2880     if (ds==0 && live.spos[d]>=0) {
2881     /* source is on top of stack, and we already have the dest */
2882     int dd=stackpos(d);
2883     emit_byte(0xdd);
2884     emit_byte(0xd0+dd);
2885     }
2886     else {
2887     emit_byte(0xd9);
2888     emit_byte(0xc0+ds); /* duplicate source on tos */
2889     tos_make(d); /* store to destination, pop if necessary */
2890     }
2891     }
2892     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2893    
2894     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2895     {
2896     emit_byte(0xd9);
2897     emit_byte(0xa8+index);
2898     emit_long(base);
2899     }
2900     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2901    
2902    
2903     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2904     {
2905     int ds;
2906    
2907     if (d!=s) {
2908     usereg(s);
2909     ds=stackpos(s);
2910     emit_byte(0xd9);
2911     emit_byte(0xc0+ds); /* duplicate source */
2912     emit_byte(0xd9);
2913     emit_byte(0xfa); /* take square root */
2914     tos_make(d); /* store to destination */
2915     }
2916     else {
2917     make_tos(d);
2918     emit_byte(0xd9);
2919     emit_byte(0xfa); /* take square root */
2920     }
2921     }
2922     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2923    
2924     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2925     {
2926     int ds;
2927    
2928     if (d!=s) {
2929     usereg(s);
2930     ds=stackpos(s);
2931     emit_byte(0xd9);
2932     emit_byte(0xc0+ds); /* duplicate source */
2933     emit_byte(0xd9);
2934     emit_byte(0xe1); /* take fabs */
2935     tos_make(d); /* store to destination */
2936     }
2937     else {
2938     make_tos(d);
2939     emit_byte(0xd9);
2940     emit_byte(0xe1); /* take fabs */
2941     }
2942     }
2943     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2944    
2945     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2946     {
2947     int ds;
2948    
2949     if (d!=s) {
2950     usereg(s);
2951     ds=stackpos(s);
2952     emit_byte(0xd9);
2953     emit_byte(0xc0+ds); /* duplicate source */
2954     emit_byte(0xd9);
2955     emit_byte(0xfc); /* take frndint */
2956     tos_make(d); /* store to destination */
2957     }
2958     else {
2959     make_tos(d);
2960     emit_byte(0xd9);
2961     emit_byte(0xfc); /* take frndint */
2962     }
2963     }
2964     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2965    
2966     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2967     {
2968     int ds;
2969    
2970     if (d!=s) {
2971     usereg(s);
2972     ds=stackpos(s);
2973     emit_byte(0xd9);
2974     emit_byte(0xc0+ds); /* duplicate source */
2975     emit_byte(0xd9);
2976     emit_byte(0xff); /* take cos */
2977     tos_make(d); /* store to destination */
2978     }
2979     else {
2980     make_tos(d);
2981     emit_byte(0xd9);
2982     emit_byte(0xff); /* take cos */
2983     }
2984     }
2985     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2986    
2987     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2988     {
2989     int ds;
2990    
2991     if (d!=s) {
2992     usereg(s);
2993     ds=stackpos(s);
2994     emit_byte(0xd9);
2995     emit_byte(0xc0+ds); /* duplicate source */
2996     emit_byte(0xd9);
2997     emit_byte(0xfe); /* take sin */
2998     tos_make(d); /* store to destination */
2999     }
3000     else {
3001     make_tos(d);
3002     emit_byte(0xd9);
3003     emit_byte(0xfe); /* take sin */
3004     }
3005     }
3006     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3007    
3008     double one=1;
3009     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3010     {
3011     int ds;
3012    
3013     usereg(s);
3014     ds=stackpos(s);
3015     emit_byte(0xd9);
3016     emit_byte(0xc0+ds); /* duplicate source */
3017    
3018     emit_byte(0xd9);
3019     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3020     emit_byte(0xd9);
3021     emit_byte(0xfc); /* rndint */
3022     emit_byte(0xd9);
3023     emit_byte(0xc9); /* swap top two elements */
3024     emit_byte(0xd8);
3025     emit_byte(0xe1); /* subtract rounded from original */
3026     emit_byte(0xd9);
3027     emit_byte(0xf0); /* f2xm1 */
3028     emit_byte(0xdc);
3029     emit_byte(0x05);
3030     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3031     emit_byte(0xd9);
3032     emit_byte(0xfd); /* and scale it */
3033     emit_byte(0xdd);
3034     emit_byte(0xd9); /* take he rounded value off */
3035     tos_make(d); /* store to destination */
3036     }
3037     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3038    
3039     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3040     {
3041     int ds;
3042    
3043     usereg(s);
3044     ds=stackpos(s);
3045     emit_byte(0xd9);
3046     emit_byte(0xc0+ds); /* duplicate source */
3047     emit_byte(0xd9);
3048     emit_byte(0xea); /* fldl2e */
3049     emit_byte(0xde);
3050     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3051    
3052     emit_byte(0xd9);
3053     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3054     emit_byte(0xd9);
3055     emit_byte(0xfc); /* rndint */
3056     emit_byte(0xd9);
3057     emit_byte(0xc9); /* swap top two elements */
3058     emit_byte(0xd8);
3059     emit_byte(0xe1); /* subtract rounded from original */
3060     emit_byte(0xd9);
3061     emit_byte(0xf0); /* f2xm1 */
3062     emit_byte(0xdc);
3063     emit_byte(0x05);
3064     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3065     emit_byte(0xd9);
3066     emit_byte(0xfd); /* and scale it */
3067     emit_byte(0xdd);
3068     emit_byte(0xd9); /* take he rounded value off */
3069     tos_make(d); /* store to destination */
3070     }
3071     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3072    
3073     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3074     {
3075     int ds;
3076    
3077     usereg(s);
3078     ds=stackpos(s);
3079     emit_byte(0xd9);
3080     emit_byte(0xc0+ds); /* duplicate source */
3081     emit_byte(0xd9);
3082     emit_byte(0xe8); /* push '1' */
3083     emit_byte(0xd9);
3084     emit_byte(0xc9); /* swap top two */
3085     emit_byte(0xd9);
3086     emit_byte(0xf1); /* take 1*log2(x) */
3087     tos_make(d); /* store to destination */
3088     }
3089     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3090    
3091    
3092     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3093     {
3094     int ds;
3095    
3096     if (d!=s) {
3097     usereg(s);
3098     ds=stackpos(s);
3099     emit_byte(0xd9);
3100     emit_byte(0xc0+ds); /* duplicate source */
3101     emit_byte(0xd9);
3102     emit_byte(0xe0); /* take fchs */
3103     tos_make(d); /* store to destination */
3104     }
3105     else {
3106     make_tos(d);
3107     emit_byte(0xd9);
3108     emit_byte(0xe0); /* take fchs */
3109     }
3110     }
3111     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3112    
3113     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3114     {
3115     int ds;
3116    
3117     usereg(s);
3118     usereg(d);
3119    
3120     if (live.spos[s]==live.tos) {
3121     /* Source is on top of stack */
3122     ds=stackpos(d);
3123     emit_byte(0xdc);
3124     emit_byte(0xc0+ds); /* add source to dest*/
3125     }
3126     else {
3127     make_tos(d);
3128     ds=stackpos(s);
3129    
3130     emit_byte(0xd8);
3131     emit_byte(0xc0+ds); /* add source to dest*/
3132     }
3133     }
3134     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3135    
3136     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3137     {
3138     int ds;
3139    
3140     usereg(s);
3141     usereg(d);
3142    
3143     if (live.spos[s]==live.tos) {
3144     /* Source is on top of stack */
3145     ds=stackpos(d);
3146     emit_byte(0xdc);
3147     emit_byte(0xe8+ds); /* sub source from dest*/
3148     }
3149     else {
3150     make_tos(d);
3151     ds=stackpos(s);
3152    
3153     emit_byte(0xd8);
3154     emit_byte(0xe0+ds); /* sub src from dest */
3155     }
3156     }
3157     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3158    
3159     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3160     {
3161     int ds;
3162    
3163     usereg(s);
3164     usereg(d);
3165    
3166     make_tos(d);
3167     ds=stackpos(s);
3168    
3169     emit_byte(0xdd);
3170     emit_byte(0xe0+ds); /* cmp dest with source*/
3171     }
3172     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3173    
3174     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3175     {
3176     int ds;
3177    
3178     usereg(s);
3179     usereg(d);
3180    
3181     if (live.spos[s]==live.tos) {
3182     /* Source is on top of stack */
3183     ds=stackpos(d);
3184     emit_byte(0xdc);
3185     emit_byte(0xc8+ds); /* mul dest by source*/
3186     }
3187     else {
3188     make_tos(d);
3189     ds=stackpos(s);
3190    
3191     emit_byte(0xd8);
3192     emit_byte(0xc8+ds); /* mul dest by source*/
3193     }
3194     }
3195     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3196    
3197     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3198     {
3199     int ds;
3200    
3201     usereg(s);
3202     usereg(d);
3203    
3204     if (live.spos[s]==live.tos) {
3205     /* Source is on top of stack */
3206     ds=stackpos(d);
3207     emit_byte(0xdc);
3208     emit_byte(0xf8+ds); /* div dest by source */
3209     }
3210     else {
3211     make_tos(d);
3212     ds=stackpos(s);
3213    
3214     emit_byte(0xd8);
3215     emit_byte(0xf0+ds); /* div dest by source*/
3216     }
3217     }
3218     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3219    
3220     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3221     {
3222     int ds;
3223    
3224     usereg(s);
3225     usereg(d);
3226    
3227     make_tos2(d,s);
3228     ds=stackpos(s);
3229    
3230     if (ds!=1) {
3231     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3232     abort();
3233     }
3234     emit_byte(0xd9);
3235     emit_byte(0xf8); /* take rem from dest by source */
3236     }
3237     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3238    
3239     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3240     {
3241     int ds;
3242    
3243     usereg(s);
3244     usereg(d);
3245    
3246     make_tos2(d,s);
3247     ds=stackpos(s);
3248    
3249     if (ds!=1) {
3250     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3251     abort();
3252     }
3253     emit_byte(0xd9);
3254     emit_byte(0xf5); /* take rem1 from dest by source */
3255     }
3256     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3257    
3258    
3259     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3260     {
3261     make_tos(r);
3262     emit_byte(0xd9); /* ftst */
3263     emit_byte(0xe4);
3264     }
3265     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3266    
3267     /* %eax register is clobbered if target processor doesn't support fucomi */
3268     #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
3269     #define FFLAG_NREG EAX_INDEX
3270    
3271     static __inline__ void raw_fflags_into_flags(int r)
3272     {
3273     int p;
3274    
3275     usereg(r);
3276     p=stackpos(r);
3277    
3278     emit_byte(0xd9);
3279     emit_byte(0xee); /* Push 0 */
3280     emit_byte(0xd9);
3281     emit_byte(0xc9+p); /* swap top two around */
3282     if (have_cmov) {
3283     // gb-- fucomi is for P6 cores only, not K6-2 then...
3284     emit_byte(0xdb);
3285     emit_byte(0xe9+p); /* fucomi them */
3286     }
3287     else {
3288     emit_byte(0xdd);
3289     emit_byte(0xe1+p); /* fucom them */
3290     emit_byte(0x9b);
3291     emit_byte(0xdf);
3292     emit_byte(0xe0); /* fstsw ax */
3293     raw_sahf(0); /* sahf */
3294     }
3295     emit_byte(0xdd);
3296     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3297     }