ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.3
Committed: 2002-09-19T14:59:03Z (22 years ago) by gbeauche
Branch: MAIN
Changes since 1.2: +210 -77 lines
Log Message:
- Rewrite raw_init_cpu() to match more details, from kernel sources.
- Add possibility to tune code alignment to the underlying processor. However,
  this is turned off as I don't see much improvement and align_jumps = 64
  for Athlon looks suspicious to me.
- Remove two extra align_target() that are already covered.
- Remove unused may_trap() predicate.

File Contents

# User Rev Content
/* This should eventually end up in machdep/, but for now, x86 is the
   only target, and it's easier this way... */

/*************************************************************************
 * Some basic information about the target CPU                           *
 *************************************************************************/

/* Indices of the eight 32-bit general-purpose registers */
#define EAX_INDEX   0
#define ECX_INDEX   1
#define EDX_INDEX   2
#define EBX_INDEX   3
#define ESP_INDEX   4
#define EBP_INDEX   5
#define ESI_INDEX   6
#define EDI_INDEX   7

/* The register in which subroutines return an integer return value */
#define REG_RESULT  0

/* The registers subroutines take their first and second argument in */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* Handle the _fastcall parameters of ECX and EDX */
#define REG_PAR1    1
#define REG_PAR2    2
#else
#define REG_PAR1    0
#define REG_PAR2    2
#endif

/* Three registers that are not used for any of the above */
#define REG_NOPAR1  6
#define REG_NOPAR2  5
#define REG_NOPAR3  3

/* The register we use for preloading regs.pc_p */
#define REG_PC_PRE  0
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
#define REG_PC_TMP  0
#else
/* Another register that is not the above */
#define REG_PC_TMP  1
#endif

/* Register that can be used for shiftcount. -1 if any reg will do */
#define SHIFTCOUNT_NREG 1
/* %eax will hold the low 32 bits after a 32x32 mul */
#define MUL_NREG1   0
/* %edx will hold the high 32 bits */
#define MUL_NREG2   2

47     uae_s8 always_used[]={4,-1};
48     uae_s8 can_byte[]={0,1,2,3,-1};
49     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
50    
51     /* cpuopti mutate instruction handlers to assume registers are saved
52     by the caller */
53     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
54    
55     /* This *should* be the same as call_saved. But:
56     - We might not really know which registers are saved, and which aren't,
57     so we need to preserve some, but don't want to rely on everyone else
58     also saving those registers
59     - Special registers (such like the stack pointer) should not be "preserved"
60     by pushing, even though they are "saved" across function calls
61     */
62     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
63    
/* Whether classes of instructions do or don't clobber the native flags.
   A macro that expands to nothing marks a class that leaves the flags
   alone; the others expand to a clobber_flags() call. */

/* Data movement: flags untouched */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH

/* Arithmetic and logic */
#define CLOBBER_SUB   clobber_flags()
#define CLOBBER_SBB   clobber_flags()
#define CLOBBER_CMP   clobber_flags()
#define CLOBBER_ADD   clobber_flags()
#define CLOBBER_ADC   clobber_flags()
#define CLOBBER_AND   clobber_flags()
#define CLOBBER_OR    clobber_flags()
#define CLOBBER_XOR   clobber_flags()

/* Rotates, shifts and tests */
#define CLOBBER_ROL   clobber_flags()
#define CLOBBER_ROR   clobber_flags()
#define CLOBBER_SHLL  clobber_flags()
#define CLOBBER_SHRL  clobber_flags()
#define CLOBBER_SHRA  clobber_flags()
#define CLOBBER_TEST  clobber_flags()

/* Clears, extensions, swaps and setcc */
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW16  clobber_flags()
#define CLOBBER_SW32
#define CLOBBER_SETCC

/* Multiplies and bit operations */
#define CLOBBER_MUL   clobber_flags()
#define CLOBBER_BT    clobber_flags()
#define CLOBBER_BSF   clobber_flags()
/* Encoding-size optimisations used by the emitters below. */
const bool optimize_accum      = true;  /* use the short accumulator-only opcode forms */
const bool optimize_imm8       = true;  /* NOTE(review): presumably selects imm8 forms; not used in this part of the file */
const bool optimize_shift_once = true;  /* use the dedicated shift/rotate-by-one opcodes */
101    
102     /*************************************************************************
103     * Actual encoding of the instructions on the target CPU *
104     *************************************************************************/
105    
106 gbeauche 1.2 static __inline__ int isaccum(int r)
107     {
108     return (r == EAX_INDEX);
109     }
110    
111 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
112     {
113     return (x>=-128 && x<=127);
114     }
115    
116     static __inline__ int isword(uae_s32 x)
117     {
118     return (x>=-32768 && x<=32767);
119     }
120    
121     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
122     {
123     emit_byte(0x50+r);
124     }
125     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
126    
127     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
128     {
129     emit_byte(0x58+r);
130     }
131     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
132    
133     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
134     {
135     emit_byte(0x0f);
136     emit_byte(0xba);
137     emit_byte(0xe0+r);
138     emit_byte(i);
139     }
140     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
141    
142     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
143     {
144     emit_byte(0x0f);
145     emit_byte(0xa3);
146     emit_byte(0xc0+8*b+r);
147     }
148     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
149    
150     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
151     {
152     emit_byte(0x0f);
153     emit_byte(0xba);
154     emit_byte(0xf8+r);
155     emit_byte(i);
156     }
157     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
158    
159     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
160     {
161     emit_byte(0x0f);
162     emit_byte(0xbb);
163     emit_byte(0xc0+8*b+r);
164     }
165     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
166    
167    
168     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
169     {
170     emit_byte(0x0f);
171     emit_byte(0xba);
172     emit_byte(0xf0+r);
173     emit_byte(i);
174     }
175     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
176    
177     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
178     {
179     emit_byte(0x0f);
180     emit_byte(0xb3);
181     emit_byte(0xc0+8*b+r);
182     }
183     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
184    
185     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
186     {
187     emit_byte(0x0f);
188     emit_byte(0xba);
189     emit_byte(0xe8+r);
190     emit_byte(i);
191     }
192     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
193    
194     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
195     {
196     emit_byte(0x0f);
197     emit_byte(0xab);
198     emit_byte(0xc0+8*b+r);
199     }
200     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
201    
202     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
203     {
204     emit_byte(0x66);
205     if (isbyte(i)) {
206     emit_byte(0x83);
207     emit_byte(0xe8+d);
208     emit_byte(i);
209     }
210     else {
211 gbeauche 1.2 if (optimize_accum && isaccum(d))
212     emit_byte(0x2d);
213     else {
214 gbeauche 1.1 emit_byte(0x81);
215     emit_byte(0xe8+d);
216 gbeauche 1.2 }
217 gbeauche 1.1 emit_word(i);
218     }
219     }
220     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
221    
222    
223     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
224     {
225     emit_byte(0x8b);
226     emit_byte(0x05+8*d);
227     emit_long(s);
228     }
229     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
230    
231     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
232     {
233     emit_byte(0xc7);
234     emit_byte(0x05);
235     emit_long(d);
236     emit_long(s);
237     }
238     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
239    
240     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
241     {
242     emit_byte(0x66);
243     emit_byte(0xc7);
244     emit_byte(0x05);
245     emit_long(d);
246     emit_word(s);
247     }
248     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
249    
250     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
251     {
252     emit_byte(0xc6);
253     emit_byte(0x05);
254     emit_long(d);
255     emit_byte(s);
256     }
257     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
258    
259     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
260     {
261     if (optimize_shift_once && (i == 1)) {
262     emit_byte(0xd0);
263     emit_byte(0x05);
264     emit_long(d);
265     }
266     else {
267     emit_byte(0xc0);
268     emit_byte(0x05);
269     emit_long(d);
270     emit_byte(i);
271     }
272     }
273     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
274    
275     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
276     {
277     if (optimize_shift_once && (i == 1)) {
278     emit_byte(0xd0);
279     emit_byte(0xc0+r);
280     }
281     else {
282     emit_byte(0xc0);
283     emit_byte(0xc0+r);
284     emit_byte(i);
285     }
286     }
287     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
288    
289     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
290     {
291     emit_byte(0x66);
292     emit_byte(0xc1);
293     emit_byte(0xc0+r);
294     emit_byte(i);
295     }
296     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
297    
298     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
299     {
300     if (optimize_shift_once && (i == 1)) {
301     emit_byte(0xd1);
302     emit_byte(0xc0+r);
303     }
304     else {
305     emit_byte(0xc1);
306     emit_byte(0xc0+r);
307     emit_byte(i);
308     }
309     }
310     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
311    
312     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
313     {
314     emit_byte(0xd3);
315     emit_byte(0xc0+d);
316     }
317     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
318    
319     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
320     {
321     emit_byte(0x66);
322     emit_byte(0xd3);
323     emit_byte(0xc0+d);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
328     {
329     emit_byte(0xd2);
330     emit_byte(0xc0+d);
331     }
332     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
333    
334     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
335     {
336     emit_byte(0xd3);
337     emit_byte(0xe0+d);
338     }
339     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
340    
341     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
342     {
343     emit_byte(0x66);
344     emit_byte(0xd3);
345     emit_byte(0xe0+d);
346     }
347     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
348    
349     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350     {
351     emit_byte(0xd2);
352     emit_byte(0xe0+d);
353     }
354     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
355    
356     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
357     {
358     if (optimize_shift_once && (i == 1)) {
359     emit_byte(0xd0);
360     emit_byte(0xc8+r);
361     }
362     else {
363     emit_byte(0xc0);
364     emit_byte(0xc8+r);
365     emit_byte(i);
366     }
367     }
368     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
369    
370     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371     {
372     emit_byte(0x66);
373     emit_byte(0xc1);
374     emit_byte(0xc8+r);
375     emit_byte(i);
376     }
377     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
378    
379     // gb-- used for making an fpcr value in compemu_fpp.cpp
380     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
381     {
382     emit_byte(0x0b);
383     emit_byte(0x05+8*d);
384     emit_long(s);
385     }
386     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
387    
388     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
389     {
390     if (optimize_shift_once && (i == 1)) {
391     emit_byte(0xd1);
392     emit_byte(0xc8+r);
393     }
394     else {
395     emit_byte(0xc1);
396     emit_byte(0xc8+r);
397     emit_byte(i);
398     }
399     }
400     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
401    
402     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
403     {
404     emit_byte(0xd3);
405     emit_byte(0xc8+d);
406     }
407     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
408    
409     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
410     {
411     emit_byte(0x66);
412     emit_byte(0xd3);
413     emit_byte(0xc8+d);
414     }
415     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
416    
417     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
418     {
419     emit_byte(0xd2);
420     emit_byte(0xc8+d);
421     }
422     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
423    
424     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
425     {
426     emit_byte(0xd3);
427     emit_byte(0xe8+d);
428     }
429     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
430    
431     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
432     {
433     emit_byte(0x66);
434     emit_byte(0xd3);
435     emit_byte(0xe8+d);
436     }
437     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
438    
439     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
440     {
441     emit_byte(0xd2);
442     emit_byte(0xe8+d);
443     }
444     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
445    
446     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
447     {
448     emit_byte(0xd3);
449     emit_byte(0xf8+d);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
454     {
455     emit_byte(0x66);
456     emit_byte(0xd3);
457     emit_byte(0xf8+d);
458     }
459     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
460    
461     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
462     {
463     emit_byte(0xd2);
464     emit_byte(0xf8+d);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
469     {
470     if (optimize_shift_once && (i == 1)) {
471     emit_byte(0xd1);
472     emit_byte(0xe0+r);
473     }
474     else {
475     emit_byte(0xc1);
476     emit_byte(0xe0+r);
477     emit_byte(i);
478     }
479     }
480     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
481    
482     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
483     {
484     emit_byte(0x66);
485     emit_byte(0xc1);
486     emit_byte(0xe0+r);
487     emit_byte(i);
488     }
489     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
490    
491     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
492     {
493     if (optimize_shift_once && (i == 1)) {
494     emit_byte(0xd0);
495     emit_byte(0xe0+r);
496     }
497     else {
498     emit_byte(0xc0);
499     emit_byte(0xe0+r);
500     emit_byte(i);
501     }
502     }
503     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
504    
505     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
506     {
507     if (optimize_shift_once && (i == 1)) {
508     emit_byte(0xd1);
509     emit_byte(0xe8+r);
510     }
511     else {
512     emit_byte(0xc1);
513     emit_byte(0xe8+r);
514     emit_byte(i);
515     }
516     }
517     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
518    
519     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
520     {
521     emit_byte(0x66);
522     emit_byte(0xc1);
523     emit_byte(0xe8+r);
524     emit_byte(i);
525     }
526     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
527    
528     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
529     {
530     if (optimize_shift_once && (i == 1)) {
531     emit_byte(0xd0);
532     emit_byte(0xe8+r);
533     }
534     else {
535     emit_byte(0xc0);
536     emit_byte(0xe8+r);
537     emit_byte(i);
538     }
539     }
540     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
541    
542     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
543     {
544     if (optimize_shift_once && (i == 1)) {
545     emit_byte(0xd1);
546     emit_byte(0xf8+r);
547     }
548     else {
549     emit_byte(0xc1);
550     emit_byte(0xf8+r);
551     emit_byte(i);
552     }
553     }
554     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
555    
556     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
557     {
558     emit_byte(0x66);
559     emit_byte(0xc1);
560     emit_byte(0xf8+r);
561     emit_byte(i);
562     }
563     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
564    
565     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
566     {
567     if (optimize_shift_once && (i == 1)) {
568     emit_byte(0xd0);
569     emit_byte(0xf8+r);
570     }
571     else {
572     emit_byte(0xc0);
573     emit_byte(0xf8+r);
574     emit_byte(i);
575     }
576     }
577     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
578    
579     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
580     {
581     emit_byte(0x9e);
582     }
583     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
584    
585     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
586     {
587     emit_byte(0x0f);
588     emit_byte(0xa2);
589     }
590     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
591    
592     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
593     {
594     emit_byte(0x9f);
595     }
596     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
597    
598     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
599     {
600     emit_byte(0x0f);
601     emit_byte(0x90+cc);
602     emit_byte(0xc0+d);
603     }
604     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
605    
606     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
607     {
608     emit_byte(0x0f);
609     emit_byte(0x90+cc);
610     emit_byte(0x05);
611     emit_long(d);
612     }
613     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
614    
615     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
616     {
617     if (have_cmov) {
618     emit_byte(0x0f);
619     emit_byte(0x40+cc);
620     emit_byte(0xc0+8*d+s);
621     }
622     else { /* replacement using branch and mov */
623     int uncc=(cc^1);
624     emit_byte(0x70+uncc);
625     emit_byte(2); /* skip next 2 bytes if not cc=true */
626     emit_byte(0x89);
627     emit_byte(0xc0+8*s+d);
628     }
629     }
630     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
631    
632     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
633     {
634     emit_byte(0x0f);
635     emit_byte(0xbc);
636     emit_byte(0xc0+8*d+s);
637     }
638     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
639    
640     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
641     {
642     emit_byte(0x0f);
643     emit_byte(0xbf);
644     emit_byte(0xc0+8*d+s);
645     }
646     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
647    
648     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
649     {
650     emit_byte(0x0f);
651     emit_byte(0xbe);
652     emit_byte(0xc0+8*d+s);
653     }
654     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
655    
656     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
657     {
658     emit_byte(0x0f);
659     emit_byte(0xb7);
660     emit_byte(0xc0+8*d+s);
661     }
662     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
663    
664     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
665     {
666     emit_byte(0x0f);
667     emit_byte(0xb6);
668     emit_byte(0xc0+8*d+s);
669     }
670     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
671    
672     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
673     {
674     emit_byte(0x0f);
675     emit_byte(0xaf);
676     emit_byte(0xc0+8*d+s);
677     }
678     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
679    
680     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
681     {
682     if (d!=MUL_NREG1 || s!=MUL_NREG2)
683     abort();
684     emit_byte(0xf7);
685     emit_byte(0xea);
686     }
687     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
688    
689     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
690     {
691     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
692     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
693     abort();
694     }
695     emit_byte(0xf7);
696     emit_byte(0xe2);
697     }
698     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
699    
700     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
701     {
702     abort(); /* %^$&%^$%#^ x86! */
703     emit_byte(0x0f);
704     emit_byte(0xaf);
705     emit_byte(0xc0+8*d+s);
706     }
707     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
708    
709     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
710     {
711     emit_byte(0x88);
712     emit_byte(0xc0+8*s+d);
713     }
714     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
715    
716     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
717     {
718     emit_byte(0x66);
719     emit_byte(0x89);
720     emit_byte(0xc0+8*s+d);
721     }
722     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
723    
724     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
725     {
726     int isebp=(baser==5)?0x40:0;
727     int fi;
728    
729     switch(factor) {
730     case 1: fi=0; break;
731     case 2: fi=1; break;
732     case 4: fi=2; break;
733     case 8: fi=3; break;
734     default: abort();
735     }
736    
737    
738     emit_byte(0x8b);
739     emit_byte(0x04+8*d+isebp);
740     emit_byte(baser+8*index+0x40*fi);
741     if (isebp)
742     emit_byte(0x00);
743     }
744     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
745    
746     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
747     {
748     int fi;
749     int isebp;
750    
751     switch(factor) {
752     case 1: fi=0; break;
753     case 2: fi=1; break;
754     case 4: fi=2; break;
755     case 8: fi=3; break;
756     default: abort();
757     }
758     isebp=(baser==5)?0x40:0;
759    
760     emit_byte(0x66);
761     emit_byte(0x8b);
762     emit_byte(0x04+8*d+isebp);
763     emit_byte(baser+8*index+0x40*fi);
764     if (isebp)
765     emit_byte(0x00);
766     }
767     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
768    
769     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
770     {
771     int fi;
772     int isebp;
773    
774     switch(factor) {
775     case 1: fi=0; break;
776     case 2: fi=1; break;
777     case 4: fi=2; break;
778     case 8: fi=3; break;
779     default: abort();
780     }
781     isebp=(baser==5)?0x40:0;
782    
783     emit_byte(0x8a);
784     emit_byte(0x04+8*d+isebp);
785     emit_byte(baser+8*index+0x40*fi);
786     if (isebp)
787     emit_byte(0x00);
788     }
789     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
790    
791     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
792     {
793     int fi;
794     int isebp;
795    
796     switch(factor) {
797     case 1: fi=0; break;
798     case 2: fi=1; break;
799     case 4: fi=2; break;
800     case 8: fi=3; break;
801     default: abort();
802     }
803    
804    
805     isebp=(baser==5)?0x40:0;
806    
807     emit_byte(0x89);
808     emit_byte(0x04+8*s+isebp);
809     emit_byte(baser+8*index+0x40*fi);
810     if (isebp)
811     emit_byte(0x00);
812     }
813     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
814    
815     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
816     {
817     int fi;
818     int isebp;
819    
820     switch(factor) {
821     case 1: fi=0; break;
822     case 2: fi=1; break;
823     case 4: fi=2; break;
824     case 8: fi=3; break;
825     default: abort();
826     }
827     isebp=(baser==5)?0x40:0;
828    
829     emit_byte(0x66);
830     emit_byte(0x89);
831     emit_byte(0x04+8*s+isebp);
832     emit_byte(baser+8*index+0x40*fi);
833     if (isebp)
834     emit_byte(0x00);
835     }
836     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
837    
838     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
839     {
840     int fi;
841     int isebp;
842    
843     switch(factor) {
844     case 1: fi=0; break;
845     case 2: fi=1; break;
846     case 4: fi=2; break;
847     case 8: fi=3; break;
848     default: abort();
849     }
850     isebp=(baser==5)?0x40:0;
851    
852     emit_byte(0x88);
853     emit_byte(0x04+8*s+isebp);
854     emit_byte(baser+8*index+0x40*fi);
855     if (isebp)
856     emit_byte(0x00);
857     }
858     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
859    
860     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
861     {
862     int fi;
863    
864     switch(factor) {
865     case 1: fi=0; break;
866     case 2: fi=1; break;
867     case 4: fi=2; break;
868     case 8: fi=3; break;
869     default: abort();
870     }
871    
872     emit_byte(0x89);
873     emit_byte(0x84+8*s);
874     emit_byte(baser+8*index+0x40*fi);
875     emit_long(base);
876     }
877     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
878    
879     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
880     {
881     int fi;
882    
883     switch(factor) {
884     case 1: fi=0; break;
885     case 2: fi=1; break;
886     case 4: fi=2; break;
887     case 8: fi=3; break;
888     default: abort();
889     }
890    
891     emit_byte(0x66);
892     emit_byte(0x89);
893     emit_byte(0x84+8*s);
894     emit_byte(baser+8*index+0x40*fi);
895     emit_long(base);
896     }
897     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
898    
899     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
900     {
901     int fi;
902    
903     switch(factor) {
904     case 1: fi=0; break;
905     case 2: fi=1; break;
906     case 4: fi=2; break;
907     case 8: fi=3; break;
908     default: abort();
909     }
910    
911     emit_byte(0x88);
912     emit_byte(0x84+8*s);
913     emit_byte(baser+8*index+0x40*fi);
914     emit_long(base);
915     }
916     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
917    
918     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
919     {
920     int fi;
921    
922     switch(factor) {
923     case 1: fi=0; break;
924     case 2: fi=1; break;
925     case 4: fi=2; break;
926     case 8: fi=3; break;
927     default: abort();
928     }
929    
930     emit_byte(0x8b);
931     emit_byte(0x84+8*d);
932     emit_byte(baser+8*index+0x40*fi);
933     emit_long(base);
934     }
935     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
936    
937     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
938     {
939     int fi;
940    
941     switch(factor) {
942     case 1: fi=0; break;
943     case 2: fi=1; break;
944     case 4: fi=2; break;
945     case 8: fi=3; break;
946     default: abort();
947     }
948    
949     emit_byte(0x66);
950     emit_byte(0x8b);
951     emit_byte(0x84+8*d);
952     emit_byte(baser+8*index+0x40*fi);
953     emit_long(base);
954     }
955     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
956    
957     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
958     {
959     int fi;
960    
961     switch(factor) {
962     case 1: fi=0; break;
963     case 2: fi=1; break;
964     case 4: fi=2; break;
965     case 8: fi=3; break;
966     default: abort();
967     }
968    
969     emit_byte(0x8a);
970     emit_byte(0x84+8*d);
971     emit_byte(baser+8*index+0x40*fi);
972     emit_long(base);
973     }
974     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
975    
976     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
977     {
978     int fi;
979     switch(factor) {
980     case 1: fi=0; break;
981     case 2: fi=1; break;
982     case 4: fi=2; break;
983     case 8: fi=3; break;
984     default:
985     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
986     abort();
987     }
988     emit_byte(0x8b);
989     emit_byte(0x04+8*d);
990     emit_byte(0x05+8*index+64*fi);
991     emit_long(base);
992     }
993     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
994    
995     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
996     {
997     int fi;
998     switch(factor) {
999     case 1: fi=0; break;
1000     case 2: fi=1; break;
1001     case 4: fi=2; break;
1002     case 8: fi=3; break;
1003     default:
1004     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1005     abort();
1006     }
1007     if (have_cmov) {
1008     emit_byte(0x0f);
1009     emit_byte(0x40+cond);
1010     emit_byte(0x04+8*d);
1011     emit_byte(0x05+8*index+64*fi);
1012     emit_long(base);
1013     }
1014     else { /* replacement using branch and mov */
1015     int uncc=(cond^1);
1016     emit_byte(0x70+uncc);
1017     emit_byte(7); /* skip next 7 bytes if not cc=true */
1018     emit_byte(0x8b);
1019     emit_byte(0x04+8*d);
1020     emit_byte(0x05+8*index+64*fi);
1021     emit_long(base);
1022     }
1023     }
1024     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1025    
1026     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1027     {
1028     if (have_cmov) {
1029     emit_byte(0x0f);
1030     emit_byte(0x40+cond);
1031     emit_byte(0x05+8*d);
1032     emit_long(mem);
1033     }
1034     else { /* replacement using branch and mov */
1035     int uncc=(cond^1);
1036     emit_byte(0x70+uncc);
1037     emit_byte(6); /* skip next 6 bytes if not cc=true */
1038     emit_byte(0x8b);
1039     emit_byte(0x05+8*d);
1040     emit_long(mem);
1041     }
1042     }
1043     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1044    
1045     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1046     {
1047     emit_byte(0x8b);
1048     emit_byte(0x40+8*d+s);
1049     emit_byte(offset);
1050     }
1051     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1052    
1053     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1054     {
1055     emit_byte(0x66);
1056     emit_byte(0x8b);
1057     emit_byte(0x40+8*d+s);
1058     emit_byte(offset);
1059     }
1060     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1061    
1062     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1063     {
1064     emit_byte(0x8a);
1065     emit_byte(0x40+8*d+s);
1066     emit_byte(offset);
1067     }
1068     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1069    
1070     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1071     {
1072     emit_byte(0x8b);
1073     emit_byte(0x80+8*d+s);
1074     emit_long(offset);
1075     }
1076     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1077    
1078     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1079     {
1080     emit_byte(0x66);
1081     emit_byte(0x8b);
1082     emit_byte(0x80+8*d+s);
1083     emit_long(offset);
1084     }
1085     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1086    
1087     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1088     {
1089     emit_byte(0x8a);
1090     emit_byte(0x80+8*d+s);
1091     emit_long(offset);
1092     }
1093     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1094    
1095     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1096     {
1097     emit_byte(0xc7);
1098     emit_byte(0x40+d);
1099     emit_byte(offset);
1100     emit_long(i);
1101     }
1102     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1103    
1104     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1105     {
1106     emit_byte(0x66);
1107     emit_byte(0xc7);
1108     emit_byte(0x40+d);
1109     emit_byte(offset);
1110     emit_word(i);
1111     }
1112     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1113    
1114     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1115     {
1116     emit_byte(0xc6);
1117     emit_byte(0x40+d);
1118     emit_byte(offset);
1119     emit_byte(i);
1120     }
1121     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1122    
1123     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1124     {
1125     emit_byte(0x89);
1126     emit_byte(0x40+8*s+d);
1127     emit_byte(offset);
1128     }
1129     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1130    
1131     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1132     {
1133     emit_byte(0x66);
1134     emit_byte(0x89);
1135     emit_byte(0x40+8*s+d);
1136     emit_byte(offset);
1137     }
1138     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1139    
1140     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1141     {
1142     emit_byte(0x88);
1143     emit_byte(0x40+8*s+d);
1144     emit_byte(offset);
1145     }
1146     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1147    
1148     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1149     {
1150     if (optimize_imm8 && isbyte(offset)) {
1151     emit_byte(0x8d);
1152     emit_byte(0x40+8*d+s);
1153     emit_byte(offset);
1154     }
1155     else {
1156     emit_byte(0x8d);
1157     emit_byte(0x80+8*d+s);
1158     emit_long(offset);
1159     }
1160     }
1161     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1162    
1163     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1164     {
1165     int fi;
1166    
1167     switch(factor) {
1168     case 1: fi=0; break;
1169     case 2: fi=1; break;
1170     case 4: fi=2; break;
1171     case 8: fi=3; break;
1172     default: abort();
1173     }
1174    
1175     if (optimize_imm8 && isbyte(offset)) {
1176     emit_byte(0x8d);
1177     emit_byte(0x44+8*d);
1178     emit_byte(0x40*fi+8*index+s);
1179     emit_byte(offset);
1180     }
1181     else {
1182     emit_byte(0x8d);
1183     emit_byte(0x84+8*d);
1184     emit_byte(0x40*fi+8*index+s);
1185     emit_long(offset);
1186     }
1187     }
1188     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1189    
1190     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1191     {
1192     int isebp=(s==5)?0x40:0;
1193     int fi;
1194    
1195     switch(factor) {
1196     case 1: fi=0; break;
1197     case 2: fi=1; break;
1198     case 4: fi=2; break;
1199     case 8: fi=3; break;
1200     default: abort();
1201     }
1202    
1203     emit_byte(0x8d);
1204     emit_byte(0x04+8*d+isebp);
1205     emit_byte(0x40*fi+8*index+s);
1206     if (isebp)
1207     emit_byte(0);
1208     }
1209     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1210    
1211     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1212     {
1213     if (optimize_imm8 && isbyte(offset)) {
1214     emit_byte(0x89);
1215     emit_byte(0x40+8*s+d);
1216     emit_byte(offset);
1217     }
1218     else {
1219     emit_byte(0x89);
1220     emit_byte(0x80+8*s+d);
1221     emit_long(offset);
1222     }
1223     }
1224     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1225    
1226     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1227     {
1228     emit_byte(0x66);
1229     emit_byte(0x89);
1230     emit_byte(0x80+8*s+d);
1231     emit_long(offset);
1232     }
1233     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1234    
1235     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1236     {
1237     if (optimize_imm8 && isbyte(offset)) {
1238     emit_byte(0x88);
1239     emit_byte(0x40+8*s+d);
1240     emit_byte(offset);
1241     }
1242     else {
1243     emit_byte(0x88);
1244     emit_byte(0x80+8*s+d);
1245     emit_long(offset);
1246     }
1247     }
1248     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1249    
1250     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1251     {
1252     emit_byte(0x0f);
1253     emit_byte(0xc8+r);
1254     }
1255     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1256    
1257     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1258     {
1259     emit_byte(0x66);
1260     emit_byte(0xc1);
1261     emit_byte(0xc0+r);
1262     emit_byte(0x08);
1263     }
1264     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1265    
1266     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1267     {
1268     emit_byte(0x89);
1269     emit_byte(0xc0+8*s+d);
1270     }
1271     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1272    
1273     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1274     {
1275     emit_byte(0x89);
1276     emit_byte(0x05+8*s);
1277     emit_long(d);
1278     }
1279     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1280    
1281     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1282     {
1283     emit_byte(0x66);
1284     emit_byte(0x89);
1285     emit_byte(0x05+8*s);
1286     emit_long(d);
1287     }
1288     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1289    
1290     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1291     {
1292     emit_byte(0x66);
1293     emit_byte(0x8b);
1294     emit_byte(0x05+8*d);
1295     emit_long(s);
1296     }
1297     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1298    
1299     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1300     {
1301     emit_byte(0x88);
1302     emit_byte(0x05+8*s);
1303     emit_long(d);
1304     }
1305     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1306    
1307     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1308     {
1309     emit_byte(0x8a);
1310     emit_byte(0x05+8*d);
1311     emit_long(s);
1312     }
1313     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1314    
1315     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1316     {
1317     emit_byte(0xb8+d);
1318     emit_long(s);
1319     }
1320     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1321    
1322     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1323     {
1324     emit_byte(0x66);
1325     emit_byte(0xb8+d);
1326     emit_word(s);
1327     }
1328     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1329    
1330     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1331     {
1332     emit_byte(0xb0+d);
1333     emit_byte(s);
1334     }
1335     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1336    
1337     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1338     {
1339     emit_byte(0x81);
1340     emit_byte(0x15);
1341     emit_long(d);
1342     emit_long(s);
1343     }
1344     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1345    
1346     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1347     {
1348     if (optimize_imm8 && isbyte(s)) {
1349     emit_byte(0x83);
1350     emit_byte(0x05);
1351     emit_long(d);
1352     emit_byte(s);
1353     }
1354     else {
1355     emit_byte(0x81);
1356     emit_byte(0x05);
1357     emit_long(d);
1358     emit_long(s);
1359     }
1360     }
1361     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1362    
1363     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1364     {
1365     emit_byte(0x66);
1366     emit_byte(0x81);
1367     emit_byte(0x05);
1368     emit_long(d);
1369     emit_word(s);
1370     }
1371     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1372    
1373     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1374     {
1375     emit_byte(0x80);
1376     emit_byte(0x05);
1377     emit_long(d);
1378     emit_byte(s);
1379     }
1380     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1381    
1382     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1383     {
1384 gbeauche 1.2 if (optimize_accum && isaccum(d))
1385     emit_byte(0xa9);
1386     else {
1387 gbeauche 1.1 emit_byte(0xf7);
1388     emit_byte(0xc0+d);
1389 gbeauche 1.2 }
1390 gbeauche 1.1 emit_long(i);
1391     }
1392     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1393    
1394     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1395     {
1396     emit_byte(0x85);
1397     emit_byte(0xc0+8*s+d);
1398     }
1399     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1400    
1401     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1402     {
1403     emit_byte(0x66);
1404     emit_byte(0x85);
1405     emit_byte(0xc0+8*s+d);
1406     }
1407     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1408    
1409     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1410     {
1411     emit_byte(0x84);
1412     emit_byte(0xc0+8*s+d);
1413     }
1414     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1415    
1416     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1417     {
1418     if (optimize_imm8 && isbyte(i)) {
1419 gbeauche 1.2 emit_byte(0x83);
1420     emit_byte(0xe0+d);
1421     emit_byte(i);
1422 gbeauche 1.1 }
1423     else {
1424 gbeauche 1.2 if (optimize_accum && isaccum(d))
1425     emit_byte(0x25);
1426     else {
1427     emit_byte(0x81);
1428     emit_byte(0xe0+d);
1429     }
1430     emit_long(i);
1431 gbeauche 1.1 }
1432     }
1433     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1434    
1435     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1436     {
1437 gbeauche 1.2 emit_byte(0x66);
1438     if (optimize_imm8 && isbyte(i)) {
1439     emit_byte(0x83);
1440     emit_byte(0xe0+d);
1441     emit_byte(i);
1442     }
1443     else {
1444     if (optimize_accum && isaccum(d))
1445     emit_byte(0x25);
1446     else {
1447     emit_byte(0x81);
1448     emit_byte(0xe0+d);
1449     }
1450     emit_word(i);
1451     }
1452 gbeauche 1.1 }
1453     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1454    
1455     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1456     {
1457     emit_byte(0x21);
1458     emit_byte(0xc0+8*s+d);
1459     }
1460     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1461    
1462     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1463     {
1464     emit_byte(0x66);
1465     emit_byte(0x21);
1466     emit_byte(0xc0+8*s+d);
1467     }
1468     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1469    
1470     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1471     {
1472     emit_byte(0x20);
1473     emit_byte(0xc0+8*s+d);
1474     }
1475     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1476    
1477     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1478     {
1479     if (optimize_imm8 && isbyte(i)) {
1480     emit_byte(0x83);
1481     emit_byte(0xc8+d);
1482     emit_byte(i);
1483     }
1484     else {
1485 gbeauche 1.2 if (optimize_accum && isaccum(d))
1486     emit_byte(0x0d);
1487     else {
1488 gbeauche 1.1 emit_byte(0x81);
1489     emit_byte(0xc8+d);
1490 gbeauche 1.2 }
1491 gbeauche 1.1 emit_long(i);
1492     }
1493     }
1494     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1495    
1496     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1497     {
1498     emit_byte(0x09);
1499     emit_byte(0xc0+8*s+d);
1500     }
1501     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1502    
1503     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1504     {
1505     emit_byte(0x66);
1506     emit_byte(0x09);
1507     emit_byte(0xc0+8*s+d);
1508     }
1509     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1510    
1511     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1512     {
1513     emit_byte(0x08);
1514     emit_byte(0xc0+8*s+d);
1515     }
1516     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1517    
1518     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1519     {
1520     emit_byte(0x11);
1521     emit_byte(0xc0+8*s+d);
1522     }
1523     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1524    
1525     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1526     {
1527     emit_byte(0x66);
1528     emit_byte(0x11);
1529     emit_byte(0xc0+8*s+d);
1530     }
1531     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1532    
1533     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1534     {
1535     emit_byte(0x10);
1536     emit_byte(0xc0+8*s+d);
1537     }
1538     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1539    
1540     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1541     {
1542     emit_byte(0x01);
1543     emit_byte(0xc0+8*s+d);
1544     }
1545     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1546    
1547     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1548     {
1549     emit_byte(0x66);
1550     emit_byte(0x01);
1551     emit_byte(0xc0+8*s+d);
1552     }
1553     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1554    
1555     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1556     {
1557     emit_byte(0x00);
1558     emit_byte(0xc0+8*s+d);
1559     }
1560     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1561    
1562     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1563     {
1564     if (isbyte(i)) {
1565     emit_byte(0x83);
1566     emit_byte(0xe8+d);
1567     emit_byte(i);
1568     }
1569     else {
1570 gbeauche 1.2 if (optimize_accum && isaccum(d))
1571     emit_byte(0x2d);
1572     else {
1573 gbeauche 1.1 emit_byte(0x81);
1574     emit_byte(0xe8+d);
1575 gbeauche 1.2 }
1576 gbeauche 1.1 emit_long(i);
1577     }
1578     }
1579     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1580    
1581     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1582     {
1583 gbeauche 1.2 if (optimize_accum && isaccum(d))
1584     emit_byte(0x2c);
1585     else {
1586 gbeauche 1.1 emit_byte(0x80);
1587     emit_byte(0xe8+d);
1588 gbeauche 1.2 }
1589 gbeauche 1.1 emit_byte(i);
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1594     {
1595     if (isbyte(i)) {
1596     emit_byte(0x83);
1597     emit_byte(0xc0+d);
1598     emit_byte(i);
1599     }
1600     else {
1601 gbeauche 1.2 if (optimize_accum && isaccum(d))
1602     emit_byte(0x05);
1603     else {
1604 gbeauche 1.1 emit_byte(0x81);
1605     emit_byte(0xc0+d);
1606 gbeauche 1.2 }
1607 gbeauche 1.1 emit_long(i);
1608     }
1609     }
1610     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1611    
1612     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1613     {
1614 gbeauche 1.2 emit_byte(0x66);
1615 gbeauche 1.1 if (isbyte(i)) {
1616     emit_byte(0x83);
1617     emit_byte(0xc0+d);
1618     emit_byte(i);
1619     }
1620     else {
1621 gbeauche 1.2 if (optimize_accum && isaccum(d))
1622     emit_byte(0x05);
1623     else {
1624 gbeauche 1.1 emit_byte(0x81);
1625     emit_byte(0xc0+d);
1626 gbeauche 1.2 }
1627 gbeauche 1.1 emit_word(i);
1628     }
1629     }
1630     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1631    
1632     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1633     {
1634 gbeauche 1.2 if (optimize_accum && isaccum(d))
1635     emit_byte(0x04);
1636     else {
1637     emit_byte(0x80);
1638     emit_byte(0xc0+d);
1639     }
1640 gbeauche 1.1 emit_byte(i);
1641     }
1642     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1643    
1644     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1645     {
1646     emit_byte(0x19);
1647     emit_byte(0xc0+8*s+d);
1648     }
1649     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1650    
1651     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1652     {
1653     emit_byte(0x66);
1654     emit_byte(0x19);
1655     emit_byte(0xc0+8*s+d);
1656     }
1657     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1658    
1659     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1660     {
1661     emit_byte(0x18);
1662     emit_byte(0xc0+8*s+d);
1663     }
1664     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1665    
1666     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1667     {
1668     emit_byte(0x29);
1669     emit_byte(0xc0+8*s+d);
1670     }
1671     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1672    
1673     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1674     {
1675     emit_byte(0x66);
1676     emit_byte(0x29);
1677     emit_byte(0xc0+8*s+d);
1678     }
1679     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1680    
1681     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1682     {
1683     emit_byte(0x28);
1684     emit_byte(0xc0+8*s+d);
1685     }
1686     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1687    
1688     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1689     {
1690     emit_byte(0x39);
1691     emit_byte(0xc0+8*s+d);
1692     }
1693     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1694    
1695     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1696     {
1697     if (optimize_imm8 && isbyte(i)) {
1698     emit_byte(0x83);
1699     emit_byte(0xf8+r);
1700     emit_byte(i);
1701     }
1702     else {
1703 gbeauche 1.2 if (optimize_accum && isaccum(r))
1704     emit_byte(0x3d);
1705     else {
1706 gbeauche 1.1 emit_byte(0x81);
1707     emit_byte(0xf8+r);
1708 gbeauche 1.2 }
1709 gbeauche 1.1 emit_long(i);
1710     }
1711     }
1712     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1713    
1714     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1715     {
1716     emit_byte(0x66);
1717     emit_byte(0x39);
1718     emit_byte(0xc0+8*s+d);
1719     }
1720     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1721    
1722     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1723     {
1724 gbeauche 1.2 if (optimize_accum && isaccum(d))
1725     emit_byte(0x3c);
1726     else {
1727 gbeauche 1.1 emit_byte(0x80);
1728     emit_byte(0xf8+d);
1729 gbeauche 1.2 }
1730 gbeauche 1.1 emit_byte(i);
1731     }
1732     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1733    
1734     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1735     {
1736     emit_byte(0x38);
1737     emit_byte(0xc0+8*s+d);
1738     }
1739     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1740    
1741     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1742     {
1743     int fi;
1744    
1745     switch(factor) {
1746     case 1: fi=0; break;
1747     case 2: fi=1; break;
1748     case 4: fi=2; break;
1749     case 8: fi=3; break;
1750     default: abort();
1751     }
1752     emit_byte(0x39);
1753     emit_byte(0x04+8*d);
1754     emit_byte(5+8*index+0x40*fi);
1755     emit_long(offset);
1756     }
1757     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1758    
1759     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1760     {
1761     emit_byte(0x31);
1762     emit_byte(0xc0+8*s+d);
1763     }
1764     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1765    
1766     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1767     {
1768     emit_byte(0x66);
1769     emit_byte(0x31);
1770     emit_byte(0xc0+8*s+d);
1771     }
1772     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1773    
1774     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1775     {
1776     emit_byte(0x30);
1777     emit_byte(0xc0+8*s+d);
1778     }
1779     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1780    
1781     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1782     {
1783     if (optimize_imm8 && isbyte(s)) {
1784     emit_byte(0x83);
1785     emit_byte(0x2d);
1786     emit_long(d);
1787     emit_byte(s);
1788     }
1789     else {
1790     emit_byte(0x81);
1791     emit_byte(0x2d);
1792     emit_long(d);
1793     emit_long(s);
1794     }
1795     }
1796     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1797    
1798     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1799     {
1800     if (optimize_imm8 && isbyte(s)) {
1801     emit_byte(0x83);
1802     emit_byte(0x3d);
1803     emit_long(d);
1804     emit_byte(s);
1805     }
1806     else {
1807     emit_byte(0x81);
1808     emit_byte(0x3d);
1809     emit_long(d);
1810     emit_long(s);
1811     }
1812     }
1813     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1814    
1815     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1816     {
1817     emit_byte(0x87);
1818     emit_byte(0xc0+8*r1+r2);
1819     }
1820     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1821    
1822     /*************************************************************************
1823     * FIXME: string-related instructions *
1824     *************************************************************************/
1825    
1826     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1827     {
1828     emit_byte(0xfc);
1829     }
1830     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1831    
1832     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1833     {
1834     emit_byte(0xfd);
1835     }
1836     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1837    
1838     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1839     {
1840     emit_byte(0xa4);
1841     }
1842     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1843    
1844     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1845     {
1846     emit_byte(0xa5);
1847     }
1848     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1849    
1850     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1851     {
1852     emit_byte(0xf3);
1853     }
1854     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1855    
1856     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1857     {
1858     raw_rep();
1859     raw_movs_b();
1860     }
1861     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1862    
1863     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1864     {
1865     raw_rep();
1866     raw_movs_l();
1867     }
1868     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1869    
1870     /*************************************************************************
1871     * FIXME: mem access modes probably wrong *
1872     *************************************************************************/
1873    
1874     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1875     {
1876     emit_byte(0x9c);
1877     }
1878     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1879    
1880     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1881     {
1882     emit_byte(0x9d);
1883     }
1884     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1885    
1886     /*************************************************************************
1887     * Unoptimizable stuff --- jump *
1888     *************************************************************************/
1889    
1890     static __inline__ void raw_call_r(R4 r)
1891     {
1892     emit_byte(0xff);
1893     emit_byte(0xd0+r);
1894     }
1895    
1896     static __inline__ void raw_jmp_r(R4 r)
1897     {
1898     emit_byte(0xff);
1899     emit_byte(0xe0+r);
1900     }
1901    
1902     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1903     {
1904     int mu;
1905     switch(m) {
1906     case 1: mu=0; break;
1907     case 2: mu=1; break;
1908     case 4: mu=2; break;
1909     case 8: mu=3; break;
1910     default: abort();
1911     }
1912     emit_byte(0xff);
1913     emit_byte(0x24);
1914     emit_byte(0x05+8*r+0x40*mu);
1915     emit_long(base);
1916     }
1917    
1918     static __inline__ void raw_jmp_m(uae_u32 base)
1919     {
1920     emit_byte(0xff);
1921     emit_byte(0x25);
1922     emit_long(base);
1923     }
1924    
1925    
1926     static __inline__ void raw_call(uae_u32 t)
1927     {
1928     emit_byte(0xe8);
1929     emit_long(t-(uae_u32)target-4);
1930     }
1931    
1932     static __inline__ void raw_jmp(uae_u32 t)
1933     {
1934     emit_byte(0xe9);
1935     emit_long(t-(uae_u32)target-4);
1936     }
1937    
1938     static __inline__ void raw_jl(uae_u32 t)
1939     {
1940     emit_byte(0x0f);
1941     emit_byte(0x8c);
1942     emit_long(t-(uae_u32)target-4);
1943     }
1944    
1945     static __inline__ void raw_jz(uae_u32 t)
1946     {
1947     emit_byte(0x0f);
1948     emit_byte(0x84);
1949     emit_long(t-(uae_u32)target-4);
1950     }
1951    
1952     static __inline__ void raw_jnz(uae_u32 t)
1953     {
1954     emit_byte(0x0f);
1955     emit_byte(0x85);
1956     emit_long(t-(uae_u32)target-4);
1957     }
1958    
1959     static __inline__ void raw_jnz_l_oponly(void)
1960     {
1961     emit_byte(0x0f);
1962     emit_byte(0x85);
1963     }
1964    
1965     static __inline__ void raw_jcc_l_oponly(int cc)
1966     {
1967     emit_byte(0x0f);
1968     emit_byte(0x80+cc);
1969     }
1970    
1971     static __inline__ void raw_jnz_b_oponly(void)
1972     {
1973     emit_byte(0x75);
1974     }
1975    
1976     static __inline__ void raw_jz_b_oponly(void)
1977     {
1978     emit_byte(0x74);
1979     }
1980    
1981     static __inline__ void raw_jcc_b_oponly(int cc)
1982     {
1983     emit_byte(0x70+cc);
1984     }
1985    
1986     static __inline__ void raw_jmp_l_oponly(void)
1987     {
1988     emit_byte(0xe9);
1989     }
1990    
1991     static __inline__ void raw_jmp_b_oponly(void)
1992     {
1993     emit_byte(0xeb);
1994     }
1995    
1996     static __inline__ void raw_ret(void)
1997     {
1998     emit_byte(0xc3);
1999     }
2000    
2001     static __inline__ void raw_nop(void)
2002     {
2003     emit_byte(0x90);
2004     }
2005    
2006    
2007     /*************************************************************************
2008     * Flag handling, to and fro UAE flag register *
2009     *************************************************************************/
2010    
2011     #ifdef SAHF_SETO_PROFITABLE
2012    
2013     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2014    
2015     static __inline__ void raw_flags_to_reg(int r)
2016     {
2017     raw_lahf(0); /* Most flags in AH */
2018     //raw_setcc(r,0); /* V flag in AL */
2019     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2020    
2021     #if 1 /* Let's avoid those nasty partial register stalls */
2022     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2023     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2024     //live.state[FLAGTMP].status=CLEAN;
2025     live.state[FLAGTMP].status=INMEM;
2026     live.state[FLAGTMP].realreg=-1;
2027     /* We just "evicted" FLAGTMP. */
2028     if (live.nat[r].nholds!=1) {
2029     /* Huh? */
2030     abort();
2031     }
2032     live.nat[r].nholds=0;
2033     #endif
2034     }
2035    
2036     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2037     static __inline__ void raw_reg_to_flags(int r)
2038     {
2039     raw_cmp_b_ri(r,-127); /* set V */
2040     raw_sahf(0);
2041     }
2042    
2043     #else
2044    
2045     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2046     static __inline__ void raw_flags_to_reg(int r)
2047     {
2048     raw_pushfl();
2049     raw_pop_l_r(r);
2050     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2051     // live.state[FLAGTMP].status=CLEAN;
2052     live.state[FLAGTMP].status=INMEM;
2053     live.state[FLAGTMP].realreg=-1;
2054     /* We just "evicted" FLAGTMP. */
2055     if (live.nat[r].nholds!=1) {
2056     /* Huh? */
2057     abort();
2058     }
2059     live.nat[r].nholds=0;
2060     }
2061    
2062     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2063     static __inline__ void raw_reg_to_flags(int r)
2064     {
2065     raw_push_l_r(r);
2066     raw_popfl();
2067     }
2068    
2069     #endif
2070    
2071     /* Apparently, there are enough instructions between flag store and
2072     flag reload to avoid the partial memory stall */
2073     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2074     {
2075     #if 1
2076     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2077     #else
2078     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2079     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2080     #endif
2081     }
2082    
2083     /* FLAGX is byte sized, and we *do* write it at that size */
2084     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2085     {
2086     if (live.nat[target].canbyte)
2087     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2088     else if (live.nat[target].canword)
2089     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2090     else
2091     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2092     }
2093    
2094    
2095     static __inline__ void raw_inc_sp(int off)
2096     {
2097 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2098 gbeauche 1.1 }
2099    
2100     /*************************************************************************
2101     * Handling mistaken direct memory access *
2102     *************************************************************************/
2103    
2104     // gb-- I don't need that part for JIT Basilisk II
2105     #if defined(NATMEM_OFFSET) && 0
2106     #include <asm/sigcontext.h>
2107     #include <signal.h>
2108    
2109     #define SIG_READ 1
2110     #define SIG_WRITE 2
2111    
2112     static int in_handler=0;
2113     static uae_u8 veccode[256];
2114    
2115     static void vec(int x, struct sigcontext sc)
2116     {
2117     uae_u8* i=(uae_u8*)sc.eip;
2118     uae_u32 addr=sc.cr2;
2119     int r=-1;
2120     int size=4;
2121     int dir=-1;
2122     int len=0;
2123     int j;
2124    
2125     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2126     if (!canbang)
2127     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2128     if (in_handler)
2129     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2130    
2131     if (canbang && i>=compiled_code && i<=current_compile_p) {
2132     if (*i==0x66) {
2133     i++;
2134     size=2;
2135     len++;
2136     }
2137    
2138     switch(i[0]) {
2139     case 0x8a:
2140     if ((i[1]&0xc0)==0x80) {
2141     r=(i[1]>>3)&7;
2142     dir=SIG_READ;
2143     size=1;
2144     len+=6;
2145     break;
2146     }
2147     break;
2148     case 0x88:
2149     if ((i[1]&0xc0)==0x80) {
2150     r=(i[1]>>3)&7;
2151     dir=SIG_WRITE;
2152     size=1;
2153     len+=6;
2154     break;
2155     }
2156     break;
2157     case 0x8b:
2158     if ((i[1]&0xc0)==0x80) {
2159     r=(i[1]>>3)&7;
2160     dir=SIG_READ;
2161     len+=6;
2162     break;
2163     }
2164     if ((i[1]&0xc0)==0x40) {
2165     r=(i[1]>>3)&7;
2166     dir=SIG_READ;
2167     len+=3;
2168     break;
2169     }
2170     break;
2171     case 0x89:
2172     if ((i[1]&0xc0)==0x80) {
2173     r=(i[1]>>3)&7;
2174     dir=SIG_WRITE;
2175     len+=6;
2176     break;
2177     }
2178     if ((i[1]&0xc0)==0x40) {
2179     r=(i[1]>>3)&7;
2180     dir=SIG_WRITE;
2181     len+=3;
2182     break;
2183     }
2184     break;
2185     }
2186     }
2187    
2188     if (r!=-1) {
2189     void* pr=NULL;
2190     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2191    
2192     switch(r) {
2193     case 0: pr=&(sc.eax); break;
2194     case 1: pr=&(sc.ecx); break;
2195     case 2: pr=&(sc.edx); break;
2196     case 3: pr=&(sc.ebx); break;
2197     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2198     case 5: pr=(size>1)?
2199     (void*)(&(sc.ebp)):
2200     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2201     case 6: pr=(size>1)?
2202     (void*)(&(sc.esi)):
2203     (void*)(((uae_u8*)&(sc.edx))+1); break;
2204     case 7: pr=(size>1)?
2205     (void*)(&(sc.edi)):
2206     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2207     default: abort();
2208     }
2209     if (pr) {
2210     blockinfo* bi;
2211    
2212     if (currprefs.comp_oldsegv) {
2213     addr-=NATMEM_OFFSET;
2214    
2215     if ((addr>=0x10000000 && addr<0x40000000) ||
2216     (addr>=0x50000000)) {
2217     write_log("Suspicious address in %x SEGV handler.\n",addr);
2218     }
2219     if (dir==SIG_READ) {
2220     switch(size) {
2221     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2222     case 2: *((uae_u16*)pr)=get_word(addr); break;
2223     case 4: *((uae_u32*)pr)=get_long(addr); break;
2224     default: abort();
2225     }
2226     }
2227     else { /* write */
2228     switch(size) {
2229     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2230     case 2: put_word(addr,*((uae_u16*)pr)); break;
2231     case 4: put_long(addr,*((uae_u32*)pr)); break;
2232     default: abort();
2233     }
2234     }
2235     write_log("Handled one access!\n");
2236     fflush(stdout);
2237     segvcount++;
2238     sc.eip+=len;
2239     }
2240     else {
2241     void* tmp=target;
2242     int i;
2243     uae_u8 vecbuf[5];
2244    
2245     addr-=NATMEM_OFFSET;
2246    
2247     if ((addr>=0x10000000 && addr<0x40000000) ||
2248     (addr>=0x50000000)) {
2249     write_log("Suspicious address in %x SEGV handler.\n",addr);
2250     }
2251    
2252     target=(uae_u8*)sc.eip;
2253     for (i=0;i<5;i++)
2254     vecbuf[i]=target[i];
2255     emit_byte(0xe9);
2256     emit_long((uae_u32)veccode-(uae_u32)target-4);
2257     write_log("Create jump to %p\n",veccode);
2258    
2259     write_log("Handled one access!\n");
2260     fflush(stdout);
2261     segvcount++;
2262    
2263     target=veccode;
2264    
2265     if (dir==SIG_READ) {
2266     switch(size) {
2267     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2268     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2269     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2270     default: abort();
2271     }
2272     }
2273     else { /* write */
2274     switch(size) {
2275     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2276     case 2: put_word(addr,*((uae_u16*)pr)); break;
2277     case 4: put_long(addr,*((uae_u32*)pr)); break;
2278     default: abort();
2279     }
2280     }
2281     for (i=0;i<5;i++)
2282     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2283     raw_mov_l_mi((uae_u32)&in_handler,0);
2284     emit_byte(0xe9);
2285     emit_long(sc.eip+len-(uae_u32)target-4);
2286     in_handler=1;
2287     target=tmp;
2288     }
2289     bi=active;
2290     while (bi) {
2291     if (bi->handler &&
2292     (uae_u8*)bi->direct_handler<=i &&
2293     (uae_u8*)bi->nexthandler>i) {
2294     write_log("deleted trigger (%p<%p<%p) %p\n",
2295     bi->handler,
2296     i,
2297     bi->nexthandler,
2298     bi->pc_p);
2299     invalidate_block(bi);
2300     raise_in_cl_list(bi);
2301     set_special(0);
2302     return;
2303     }
2304     bi=bi->next;
2305     }
2306     /* Not found in the active list. Might be a rom routine that
2307     is in the dormant list */
2308     bi=dormant;
2309     while (bi) {
2310     if (bi->handler &&
2311     (uae_u8*)bi->direct_handler<=i &&
2312     (uae_u8*)bi->nexthandler>i) {
2313     write_log("deleted trigger (%p<%p<%p) %p\n",
2314     bi->handler,
2315     i,
2316     bi->nexthandler,
2317     bi->pc_p);
2318     invalidate_block(bi);
2319     raise_in_cl_list(bi);
2320     set_special(0);
2321     return;
2322     }
2323     bi=bi->next;
2324     }
2325     write_log("Huh? Could not find trigger!\n");
2326     return;
2327     }
2328     }
2329     write_log("Can't handle access!\n");
2330     for (j=0;j<10;j++) {
2331     write_log("instruction byte %2d is %02x\n",j,i[j]);
2332     }
2333     write_log("Please send the above info (starting at \"fault address\") to\n"
2334     "bmeyer@csse.monash.edu.au\n"
2335     "This shouldn't happen ;-)\n");
2336     fflush(stdout);
2337     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2338     }
2339     #endif
2340    
2341    
2342     /*************************************************************************
2343     * Checking for CPU features *
2344     *************************************************************************/
2345    
2346 gbeauche 1.3 struct cpuinfo_x86 {
2347     uae_u8 x86; // CPU family
2348     uae_u8 x86_vendor; // CPU vendor
2349     uae_u8 x86_processor; // CPU canonical processor type
2350     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2351     uae_u32 x86_hwcap;
2352     uae_u8 x86_model;
2353     uae_u8 x86_mask;
2354     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2355     char x86_vendor_id[16];
2356     };
2357     struct cpuinfo_x86 cpuinfo;
2358    
/* Values stored in cpuinfo_x86::x86_vendor by x86_get_cpu_vendor() */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff   /* vendor string not recognized */
};
2371    
/* Canonical processor types. The values index x86_processor_string_table[]
   and x86_alignments[], so the order must match those tables. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_max        = 7    /* number of entries, also "unknown" */
};
2382    
2383     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2384     "80386",
2385     "80486",
2386     "Pentium",
2387     "PentiumPro",
2388     "K6",
2389     "Athlon",
2390     "Pentium4"
2391     };
2392    
2393     static struct ptt {
2394     const int align_loop;
2395     const int align_loop_max_skip;
2396     const int align_jump;
2397     const int align_jump_max_skip;
2398     const int align_func;
2399     }
2400     x86_alignments[X86_PROCESSOR_max] = {
2401     { 4, 3, 4, 3, 4 },
2402     { 16, 15, 16, 15, 16 },
2403     { 16, 7, 16, 7, 16 },
2404     { 16, 15, 16, 7, 16 },
2405     { 32, 7, 32, 7, 32 },
2406     { 16, 7, 64, 7, 16 },
2407     { 0, 0, 0, 0, 0 }
2408     };
2409 gbeauche 1.1
2410 gbeauche 1.3 static void
2411     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2412 gbeauche 1.1 {
2413 gbeauche 1.3 char *v = c->x86_vendor_id;
2414    
2415     if (!strcmp(v, "GenuineIntel"))
2416     c->x86_vendor = X86_VENDOR_INTEL;
2417     else if (!strcmp(v, "AuthenticAMD"))
2418     c->x86_vendor = X86_VENDOR_AMD;
2419     else if (!strcmp(v, "CyrixInstead"))
2420     c->x86_vendor = X86_VENDOR_CYRIX;
2421     else if (!strcmp(v, "Geode by NSC"))
2422     c->x86_vendor = X86_VENDOR_NSC;
2423     else if (!strcmp(v, "UMC UMC UMC "))
2424     c->x86_vendor = X86_VENDOR_UMC;
2425     else if (!strcmp(v, "CentaurHauls"))
2426     c->x86_vendor = X86_VENDOR_CENTAUR;
2427     else if (!strcmp(v, "NexGenDriven"))
2428     c->x86_vendor = X86_VENDOR_NEXGEN;
2429     else if (!strcmp(v, "RiseRiseRise"))
2430     c->x86_vendor = X86_VENDOR_RISE;
2431     else if (!strcmp(v, "GenuineTMx86") ||
2432     !strcmp(v, "TransmetaCPU"))
2433     c->x86_vendor = X86_VENDOR_TRANSMETA;
2434     else
2435     c->x86_vendor = X86_VENDOR_UNKNOWN;
2436     }
2437 gbeauche 1.1
2438 gbeauche 1.3 static void
2439     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2440     {
2441     static uae_u8 cpuid_space[256];
2442     uae_u8* tmp=get_target();
2443 gbeauche 1.1
2444 gbeauche 1.3 set_target(cpuid_space);
2445     raw_push_l_r(0); /* eax */
2446     raw_push_l_r(1); /* ecx */
2447     raw_push_l_r(2); /* edx */
2448     raw_push_l_r(3); /* ebx */
2449     raw_mov_l_rm(0,(uae_u32)&op);
2450     raw_cpuid(0);
2451     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2452     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2453     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2454     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2455     raw_pop_l_r(3);
2456     raw_pop_l_r(2);
2457     raw_pop_l_r(1);
2458     raw_pop_l_r(0);
2459     raw_ret();
2460     set_target(tmp);
2461 gbeauche 1.1
2462 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
2463 gbeauche 1.1 }
2464    
2465 gbeauche 1.3 static void
2466     raw_init_cpu(void)
2467 gbeauche 1.1 {
2468 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
2469    
2470     /* Defaults */
2471     c->x86_vendor = X86_VENDOR_UNKNOWN;
2472     c->cpuid_level = -1; /* CPUID not detected */
2473     c->x86_model = c->x86_mask = 0; /* So far unknown... */
2474     c->x86_vendor_id[0] = '\0'; /* Unset */
2475     c->x86_hwcap = 0;
2476    
2477     /* Get vendor name */
2478     c->x86_vendor_id[12] = '\0';
2479     cpuid(0x00000000,
2480     (uae_u32 *)&c->cpuid_level,
2481     (uae_u32 *)&c->x86_vendor_id[0],
2482     (uae_u32 *)&c->x86_vendor_id[8],
2483     (uae_u32 *)&c->x86_vendor_id[4]);
2484     x86_get_cpu_vendor(c);
2485    
2486     /* Intel-defined flags: level 0x00000001 */
2487     c->x86_brand_id = 0;
2488     if ( c->cpuid_level >= 0x00000001 ) {
2489     uae_u32 tfms, brand_id;
2490     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2491     c->x86 = (tfms >> 8) & 15;
2492     c->x86_model = (tfms >> 4) & 15;
2493     c->x86_brand_id = brand_id & 0xff;
2494     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2495     (c->x86 == 0xf)) {
2496     /* AMD Extended Family and Model Values */
2497     c->x86 += (tfms >> 20) & 0xff;
2498     c->x86_model += (tfms >> 12) & 0xf0;
2499     }
2500     c->x86_mask = tfms & 15;
2501     } else {
2502     /* Have CPUID level 0 only - unheard of */
2503     c->x86 = 4;
2504     }
2505    
2506     /* Canonicalize processor ID */
2507     c->x86_processor = X86_PROCESSOR_max;
2508     switch (c->x86) {
2509     case 3:
2510     c->x86_processor = X86_PROCESSOR_I386;
2511     break;
2512     case 4:
2513     c->x86_processor = X86_PROCESSOR_I486;
2514     break;
2515     case 5:
2516     if (c->x86_vendor == X86_VENDOR_AMD)
2517     c->x86_processor = X86_PROCESSOR_K6;
2518     else
2519     c->x86_processor = X86_PROCESSOR_PENTIUM;
2520     break;
2521     case 6:
2522     if (c->x86_vendor == X86_VENDOR_AMD)
2523     c->x86_processor = X86_PROCESSOR_ATHLON;
2524     else
2525     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2526     break;
2527     case 15:
2528     if (c->x86_vendor == X86_VENDOR_INTEL) {
2529     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2530     if (c->x86_brand_id >= 8)
2531     c->x86_processor = X86_PROCESSOR_PENTIUM4;
2532     }
2533     break;
2534     }
2535     if (c->x86_processor == X86_PROCESSOR_max) {
2536     fprintf(stderr, "Error: unknown processor type\n");
2537     fprintf(stderr, " Family : %d\n", c->x86);
2538     fprintf(stderr, " Model : %d\n", c->x86_model);
2539     fprintf(stderr, " Mask : %d\n", c->x86_mask);
2540     if (c->x86_brand_id)
2541     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2542     abort();
2543     }
2544    
2545     /* Have CMOV support? */
2546     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2547    
2548     /* Can the host CPU suffer from partial register stalls? */
2549     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2550     #if 1
2551     /* It appears that partial register writes are a bad idea even on
2552 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
2553     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2554 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2555     have_rat_stall = true;
2556 gbeauche 1.1 #endif
2557 gbeauche 1.3
2558     /* Alignments */
2559     if (tune_alignment) {
2560     align_loops = x86_alignments[c->x86_processor].align_loop;
2561     align_jumps = x86_alignments[c->x86_processor].align_jump;
2562     }
2563    
2564     write_log("Max CPUID level=%d Processor is %s [%s]\n",
2565     c->cpuid_level, c->x86_vendor_id,
2566     x86_processor_string_table[c->x86_processor]);
2567 gbeauche 1.1 }
2568    
2569    
2570     /*************************************************************************
2571     * FPU stuff *
2572     *************************************************************************/
2573    
2574    
2575     static __inline__ void raw_fp_init(void)
2576     {
2577     int i;
2578    
2579     for (i=0;i<N_FREGS;i++)
2580     live.spos[i]=-2;
2581     live.tos=-1; /* Stack is empty */
2582     }
2583    
2584     static __inline__ void raw_fp_cleanup_drop(void)
2585     {
2586     #if 0
2587     /* using FINIT instead of popping all the entries.
2588     Seems to have side effects --- there is display corruption in
2589     Quake when this is used */
2590     if (live.tos>1) {
2591     emit_byte(0x9b);
2592     emit_byte(0xdb);
2593     emit_byte(0xe3);
2594     live.tos=-1;
2595     }
2596     #endif
2597     while (live.tos>=1) {
2598     emit_byte(0xde);
2599     emit_byte(0xd9);
2600     live.tos-=2;
2601     }
2602     while (live.tos>=0) {
2603     emit_byte(0xdd);
2604     emit_byte(0xd8);
2605     live.tos--;
2606     }
2607     raw_fp_init();
2608     }
2609    
2610     static __inline__ void make_tos(int r)
2611     {
2612     int p,q;
2613    
2614     if (live.spos[r]<0) { /* Register not yet on stack */
2615     emit_byte(0xd9);
2616     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2617     live.tos++;
2618     live.spos[r]=live.tos;
2619     live.onstack[live.tos]=r;
2620     return;
2621     }
2622     /* Register is on stack */
2623     if (live.tos==live.spos[r])
2624     return;
2625     p=live.spos[r];
2626     q=live.onstack[live.tos];
2627    
2628     emit_byte(0xd9);
2629     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2630     live.onstack[live.tos]=r;
2631     live.spos[r]=live.tos;
2632     live.onstack[p]=q;
2633     live.spos[q]=p;
2634     }
2635    
2636     static __inline__ void make_tos2(int r, int r2)
2637     {
2638     int q;
2639    
2640     make_tos(r2); /* Put the reg that's supposed to end up in position2
2641     on top */
2642    
2643     if (live.spos[r]<0) { /* Register not yet on stack */
2644     make_tos(r); /* This will extend the stack */
2645     return;
2646     }
2647     /* Register is on stack */
2648     emit_byte(0xd9);
2649     emit_byte(0xc9); /* Move r2 into position 2 */
2650    
2651     q=live.onstack[live.tos-1];
2652     live.onstack[live.tos]=q;
2653     live.spos[q]=live.tos;
2654     live.onstack[live.tos-1]=r2;
2655     live.spos[r2]=live.tos-1;
2656    
2657     make_tos(r); /* And r into 1 */
2658     }
2659    
2660     static __inline__ int stackpos(int r)
2661     {
2662     if (live.spos[r]<0)
2663     abort();
2664     if (live.tos<live.spos[r]) {
2665     printf("Looking for spos for fnreg %d\n",r);
2666     abort();
2667     }
2668     return live.tos-live.spos[r];
2669     }
2670    
2671     static __inline__ void usereg(int r)
2672     {
2673     if (live.spos[r]<0)
2674     make_tos(r);
2675     }
2676    
2677     /* This is called with one FP value in a reg *above* tos, which it will
2678     pop off the stack if necessary */
2679     static __inline__ void tos_make(int r)
2680     {
2681     if (live.spos[r]<0) {
2682     live.tos++;
2683     live.spos[r]=live.tos;
2684     live.onstack[live.tos]=r;
2685     return;
2686     }
2687     emit_byte(0xdd);
2688     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2689     and pop it*/
2690     }
2691    
2692    
2693     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2694     {
2695     make_tos(r);
2696     emit_byte(0xdd);
2697     emit_byte(0x15);
2698     emit_long(m);
2699     }
2700     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2701    
2702     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2703     {
2704     make_tos(r);
2705     emit_byte(0xdd);
2706     emit_byte(0x1d);
2707     emit_long(m);
2708     live.onstack[live.tos]=-1;
2709     live.tos--;
2710     live.spos[r]=-2;
2711     }
2712     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2713    
2714     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2715     {
2716     emit_byte(0xdd);
2717     emit_byte(0x05);
2718     emit_long(m);
2719     tos_make(r);
2720     }
2721     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2722    
2723     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2724     {
2725     emit_byte(0xdb);
2726     emit_byte(0x05);
2727     emit_long(m);
2728     tos_make(r);
2729     }
2730     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2731    
2732     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2733     {
2734     make_tos(r);
2735     emit_byte(0xdb);
2736     emit_byte(0x15);
2737     emit_long(m);
2738     }
2739     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2740    
2741     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2742     {
2743     emit_byte(0xd9);
2744     emit_byte(0x05);
2745     emit_long(m);
2746     tos_make(r);
2747     }
2748     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2749    
2750     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2751     {
2752     make_tos(r);
2753     emit_byte(0xd9);
2754     emit_byte(0x15);
2755     emit_long(m);
2756     }
2757     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2758    
2759     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2760     {
2761     int rs;
2762    
2763     /* Stupid x87 can't write a long double to mem without popping the
2764     stack! */
2765     usereg(r);
2766     rs=stackpos(r);
2767     emit_byte(0xd9); /* Get a copy to the top of stack */
2768     emit_byte(0xc0+rs);
2769    
2770     emit_byte(0xdb); /* store and pop it */
2771     emit_byte(0x3d);
2772     emit_long(m);
2773     }
2774     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2775    
2776     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2777     {
2778     int rs;
2779    
2780     make_tos(r);
2781     emit_byte(0xdb); /* store and pop it */
2782     emit_byte(0x3d);
2783     emit_long(m);
2784     live.onstack[live.tos]=-1;
2785     live.tos--;
2786     live.spos[r]=-2;
2787     }
2788     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2789    
2790     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2791     {
2792     emit_byte(0xdb);
2793     emit_byte(0x2d);
2794     emit_long(m);
2795     tos_make(r);
2796     }
2797     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2798    
2799     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2800     {
2801     emit_byte(0xd9);
2802     emit_byte(0xeb);
2803     tos_make(r);
2804     }
2805     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2806    
2807     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2808     {
2809     emit_byte(0xd9);
2810     emit_byte(0xec);
2811     tos_make(r);
2812     }
2813     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2814    
2815     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2816     {
2817     emit_byte(0xd9);
2818     emit_byte(0xea);
2819     tos_make(r);
2820     }
2821     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2822    
2823     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2824     {
2825     emit_byte(0xd9);
2826     emit_byte(0xed);
2827     tos_make(r);
2828     }
2829     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2830    
2831     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2832     {
2833     emit_byte(0xd9);
2834     emit_byte(0xe8);
2835     tos_make(r);
2836     }
2837     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2838    
2839     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2840     {
2841     emit_byte(0xd9);
2842     emit_byte(0xee);
2843     tos_make(r);
2844     }
2845     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2846    
2847     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2848     {
2849     int ds;
2850    
2851     usereg(s);
2852     ds=stackpos(s);
2853     if (ds==0 && live.spos[d]>=0) {
2854     /* source is on top of stack, and we already have the dest */
2855     int dd=stackpos(d);
2856     emit_byte(0xdd);
2857     emit_byte(0xd0+dd);
2858     }
2859     else {
2860     emit_byte(0xd9);
2861     emit_byte(0xc0+ds); /* duplicate source on tos */
2862     tos_make(d); /* store to destination, pop if necessary */
2863     }
2864     }
2865     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2866    
2867     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2868     {
2869     emit_byte(0xd9);
2870     emit_byte(0xa8+index);
2871     emit_long(base);
2872     }
2873     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2874    
2875    
2876     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2877     {
2878     int ds;
2879    
2880     if (d!=s) {
2881     usereg(s);
2882     ds=stackpos(s);
2883     emit_byte(0xd9);
2884     emit_byte(0xc0+ds); /* duplicate source */
2885     emit_byte(0xd9);
2886     emit_byte(0xfa); /* take square root */
2887     tos_make(d); /* store to destination */
2888     }
2889     else {
2890     make_tos(d);
2891     emit_byte(0xd9);
2892     emit_byte(0xfa); /* take square root */
2893     }
2894     }
2895     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2896    
2897     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2898     {
2899     int ds;
2900    
2901     if (d!=s) {
2902     usereg(s);
2903     ds=stackpos(s);
2904     emit_byte(0xd9);
2905     emit_byte(0xc0+ds); /* duplicate source */
2906     emit_byte(0xd9);
2907     emit_byte(0xe1); /* take fabs */
2908     tos_make(d); /* store to destination */
2909     }
2910     else {
2911     make_tos(d);
2912     emit_byte(0xd9);
2913     emit_byte(0xe1); /* take fabs */
2914     }
2915     }
2916     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2917    
2918     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2919     {
2920     int ds;
2921    
2922     if (d!=s) {
2923     usereg(s);
2924     ds=stackpos(s);
2925     emit_byte(0xd9);
2926     emit_byte(0xc0+ds); /* duplicate source */
2927     emit_byte(0xd9);
2928     emit_byte(0xfc); /* take frndint */
2929     tos_make(d); /* store to destination */
2930     }
2931     else {
2932     make_tos(d);
2933     emit_byte(0xd9);
2934     emit_byte(0xfc); /* take frndint */
2935     }
2936     }
2937     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2938    
2939     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2940     {
2941     int ds;
2942    
2943     if (d!=s) {
2944     usereg(s);
2945     ds=stackpos(s);
2946     emit_byte(0xd9);
2947     emit_byte(0xc0+ds); /* duplicate source */
2948     emit_byte(0xd9);
2949     emit_byte(0xff); /* take cos */
2950     tos_make(d); /* store to destination */
2951     }
2952     else {
2953     make_tos(d);
2954     emit_byte(0xd9);
2955     emit_byte(0xff); /* take cos */
2956     }
2957     }
2958     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2959    
2960     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2961     {
2962     int ds;
2963    
2964     if (d!=s) {
2965     usereg(s);
2966     ds=stackpos(s);
2967     emit_byte(0xd9);
2968     emit_byte(0xc0+ds); /* duplicate source */
2969     emit_byte(0xd9);
2970     emit_byte(0xfe); /* take sin */
2971     tos_make(d); /* store to destination */
2972     }
2973     else {
2974     make_tos(d);
2975     emit_byte(0xd9);
2976     emit_byte(0xfe); /* take sin */
2977     }
2978     }
2979     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2980    
2981     double one=1;
2982     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
2983     {
2984     int ds;
2985    
2986     usereg(s);
2987     ds=stackpos(s);
2988     emit_byte(0xd9);
2989     emit_byte(0xc0+ds); /* duplicate source */
2990    
2991     emit_byte(0xd9);
2992     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
2993     emit_byte(0xd9);
2994     emit_byte(0xfc); /* rndint */
2995     emit_byte(0xd9);
2996     emit_byte(0xc9); /* swap top two elements */
2997     emit_byte(0xd8);
2998     emit_byte(0xe1); /* subtract rounded from original */
2999     emit_byte(0xd9);
3000     emit_byte(0xf0); /* f2xm1 */
3001     emit_byte(0xdc);
3002     emit_byte(0x05);
3003     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3004     emit_byte(0xd9);
3005     emit_byte(0xfd); /* and scale it */
3006     emit_byte(0xdd);
3007     emit_byte(0xd9); /* take he rounded value off */
3008     tos_make(d); /* store to destination */
3009     }
3010     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3011    
3012     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3013     {
3014     int ds;
3015    
3016     usereg(s);
3017     ds=stackpos(s);
3018     emit_byte(0xd9);
3019     emit_byte(0xc0+ds); /* duplicate source */
3020     emit_byte(0xd9);
3021     emit_byte(0xea); /* fldl2e */
3022     emit_byte(0xde);
3023     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3024    
3025     emit_byte(0xd9);
3026     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3027     emit_byte(0xd9);
3028     emit_byte(0xfc); /* rndint */
3029     emit_byte(0xd9);
3030     emit_byte(0xc9); /* swap top two elements */
3031     emit_byte(0xd8);
3032     emit_byte(0xe1); /* subtract rounded from original */
3033     emit_byte(0xd9);
3034     emit_byte(0xf0); /* f2xm1 */
3035     emit_byte(0xdc);
3036     emit_byte(0x05);
3037     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3038     emit_byte(0xd9);
3039     emit_byte(0xfd); /* and scale it */
3040     emit_byte(0xdd);
3041     emit_byte(0xd9); /* take he rounded value off */
3042     tos_make(d); /* store to destination */
3043     }
3044     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3045    
3046     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3047     {
3048     int ds;
3049    
3050     usereg(s);
3051     ds=stackpos(s);
3052     emit_byte(0xd9);
3053     emit_byte(0xc0+ds); /* duplicate source */
3054     emit_byte(0xd9);
3055     emit_byte(0xe8); /* push '1' */
3056     emit_byte(0xd9);
3057     emit_byte(0xc9); /* swap top two */
3058     emit_byte(0xd9);
3059     emit_byte(0xf1); /* take 1*log2(x) */
3060     tos_make(d); /* store to destination */
3061     }
3062     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3063    
3064    
3065     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3066     {
3067     int ds;
3068    
3069     if (d!=s) {
3070     usereg(s);
3071     ds=stackpos(s);
3072     emit_byte(0xd9);
3073     emit_byte(0xc0+ds); /* duplicate source */
3074     emit_byte(0xd9);
3075     emit_byte(0xe0); /* take fchs */
3076     tos_make(d); /* store to destination */
3077     }
3078     else {
3079     make_tos(d);
3080     emit_byte(0xd9);
3081     emit_byte(0xe0); /* take fchs */
3082     }
3083     }
3084     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3085    
3086     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3087     {
3088     int ds;
3089    
3090     usereg(s);
3091     usereg(d);
3092    
3093     if (live.spos[s]==live.tos) {
3094     /* Source is on top of stack */
3095     ds=stackpos(d);
3096     emit_byte(0xdc);
3097     emit_byte(0xc0+ds); /* add source to dest*/
3098     }
3099     else {
3100     make_tos(d);
3101     ds=stackpos(s);
3102    
3103     emit_byte(0xd8);
3104     emit_byte(0xc0+ds); /* add source to dest*/
3105     }
3106     }
3107     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3108    
3109     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3110     {
3111     int ds;
3112    
3113     usereg(s);
3114     usereg(d);
3115    
3116     if (live.spos[s]==live.tos) {
3117     /* Source is on top of stack */
3118     ds=stackpos(d);
3119     emit_byte(0xdc);
3120     emit_byte(0xe8+ds); /* sub source from dest*/
3121     }
3122     else {
3123     make_tos(d);
3124     ds=stackpos(s);
3125    
3126     emit_byte(0xd8);
3127     emit_byte(0xe0+ds); /* sub src from dest */
3128     }
3129     }
3130     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3131    
3132     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3133     {
3134     int ds;
3135    
3136     usereg(s);
3137     usereg(d);
3138    
3139     make_tos(d);
3140     ds=stackpos(s);
3141    
3142     emit_byte(0xdd);
3143     emit_byte(0xe0+ds); /* cmp dest with source*/
3144     }
3145     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3146    
3147     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3148     {
3149     int ds;
3150    
3151     usereg(s);
3152     usereg(d);
3153    
3154     if (live.spos[s]==live.tos) {
3155     /* Source is on top of stack */
3156     ds=stackpos(d);
3157     emit_byte(0xdc);
3158     emit_byte(0xc8+ds); /* mul dest by source*/
3159     }
3160     else {
3161     make_tos(d);
3162     ds=stackpos(s);
3163    
3164     emit_byte(0xd8);
3165     emit_byte(0xc8+ds); /* mul dest by source*/
3166     }
3167     }
3168     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3169    
3170     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3171     {
3172     int ds;
3173    
3174     usereg(s);
3175     usereg(d);
3176    
3177     if (live.spos[s]==live.tos) {
3178     /* Source is on top of stack */
3179     ds=stackpos(d);
3180     emit_byte(0xdc);
3181     emit_byte(0xf8+ds); /* div dest by source */
3182     }
3183     else {
3184     make_tos(d);
3185     ds=stackpos(s);
3186    
3187     emit_byte(0xd8);
3188     emit_byte(0xf0+ds); /* div dest by source*/
3189     }
3190     }
3191     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3192    
3193     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3194     {
3195     int ds;
3196    
3197     usereg(s);
3198     usereg(d);
3199    
3200     make_tos2(d,s);
3201     ds=stackpos(s);
3202    
3203     if (ds!=1) {
3204     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3205     abort();
3206     }
3207     emit_byte(0xd9);
3208     emit_byte(0xf8); /* take rem from dest by source */
3209     }
3210     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3211    
3212     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3213     {
3214     int ds;
3215    
3216     usereg(s);
3217     usereg(d);
3218    
3219     make_tos2(d,s);
3220     ds=stackpos(s);
3221    
3222     if (ds!=1) {
3223     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3224     abort();
3225     }
3226     emit_byte(0xd9);
3227     emit_byte(0xf5); /* take rem1 from dest by source */
3228     }
3229     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3230    
3231    
3232     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3233     {
3234     make_tos(r);
3235     emit_byte(0xd9); /* ftst */
3236     emit_byte(0xe4);
3237     }
3238     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3239    
/* raw_fflags_into_flags() goes through "fstsw ax" when the target
   processor doesn't support fucomi, so %eax is clobbered in that case */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG (EAX_INDEX)
3243    
3244     static __inline__ void raw_fflags_into_flags(int r)
3245     {
3246     int p;
3247    
3248     usereg(r);
3249     p=stackpos(r);
3250    
3251     emit_byte(0xd9);
3252     emit_byte(0xee); /* Push 0 */
3253     emit_byte(0xd9);
3254     emit_byte(0xc9+p); /* swap top two around */
3255     if (have_cmov) {
3256     // gb-- fucomi is for P6 cores only, not K6-2 then...
3257     emit_byte(0xdb);
3258     emit_byte(0xe9+p); /* fucomi them */
3259     }
3260     else {
3261     emit_byte(0xdd);
3262     emit_byte(0xe1+p); /* fucom them */
3263     emit_byte(0x9b);
3264     emit_byte(0xdf);
3265     emit_byte(0xe0); /* fstsw ax */
3266     raw_sahf(0); /* sahf */
3267     }
3268     emit_byte(0xdd);
3269     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3270     }