ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.1
Committed: 2002-09-17T16:04:06Z (22 years ago) by gbeauche
Branch: MAIN
Log Message:
Import JIT compiler

File Contents

# User Rev Content
1 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
2     only target, and it's easier this way... */
3    
4     /*************************************************************************
5     * Some basic information about the target CPU *
6     *************************************************************************/
7    
8     #define EAX_INDEX 0
9     #define ECX_INDEX 1
10     #define EDX_INDEX 2
11     #define EBX_INDEX 3
12     #define ESP_INDEX 4
13     #define EBP_INDEX 5
14     #define ESI_INDEX 6
15     #define EDI_INDEX 7
16    
17     /* The register in which subroutines return an integer return value (0 is EAX_INDEX) */
18     #define REG_RESULT 0
19    
20     /* The registers subroutines take their first and second argument in */
21     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
22     /* Handle the _fastcall parameters of ECX and EDX (indices 1 and 2) */
23     #define REG_PAR1 1
24     #define REG_PAR2 2
25     #else
26     #define REG_PAR1 0 /* EAX_INDEX */
27     #define REG_PAR2 2 /* EDX_INDEX */
28     #endif
29    
30     /* Three registers that are not used for any of the above (6, 5, 3 are ESI_INDEX, EBP_INDEX, EBX_INDEX) */
31     #define REG_NOPAR1 6
32     #define REG_NOPAR2 5
33     #define REG_NOPAR3 3
34    
35     #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
36     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
37     #define REG_PC_TMP 0 /* NOTE(review): same index as REG_PC_PRE in this configuration -- confirm this is intended */
38     #else
39     #define REG_PC_TMP 1 /* Another register that is not the above */
40     #endif
41    
42     #define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount (1 is ECX_INDEX).
43     -1 if any reg will do */
44     #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
45     #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
46    
47     uae_s8 always_used[]={4,-1}; /* 4 is ESP_INDEX; the list ends with -1 */
48     uae_s8 can_byte[]={0,1,2,3,-1}; /* indices 0..3 (EAX, ECX, EDX, EBX); the list ends with -1 */
49     uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; /* every index except 4 (ESP_INDEX); the list ends with -1 */
50    
51     /* cpuopti mutates instruction handlers to assume registers are saved
52     by the caller */
53     uae_u8 call_saved[]={0,0,0,0,1,0,0,0}; /* one entry per register index; only entry 4 (ESP_INDEX) is set */
54    
55     /* This *should* be the same as call_saved. But:
56     - We might not really know which registers are saved, and which aren't,
57     so we need to preserve some, but don't want to rely on everyone else
58     also saving those registers
59     - Special registers (such as the stack pointer) should not be "preserved"
60     by pushing, even though they are "saved" across function calls
61     */
62     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1}; /* one entry per register index; every entry except 4 (ESP_INDEX) is set */
63    
64     /* Whether classes of instructions do or don't clobber the native flags. Classes defined empty expand to nothing; the others expand to a clobber_flags() call. */
65     #define CLOBBER_MOV
66     #define CLOBBER_LEA
67     #define CLOBBER_CMOV
68     #define CLOBBER_POP
69     #define CLOBBER_PUSH
70     #define CLOBBER_SUB clobber_flags()
71     #define CLOBBER_SBB clobber_flags()
72     #define CLOBBER_CMP clobber_flags()
73     #define CLOBBER_ADD clobber_flags()
74     #define CLOBBER_ADC clobber_flags()
75     #define CLOBBER_AND clobber_flags()
76     #define CLOBBER_OR clobber_flags()
77     #define CLOBBER_XOR clobber_flags()
78    
79     #define CLOBBER_ROL clobber_flags()
80     #define CLOBBER_ROR clobber_flags()
81     #define CLOBBER_SHLL clobber_flags()
82     #define CLOBBER_SHRL clobber_flags()
83     #define CLOBBER_SHRA clobber_flags()
84     #define CLOBBER_TEST clobber_flags()
85     #define CLOBBER_CL16
86     #define CLOBBER_CL8
87     #define CLOBBER_SE16
88     #define CLOBBER_SE8
89     #define CLOBBER_ZE16
90     #define CLOBBER_ZE8
91     #define CLOBBER_SW16 clobber_flags()
92     #define CLOBBER_SW32
93     #define CLOBBER_SETCC
94     #define CLOBBER_MUL clobber_flags()
95     #define CLOBBER_BT clobber_flags()
96     #define CLOBBER_BSF clobber_flags()
97    
98     const bool optimize_imm8 = true; /* when set, raw_lea_l_brr uses the 8-bit displacement form for offsets accepted by isbyte() */
99     const bool optimize_shift_once = true; /* when set, shift/rotate emitters use the by-one opcodes (0xd0/0xd1) for an immediate count of 1 */
100    
101     /*************************************************************************
102     * Actual encoding of the instructions on the target CPU *
103     *************************************************************************/
104    
105     static __inline__ int isbyte(uae_s32 x)
106     {
107     return (x>=-128 && x<=127);
108     }
109    
110     static __inline__ int isword(uae_s32 x)
111     {
112     return (x>=-32768 && x<=32767);
113     }
114    
115     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
116     {
117     emit_byte(0x50+r);
118     }
119     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
120    
121     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
122     {
123     emit_byte(0x58+r);
124     }
125     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
126    
127     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
128     {
129     emit_byte(0x0f);
130     emit_byte(0xba);
131     emit_byte(0xe0+r);
132     emit_byte(i);
133     }
134     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
135    
136     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
137     {
138     emit_byte(0x0f);
139     emit_byte(0xa3);
140     emit_byte(0xc0+8*b+r);
141     }
142     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
143    
144     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
145     {
146     emit_byte(0x0f);
147     emit_byte(0xba);
148     emit_byte(0xf8+r);
149     emit_byte(i);
150     }
151     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
152    
153     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
154     {
155     emit_byte(0x0f);
156     emit_byte(0xbb);
157     emit_byte(0xc0+8*b+r);
158     }
159     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
160    
161    
162     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
163     {
164     emit_byte(0x0f);
165     emit_byte(0xba);
166     emit_byte(0xf0+r);
167     emit_byte(i);
168     }
169     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
170    
171     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
172     {
173     emit_byte(0x0f);
174     emit_byte(0xb3);
175     emit_byte(0xc0+8*b+r);
176     }
177     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
178    
179     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
180     {
181     emit_byte(0x0f);
182     emit_byte(0xba);
183     emit_byte(0xe8+r);
184     emit_byte(i);
185     }
186     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
187    
188     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
189     {
190     emit_byte(0x0f);
191     emit_byte(0xab);
192     emit_byte(0xc0+8*b+r);
193     }
194     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
195    
196     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
197     {
198     emit_byte(0x66);
199     if (isbyte(i)) {
200     emit_byte(0x83);
201     emit_byte(0xe8+d);
202     emit_byte(i);
203     }
204     else {
205     emit_byte(0x81);
206     emit_byte(0xe8+d);
207     emit_word(i);
208     }
209     }
210     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
211    
212    
213     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
214     {
215     emit_byte(0x8b);
216     emit_byte(0x05+8*d);
217     emit_long(s);
218     }
219     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
220    
221     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
222     {
223     emit_byte(0xc7);
224     emit_byte(0x05);
225     emit_long(d);
226     emit_long(s);
227     }
228     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
229    
230     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
231     {
232     emit_byte(0x66);
233     emit_byte(0xc7);
234     emit_byte(0x05);
235     emit_long(d);
236     emit_word(s);
237     }
238     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
239    
240     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
241     {
242     emit_byte(0xc6);
243     emit_byte(0x05);
244     emit_long(d);
245     emit_byte(s);
246     }
247     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
248    
249     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
250     {
251     if (optimize_shift_once && (i == 1)) {
252     emit_byte(0xd0);
253     emit_byte(0x05);
254     emit_long(d);
255     }
256     else {
257     emit_byte(0xc0);
258     emit_byte(0x05);
259     emit_long(d);
260     emit_byte(i);
261     }
262     }
263     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
264    
265     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
266     {
267     if (optimize_shift_once && (i == 1)) {
268     emit_byte(0xd0);
269     emit_byte(0xc0+r);
270     }
271     else {
272     emit_byte(0xc0);
273     emit_byte(0xc0+r);
274     emit_byte(i);
275     }
276     }
277     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
278    
279     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
280     {
281     emit_byte(0x66);
282     emit_byte(0xc1);
283     emit_byte(0xc0+r);
284     emit_byte(i);
285     }
286     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
287    
288     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
289     {
290     if (optimize_shift_once && (i == 1)) {
291     emit_byte(0xd1);
292     emit_byte(0xc0+r);
293     }
294     else {
295     emit_byte(0xc1);
296     emit_byte(0xc0+r);
297     emit_byte(i);
298     }
299     }
300     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
301    
302     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
303     {
304     emit_byte(0xd3);
305     emit_byte(0xc0+d);
306     }
307     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
308    
309     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
310     {
311     emit_byte(0x66);
312     emit_byte(0xd3);
313     emit_byte(0xc0+d);
314     }
315     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
316    
317     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
318     {
319     emit_byte(0xd2);
320     emit_byte(0xc0+d);
321     }
322     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
323    
324     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
325     {
326     emit_byte(0xd3);
327     emit_byte(0xe0+d);
328     }
329     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
330    
331     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
332     {
333     emit_byte(0x66);
334     emit_byte(0xd3);
335     emit_byte(0xe0+d);
336     }
337     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
338    
339     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
340     {
341     emit_byte(0xd2);
342     emit_byte(0xe0+d);
343     }
344     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
345    
346     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
347     {
348     if (optimize_shift_once && (i == 1)) {
349     emit_byte(0xd0);
350     emit_byte(0xc8+r);
351     }
352     else {
353     emit_byte(0xc0);
354     emit_byte(0xc8+r);
355     emit_byte(i);
356     }
357     }
358     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
359    
360     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
361     {
362     emit_byte(0x66);
363     emit_byte(0xc1);
364     emit_byte(0xc8+r);
365     emit_byte(i);
366     }
367     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
368    
369     // gb-- used for making an fpcr value in compemu_fpp.cpp
370     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
371     {
372     emit_byte(0x0b);
373     emit_byte(0x05+8*d);
374     emit_long(s);
375     }
376     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
377    
378     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
379     {
380     if (optimize_shift_once && (i == 1)) {
381     emit_byte(0xd1);
382     emit_byte(0xc8+r);
383     }
384     else {
385     emit_byte(0xc1);
386     emit_byte(0xc8+r);
387     emit_byte(i);
388     }
389     }
390     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
391    
392     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
393     {
394     emit_byte(0xd3);
395     emit_byte(0xc8+d);
396     }
397     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
398    
399     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
400     {
401     emit_byte(0x66);
402     emit_byte(0xd3);
403     emit_byte(0xc8+d);
404     }
405     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
406    
407     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
408     {
409     emit_byte(0xd2);
410     emit_byte(0xc8+d);
411     }
412     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
413    
414     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
415     {
416     emit_byte(0xd3);
417     emit_byte(0xe8+d);
418     }
419     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
420    
421     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
422     {
423     emit_byte(0x66);
424     emit_byte(0xd3);
425     emit_byte(0xe8+d);
426     }
427     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
428    
429     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
430     {
431     emit_byte(0xd2);
432     emit_byte(0xe8+d);
433     }
434     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
435    
436     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
437     {
438     emit_byte(0xd3);
439     emit_byte(0xf8+d);
440     }
441     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
442    
443     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
444     {
445     emit_byte(0x66);
446     emit_byte(0xd3);
447     emit_byte(0xf8+d);
448     }
449     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
450    
451     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
452     {
453     emit_byte(0xd2);
454     emit_byte(0xf8+d);
455     }
456     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
457    
458     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
459     {
460     if (optimize_shift_once && (i == 1)) {
461     emit_byte(0xd1);
462     emit_byte(0xe0+r);
463     }
464     else {
465     emit_byte(0xc1);
466     emit_byte(0xe0+r);
467     emit_byte(i);
468     }
469     }
470     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
471    
472     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
473     {
474     emit_byte(0x66);
475     emit_byte(0xc1);
476     emit_byte(0xe0+r);
477     emit_byte(i);
478     }
479     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
480    
481     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
482     {
483     if (optimize_shift_once && (i == 1)) {
484     emit_byte(0xd0);
485     emit_byte(0xe0+r);
486     }
487     else {
488     emit_byte(0xc0);
489     emit_byte(0xe0+r);
490     emit_byte(i);
491     }
492     }
493     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
494    
495     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
496     {
497     if (optimize_shift_once && (i == 1)) {
498     emit_byte(0xd1);
499     emit_byte(0xe8+r);
500     }
501     else {
502     emit_byte(0xc1);
503     emit_byte(0xe8+r);
504     emit_byte(i);
505     }
506     }
507     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
508    
509     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
510     {
511     emit_byte(0x66);
512     emit_byte(0xc1);
513     emit_byte(0xe8+r);
514     emit_byte(i);
515     }
516     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
517    
518     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
519     {
520     if (optimize_shift_once && (i == 1)) {
521     emit_byte(0xd0);
522     emit_byte(0xe8+r);
523     }
524     else {
525     emit_byte(0xc0);
526     emit_byte(0xe8+r);
527     emit_byte(i);
528     }
529     }
530     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
531    
532     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
533     {
534     if (optimize_shift_once && (i == 1)) {
535     emit_byte(0xd1);
536     emit_byte(0xf8+r);
537     }
538     else {
539     emit_byte(0xc1);
540     emit_byte(0xf8+r);
541     emit_byte(i);
542     }
543     }
544     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
545    
546     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
547     {
548     emit_byte(0x66);
549     emit_byte(0xc1);
550     emit_byte(0xf8+r);
551     emit_byte(i);
552     }
553     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
554    
555     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
556     {
557     if (optimize_shift_once && (i == 1)) {
558     emit_byte(0xd0);
559     emit_byte(0xf8+r);
560     }
561     else {
562     emit_byte(0xc0);
563     emit_byte(0xf8+r);
564     emit_byte(i);
565     }
566     }
567     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
568    
569     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
570     {
571     emit_byte(0x9e);
572     }
573     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
574    
575     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
576     {
577     emit_byte(0x0f);
578     emit_byte(0xa2);
579     }
580     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
581    
582     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
583     {
584     emit_byte(0x9f);
585     }
586     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
587    
588     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
589     {
590     emit_byte(0x0f);
591     emit_byte(0x90+cc);
592     emit_byte(0xc0+d);
593     }
594     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
595    
596     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
597     {
598     emit_byte(0x0f);
599     emit_byte(0x90+cc);
600     emit_byte(0x05);
601     emit_long(d);
602     }
603     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
604    
605     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
606     {
607     if (have_cmov) {
608     emit_byte(0x0f);
609     emit_byte(0x40+cc);
610     emit_byte(0xc0+8*d+s);
611     }
612     else { /* replacement using branch and mov */
613     int uncc=(cc^1);
614     emit_byte(0x70+uncc);
615     emit_byte(2); /* skip next 2 bytes if not cc=true */
616     emit_byte(0x89);
617     emit_byte(0xc0+8*s+d);
618     }
619     }
620     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
621    
622     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
623     {
624     emit_byte(0x0f);
625     emit_byte(0xbc);
626     emit_byte(0xc0+8*d+s);
627     }
628     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
629    
630     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
631     {
632     emit_byte(0x0f);
633     emit_byte(0xbf);
634     emit_byte(0xc0+8*d+s);
635     }
636     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
637    
638     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
639     {
640     emit_byte(0x0f);
641     emit_byte(0xbe);
642     emit_byte(0xc0+8*d+s);
643     }
644     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
645    
646     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
647     {
648     emit_byte(0x0f);
649     emit_byte(0xb7);
650     emit_byte(0xc0+8*d+s);
651     }
652     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
653    
654     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
655     {
656     emit_byte(0x0f);
657     emit_byte(0xb6);
658     emit_byte(0xc0+8*d+s);
659     }
660     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
661    
662     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
663     {
664     emit_byte(0x0f);
665     emit_byte(0xaf);
666     emit_byte(0xc0+8*d+s);
667     }
668     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
669    
670     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
671     {
672     if (d!=MUL_NREG1 || s!=MUL_NREG2)
673     abort();
674     emit_byte(0xf7);
675     emit_byte(0xea);
676     }
677     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
678    
679     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
680     {
681     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
682     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
683     abort();
684     }
685     emit_byte(0xf7);
686     emit_byte(0xe2);
687     }
688     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
689    
690     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
691     {
692     abort(); /* %^$&%^$%#^ x86! */
693     emit_byte(0x0f);
694     emit_byte(0xaf);
695     emit_byte(0xc0+8*d+s);
696     }
697     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
698    
699     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
700     {
701     emit_byte(0x88);
702     emit_byte(0xc0+8*s+d);
703     }
704     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
705    
706     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
707     {
708     emit_byte(0x66);
709     emit_byte(0x89);
710     emit_byte(0xc0+8*s+d);
711     }
712     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
713    
714     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
715     {
716     int isebp=(baser==5)?0x40:0;
717     int fi;
718    
719     switch(factor) {
720     case 1: fi=0; break;
721     case 2: fi=1; break;
722     case 4: fi=2; break;
723     case 8: fi=3; break;
724     default: abort();
725     }
726    
727    
728     emit_byte(0x8b);
729     emit_byte(0x04+8*d+isebp);
730     emit_byte(baser+8*index+0x40*fi);
731     if (isebp)
732     emit_byte(0x00);
733     }
734     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
735    
736     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
737     {
738     int fi;
739     int isebp;
740    
741     switch(factor) {
742     case 1: fi=0; break;
743     case 2: fi=1; break;
744     case 4: fi=2; break;
745     case 8: fi=3; break;
746     default: abort();
747     }
748     isebp=(baser==5)?0x40:0;
749    
750     emit_byte(0x66);
751     emit_byte(0x8b);
752     emit_byte(0x04+8*d+isebp);
753     emit_byte(baser+8*index+0x40*fi);
754     if (isebp)
755     emit_byte(0x00);
756     }
757     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
758    
759     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
760     {
761     int fi;
762     int isebp;
763    
764     switch(factor) {
765     case 1: fi=0; break;
766     case 2: fi=1; break;
767     case 4: fi=2; break;
768     case 8: fi=3; break;
769     default: abort();
770     }
771     isebp=(baser==5)?0x40:0;
772    
773     emit_byte(0x8a);
774     emit_byte(0x04+8*d+isebp);
775     emit_byte(baser+8*index+0x40*fi);
776     if (isebp)
777     emit_byte(0x00);
778     }
779     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
780    
781     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
782     {
783     int fi;
784     int isebp;
785    
786     switch(factor) {
787     case 1: fi=0; break;
788     case 2: fi=1; break;
789     case 4: fi=2; break;
790     case 8: fi=3; break;
791     default: abort();
792     }
793    
794    
795     isebp=(baser==5)?0x40:0;
796    
797     emit_byte(0x89);
798     emit_byte(0x04+8*s+isebp);
799     emit_byte(baser+8*index+0x40*fi);
800     if (isebp)
801     emit_byte(0x00);
802     }
803     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
804    
805     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
806     {
807     int fi;
808     int isebp;
809    
810     switch(factor) {
811     case 1: fi=0; break;
812     case 2: fi=1; break;
813     case 4: fi=2; break;
814     case 8: fi=3; break;
815     default: abort();
816     }
817     isebp=(baser==5)?0x40:0;
818    
819     emit_byte(0x66);
820     emit_byte(0x89);
821     emit_byte(0x04+8*s+isebp);
822     emit_byte(baser+8*index+0x40*fi);
823     if (isebp)
824     emit_byte(0x00);
825     }
826     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
827    
828     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
829     {
830     int fi;
831     int isebp;
832    
833     switch(factor) {
834     case 1: fi=0; break;
835     case 2: fi=1; break;
836     case 4: fi=2; break;
837     case 8: fi=3; break;
838     default: abort();
839     }
840     isebp=(baser==5)?0x40:0;
841    
842     emit_byte(0x88);
843     emit_byte(0x04+8*s+isebp);
844     emit_byte(baser+8*index+0x40*fi);
845     if (isebp)
846     emit_byte(0x00);
847     }
848     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
849    
850     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
851     {
852     int fi;
853    
854     switch(factor) {
855     case 1: fi=0; break;
856     case 2: fi=1; break;
857     case 4: fi=2; break;
858     case 8: fi=3; break;
859     default: abort();
860     }
861    
862     emit_byte(0x89);
863     emit_byte(0x84+8*s);
864     emit_byte(baser+8*index+0x40*fi);
865     emit_long(base);
866     }
867     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
868    
869     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
870     {
871     int fi;
872    
873     switch(factor) {
874     case 1: fi=0; break;
875     case 2: fi=1; break;
876     case 4: fi=2; break;
877     case 8: fi=3; break;
878     default: abort();
879     }
880    
881     emit_byte(0x66);
882     emit_byte(0x89);
883     emit_byte(0x84+8*s);
884     emit_byte(baser+8*index+0x40*fi);
885     emit_long(base);
886     }
887     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
888    
889     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
890     {
891     int fi;
892    
893     switch(factor) {
894     case 1: fi=0; break;
895     case 2: fi=1; break;
896     case 4: fi=2; break;
897     case 8: fi=3; break;
898     default: abort();
899     }
900    
901     emit_byte(0x88);
902     emit_byte(0x84+8*s);
903     emit_byte(baser+8*index+0x40*fi);
904     emit_long(base);
905     }
906     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
907    
908     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
909     {
910     int fi;
911    
912     switch(factor) {
913     case 1: fi=0; break;
914     case 2: fi=1; break;
915     case 4: fi=2; break;
916     case 8: fi=3; break;
917     default: abort();
918     }
919    
920     emit_byte(0x8b);
921     emit_byte(0x84+8*d);
922     emit_byte(baser+8*index+0x40*fi);
923     emit_long(base);
924     }
925     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
926    
927     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
928     {
929     int fi;
930    
931     switch(factor) {
932     case 1: fi=0; break;
933     case 2: fi=1; break;
934     case 4: fi=2; break;
935     case 8: fi=3; break;
936     default: abort();
937     }
938    
939     emit_byte(0x66);
940     emit_byte(0x8b);
941     emit_byte(0x84+8*d);
942     emit_byte(baser+8*index+0x40*fi);
943     emit_long(base);
944     }
945     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
946    
947     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
948     {
949     int fi;
950    
951     switch(factor) {
952     case 1: fi=0; break;
953     case 2: fi=1; break;
954     case 4: fi=2; break;
955     case 8: fi=3; break;
956     default: abort();
957     }
958    
959     emit_byte(0x8a);
960     emit_byte(0x84+8*d);
961     emit_byte(baser+8*index+0x40*fi);
962     emit_long(base);
963     }
964     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
965    
966     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
967     {
968     int fi;
969     switch(factor) {
970     case 1: fi=0; break;
971     case 2: fi=1; break;
972     case 4: fi=2; break;
973     case 8: fi=3; break;
974     default:
975     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
976     abort();
977     }
978     emit_byte(0x8b);
979     emit_byte(0x04+8*d);
980     emit_byte(0x05+8*index+64*fi);
981     emit_long(base);
982     }
983     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
984    
985     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
986     {
987     int fi;
988     switch(factor) {
989     case 1: fi=0; break;
990     case 2: fi=1; break;
991     case 4: fi=2; break;
992     case 8: fi=3; break;
993     default:
994     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
995     abort();
996     }
997     if (have_cmov) {
998     emit_byte(0x0f);
999     emit_byte(0x40+cond);
1000     emit_byte(0x04+8*d);
1001     emit_byte(0x05+8*index+64*fi);
1002     emit_long(base);
1003     }
1004     else { /* replacement using branch and mov */
1005     int uncc=(cond^1);
1006     emit_byte(0x70+uncc);
1007     emit_byte(7); /* skip next 7 bytes if not cc=true */
1008     emit_byte(0x8b);
1009     emit_byte(0x04+8*d);
1010     emit_byte(0x05+8*index+64*fi);
1011     emit_long(base);
1012     }
1013     }
1014     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1015    
1016     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1017     {
1018     if (have_cmov) {
1019     emit_byte(0x0f);
1020     emit_byte(0x40+cond);
1021     emit_byte(0x05+8*d);
1022     emit_long(mem);
1023     }
1024     else { /* replacement using branch and mov */
1025     int uncc=(cond^1);
1026     emit_byte(0x70+uncc);
1027     emit_byte(6); /* skip next 6 bytes if not cc=true */
1028     emit_byte(0x8b);
1029     emit_byte(0x05+8*d);
1030     emit_long(mem);
1031     }
1032     }
1033     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1034    
1035     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1036     {
1037     emit_byte(0x8b);
1038     emit_byte(0x40+8*d+s);
1039     emit_byte(offset);
1040     }
1041     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1042    
1043     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1044     {
1045     emit_byte(0x66);
1046     emit_byte(0x8b);
1047     emit_byte(0x40+8*d+s);
1048     emit_byte(offset);
1049     }
1050     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1051    
1052     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1053     {
1054     emit_byte(0x8a);
1055     emit_byte(0x40+8*d+s);
1056     emit_byte(offset);
1057     }
1058     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1059    
1060     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1061     {
1062     emit_byte(0x8b);
1063     emit_byte(0x80+8*d+s);
1064     emit_long(offset);
1065     }
1066     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1067    
1068     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1069     {
1070     emit_byte(0x66);
1071     emit_byte(0x8b);
1072     emit_byte(0x80+8*d+s);
1073     emit_long(offset);
1074     }
1075     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1076    
1077     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1078     {
1079     emit_byte(0x8a);
1080     emit_byte(0x80+8*d+s);
1081     emit_long(offset);
1082     }
1083     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1084    
1085     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1086     {
1087     emit_byte(0xc7);
1088     emit_byte(0x40+d);
1089     emit_byte(offset);
1090     emit_long(i);
1091     }
1092     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1093    
1094     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1095     {
1096     emit_byte(0x66);
1097     emit_byte(0xc7);
1098     emit_byte(0x40+d);
1099     emit_byte(offset);
1100     emit_word(i);
1101     }
1102     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1103    
1104     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1105     {
1106     emit_byte(0xc6);
1107     emit_byte(0x40+d);
1108     emit_byte(offset);
1109     emit_byte(i);
1110     }
1111     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1112    
1113     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1114     {
1115     emit_byte(0x89);
1116     emit_byte(0x40+8*s+d);
1117     emit_byte(offset);
1118     }
1119     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1120    
1121     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1122     {
1123     emit_byte(0x66);
1124     emit_byte(0x89);
1125     emit_byte(0x40+8*s+d);
1126     emit_byte(offset);
1127     }
1128     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1129    
1130     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1131     {
1132     emit_byte(0x88);
1133     emit_byte(0x40+8*s+d);
1134     emit_byte(offset);
1135     }
1136     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1137    
1138     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1139     {
1140     if (optimize_imm8 && isbyte(offset)) {
1141     emit_byte(0x8d);
1142     emit_byte(0x40+8*d+s);
1143     emit_byte(offset);
1144     }
1145     else {
1146     emit_byte(0x8d);
1147     emit_byte(0x80+8*d+s);
1148     emit_long(offset);
1149     }
1150     }
1151     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1152    
1153     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1154     {
1155     int fi;
1156    
1157     switch(factor) {
1158     case 1: fi=0; break;
1159     case 2: fi=1; break;
1160     case 4: fi=2; break;
1161     case 8: fi=3; break;
1162     default: abort();
1163     }
1164    
1165     if (optimize_imm8 && isbyte(offset)) {
1166     emit_byte(0x8d);
1167     emit_byte(0x44+8*d);
1168     emit_byte(0x40*fi+8*index+s);
1169     emit_byte(offset);
1170     }
1171     else {
1172     emit_byte(0x8d);
1173     emit_byte(0x84+8*d);
1174     emit_byte(0x40*fi+8*index+s);
1175     emit_long(offset);
1176     }
1177     }
1178     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1179    
1180     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1181     {
1182     int isebp=(s==5)?0x40:0;
1183     int fi;
1184    
1185     switch(factor) {
1186     case 1: fi=0; break;
1187     case 2: fi=1; break;
1188     case 4: fi=2; break;
1189     case 8: fi=3; break;
1190     default: abort();
1191     }
1192    
1193     emit_byte(0x8d);
1194     emit_byte(0x04+8*d+isebp);
1195     emit_byte(0x40*fi+8*index+s);
1196     if (isebp)
1197     emit_byte(0);
1198     }
1199     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1200    
1201     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1202     {
1203     if (optimize_imm8 && isbyte(offset)) {
1204     emit_byte(0x89);
1205     emit_byte(0x40+8*s+d);
1206     emit_byte(offset);
1207     }
1208     else {
1209     emit_byte(0x89);
1210     emit_byte(0x80+8*s+d);
1211     emit_long(offset);
1212     }
1213     }
1214     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1215    
1216     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1217     {
1218     emit_byte(0x66);
1219     emit_byte(0x89);
1220     emit_byte(0x80+8*s+d);
1221     emit_long(offset);
1222     }
1223     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1224    
1225     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1226     {
1227     if (optimize_imm8 && isbyte(offset)) {
1228     emit_byte(0x88);
1229     emit_byte(0x40+8*s+d);
1230     emit_byte(offset);
1231     }
1232     else {
1233     emit_byte(0x88);
1234     emit_byte(0x80+8*s+d);
1235     emit_long(offset);
1236     }
1237     }
1238     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1239    
1240     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1241     {
1242     emit_byte(0x0f);
1243     emit_byte(0xc8+r);
1244     }
1245     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1246    
1247     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1248     {
1249     emit_byte(0x66);
1250     emit_byte(0xc1);
1251     emit_byte(0xc0+r);
1252     emit_byte(0x08);
1253     }
1254     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1255    
1256     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1257     {
1258     emit_byte(0x89);
1259     emit_byte(0xc0+8*s+d);
1260     }
1261     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1262    
1263     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1264     {
1265     emit_byte(0x89);
1266     emit_byte(0x05+8*s);
1267     emit_long(d);
1268     }
1269     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1270    
1271     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1272     {
1273     emit_byte(0x66);
1274     emit_byte(0x89);
1275     emit_byte(0x05+8*s);
1276     emit_long(d);
1277     }
1278     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1279    
1280     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1281     {
1282     emit_byte(0x66);
1283     emit_byte(0x8b);
1284     emit_byte(0x05+8*d);
1285     emit_long(s);
1286     }
1287     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1288    
1289     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1290     {
1291     emit_byte(0x88);
1292     emit_byte(0x05+8*s);
1293     emit_long(d);
1294     }
1295     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1296    
1297     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1298     {
1299     emit_byte(0x8a);
1300     emit_byte(0x05+8*d);
1301     emit_long(s);
1302     }
1303     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1304    
1305     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1306     {
1307     emit_byte(0xb8+d);
1308     emit_long(s);
1309     }
1310     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1311    
1312     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1313     {
1314     emit_byte(0x66);
1315     emit_byte(0xb8+d);
1316     emit_word(s);
1317     }
1318     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1319    
1320     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1321     {
1322     emit_byte(0xb0+d);
1323     emit_byte(s);
1324     }
1325     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1326    
1327     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1328     {
1329     emit_byte(0x81);
1330     emit_byte(0x15);
1331     emit_long(d);
1332     emit_long(s);
1333     }
1334     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1335    
1336     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1337     {
1338     if (optimize_imm8 && isbyte(s)) {
1339     emit_byte(0x83);
1340     emit_byte(0x05);
1341     emit_long(d);
1342     emit_byte(s);
1343     }
1344     else {
1345     emit_byte(0x81);
1346     emit_byte(0x05);
1347     emit_long(d);
1348     emit_long(s);
1349     }
1350     }
1351     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1352    
1353     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1354     {
1355     emit_byte(0x66);
1356     emit_byte(0x81);
1357     emit_byte(0x05);
1358     emit_long(d);
1359     emit_word(s);
1360     }
1361     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1362    
1363     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1364     {
1365     emit_byte(0x80);
1366     emit_byte(0x05);
1367     emit_long(d);
1368     emit_byte(s);
1369     }
1370     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1371    
1372     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1373     {
1374     emit_byte(0xf7);
1375     emit_byte(0xc0+d);
1376     emit_long(i);
1377     }
1378     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1379    
1380     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1381     {
1382     emit_byte(0x85);
1383     emit_byte(0xc0+8*s+d);
1384     }
1385     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1386    
1387     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1388     {
1389     emit_byte(0x66);
1390     emit_byte(0x85);
1391     emit_byte(0xc0+8*s+d);
1392     }
1393     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1394    
1395     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1396     {
1397     emit_byte(0x84);
1398     emit_byte(0xc0+8*s+d);
1399     }
1400     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1401    
1402     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1403     {
1404     if (optimize_imm8 && isbyte(i)) {
1405     emit_byte(0x83);
1406     emit_byte(0xe0+d);
1407     emit_byte(i);
1408     }
1409     else {
1410     emit_byte(0x81);
1411     emit_byte(0xe0+d);
1412     emit_long(i);
1413     }
1414     }
1415     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1416    
1417     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1418     {
1419     emit_byte(0x66);
1420     emit_byte(0x81);
1421     emit_byte(0xe0+d);
1422     emit_word(i);
1423     }
1424     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1425    
1426     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1427     {
1428     emit_byte(0x21);
1429     emit_byte(0xc0+8*s+d);
1430     }
1431     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1432    
1433     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1434     {
1435     emit_byte(0x66);
1436     emit_byte(0x21);
1437     emit_byte(0xc0+8*s+d);
1438     }
1439     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1440    
1441     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1442     {
1443     emit_byte(0x20);
1444     emit_byte(0xc0+8*s+d);
1445     }
1446     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1447    
1448     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1449     {
1450     if (optimize_imm8 && isbyte(i)) {
1451     emit_byte(0x83);
1452     emit_byte(0xc8+d);
1453     emit_byte(i);
1454     }
1455     else {
1456     emit_byte(0x81);
1457     emit_byte(0xc8+d);
1458     emit_long(i);
1459     }
1460     }
1461     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1462    
1463     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1464     {
1465     emit_byte(0x09);
1466     emit_byte(0xc0+8*s+d);
1467     }
1468     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1469    
1470     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1471     {
1472     emit_byte(0x66);
1473     emit_byte(0x09);
1474     emit_byte(0xc0+8*s+d);
1475     }
1476     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1477    
1478     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1479     {
1480     emit_byte(0x08);
1481     emit_byte(0xc0+8*s+d);
1482     }
1483     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1484    
1485     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1486     {
1487     emit_byte(0x11);
1488     emit_byte(0xc0+8*s+d);
1489     }
1490     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1491    
1492     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1493     {
1494     emit_byte(0x66);
1495     emit_byte(0x11);
1496     emit_byte(0xc0+8*s+d);
1497     }
1498     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1499    
1500     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1501     {
1502     emit_byte(0x10);
1503     emit_byte(0xc0+8*s+d);
1504     }
1505     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1506    
1507     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1508     {
1509     emit_byte(0x01);
1510     emit_byte(0xc0+8*s+d);
1511     }
1512     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1513    
1514     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1515     {
1516     emit_byte(0x66);
1517     emit_byte(0x01);
1518     emit_byte(0xc0+8*s+d);
1519     }
1520     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1521    
1522     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1523     {
1524     emit_byte(0x00);
1525     emit_byte(0xc0+8*s+d);
1526     }
1527     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1528    
1529     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1530     {
1531     if (isbyte(i)) {
1532     emit_byte(0x83);
1533     emit_byte(0xe8+d);
1534     emit_byte(i);
1535     }
1536     else {
1537     emit_byte(0x81);
1538     emit_byte(0xe8+d);
1539     emit_long(i);
1540     }
1541     }
1542     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1543    
1544     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1545     {
1546     emit_byte(0x80);
1547     emit_byte(0xe8+d);
1548     emit_byte(i);
1549     }
1550     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1551    
1552     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1553     {
1554     if (isbyte(i)) {
1555     emit_byte(0x83);
1556     emit_byte(0xc0+d);
1557     emit_byte(i);
1558     }
1559     else {
1560     emit_byte(0x81);
1561     emit_byte(0xc0+d);
1562     emit_long(i);
1563     }
1564     }
1565     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1566    
1567     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1568     {
1569     if (isbyte(i)) {
1570     emit_byte(0x66);
1571     emit_byte(0x83);
1572     emit_byte(0xc0+d);
1573     emit_byte(i);
1574     }
1575     else {
1576     emit_byte(0x66);
1577     emit_byte(0x81);
1578     emit_byte(0xc0+d);
1579     emit_word(i);
1580     }
1581     }
1582     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1583    
1584     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1585     {
1586     emit_byte(0x80);
1587     emit_byte(0xc0+d);
1588     emit_byte(i);
1589     }
1590     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1591    
1592     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1593     {
1594     emit_byte(0x19);
1595     emit_byte(0xc0+8*s+d);
1596     }
1597     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1598    
1599     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1600     {
1601     emit_byte(0x66);
1602     emit_byte(0x19);
1603     emit_byte(0xc0+8*s+d);
1604     }
1605     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1606    
1607     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1608     {
1609     emit_byte(0x18);
1610     emit_byte(0xc0+8*s+d);
1611     }
1612     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1613    
1614     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1615     {
1616     emit_byte(0x29);
1617     emit_byte(0xc0+8*s+d);
1618     }
1619     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1620    
1621     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1622     {
1623     emit_byte(0x66);
1624     emit_byte(0x29);
1625     emit_byte(0xc0+8*s+d);
1626     }
1627     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1628    
1629     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1630     {
1631     emit_byte(0x28);
1632     emit_byte(0xc0+8*s+d);
1633     }
1634     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1635    
1636     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1637     {
1638     emit_byte(0x39);
1639     emit_byte(0xc0+8*s+d);
1640     }
1641     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1642    
1643     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1644     {
1645     if (optimize_imm8 && isbyte(i)) {
1646     emit_byte(0x83);
1647     emit_byte(0xf8+r);
1648     emit_byte(i);
1649     }
1650     else {
1651     emit_byte(0x81);
1652     emit_byte(0xf8+r);
1653     emit_long(i);
1654     }
1655     }
1656     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1657    
1658     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1659     {
1660     emit_byte(0x66);
1661     emit_byte(0x39);
1662     emit_byte(0xc0+8*s+d);
1663     }
1664     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1665    
1666     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1667     {
1668     emit_byte(0x80);
1669     emit_byte(0xf8+d);
1670     emit_byte(i);
1671     }
1672     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1673    
1674     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1675     {
1676     emit_byte(0x38);
1677     emit_byte(0xc0+8*s+d);
1678     }
1679     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1680    
1681     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1682     {
1683     int fi;
1684    
1685     switch(factor) {
1686     case 1: fi=0; break;
1687     case 2: fi=1; break;
1688     case 4: fi=2; break;
1689     case 8: fi=3; break;
1690     default: abort();
1691     }
1692     emit_byte(0x39);
1693     emit_byte(0x04+8*d);
1694     emit_byte(5+8*index+0x40*fi);
1695     emit_long(offset);
1696     }
1697     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1698    
1699     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1700     {
1701     emit_byte(0x31);
1702     emit_byte(0xc0+8*s+d);
1703     }
1704     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1705    
1706     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1707     {
1708     emit_byte(0x66);
1709     emit_byte(0x31);
1710     emit_byte(0xc0+8*s+d);
1711     }
1712     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1713    
1714     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1715     {
1716     emit_byte(0x30);
1717     emit_byte(0xc0+8*s+d);
1718     }
1719     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1720    
1721     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1722     {
1723     if (optimize_imm8 && isbyte(s)) {
1724     emit_byte(0x83);
1725     emit_byte(0x2d);
1726     emit_long(d);
1727     emit_byte(s);
1728     }
1729     else {
1730     emit_byte(0x81);
1731     emit_byte(0x2d);
1732     emit_long(d);
1733     emit_long(s);
1734     }
1735     }
1736     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1737    
1738     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1739     {
1740     if (optimize_imm8 && isbyte(s)) {
1741     emit_byte(0x83);
1742     emit_byte(0x3d);
1743     emit_long(d);
1744     emit_byte(s);
1745     }
1746     else {
1747     emit_byte(0x81);
1748     emit_byte(0x3d);
1749     emit_long(d);
1750     emit_long(s);
1751     }
1752     }
1753     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1754    
1755     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1756     {
1757     emit_byte(0x87);
1758     emit_byte(0xc0+8*r1+r2);
1759     }
1760     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1761    
1762     /*************************************************************************
1763     * FIXME: string-related instructions *
1764     *************************************************************************/
1765    
1766     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1767     {
1768     emit_byte(0xfc);
1769     }
1770     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1771    
1772     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1773     {
1774     emit_byte(0xfd);
1775     }
1776     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1777    
1778     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1779     {
1780     emit_byte(0xa4);
1781     }
1782     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1783    
1784     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1785     {
1786     emit_byte(0xa5);
1787     }
1788     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1789    
1790     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1791     {
1792     emit_byte(0xf3);
1793     }
1794     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1795    
1796     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1797     {
1798     raw_rep();
1799     raw_movs_b();
1800     }
1801     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1802    
1803     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1804     {
1805     raw_rep();
1806     raw_movs_l();
1807     }
1808     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1809    
1810     /*************************************************************************
1811     * FIXME: mem access modes probably wrong *
1812     *************************************************************************/
1813    
1814     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1815     {
1816     emit_byte(0x9c);
1817     }
1818     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1819    
1820     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1821     {
1822     emit_byte(0x9d);
1823     }
1824     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1825    
1826     /*************************************************************************
1827     * Unoptimizable stuff --- jump *
1828     *************************************************************************/
1829    
1830     static __inline__ void raw_call_r(R4 r)
1831     {
1832     emit_byte(0xff);
1833     emit_byte(0xd0+r);
1834     }
1835    
1836     static __inline__ void raw_jmp_r(R4 r)
1837     {
1838     emit_byte(0xff);
1839     emit_byte(0xe0+r);
1840     }
1841    
1842     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1843     {
1844     int mu;
1845     switch(m) {
1846     case 1: mu=0; break;
1847     case 2: mu=1; break;
1848     case 4: mu=2; break;
1849     case 8: mu=3; break;
1850     default: abort();
1851     }
1852     emit_byte(0xff);
1853     emit_byte(0x24);
1854     emit_byte(0x05+8*r+0x40*mu);
1855     emit_long(base);
1856     }
1857    
1858     static __inline__ void raw_jmp_m(uae_u32 base)
1859     {
1860     emit_byte(0xff);
1861     emit_byte(0x25);
1862     emit_long(base);
1863     }
1864    
1865    
1866     static __inline__ void raw_call(uae_u32 t)
1867     {
1868     emit_byte(0xe8);
1869     emit_long(t-(uae_u32)target-4);
1870     }
1871    
1872     static __inline__ void raw_jmp(uae_u32 t)
1873     {
1874     emit_byte(0xe9);
1875     emit_long(t-(uae_u32)target-4);
1876     }
1877    
1878     static __inline__ void raw_jl(uae_u32 t)
1879     {
1880     emit_byte(0x0f);
1881     emit_byte(0x8c);
1882     emit_long(t-(uae_u32)target-4);
1883     }
1884    
1885     static __inline__ void raw_jz(uae_u32 t)
1886     {
1887     emit_byte(0x0f);
1888     emit_byte(0x84);
1889     emit_long(t-(uae_u32)target-4);
1890     }
1891    
1892     static __inline__ void raw_jnz(uae_u32 t)
1893     {
1894     emit_byte(0x0f);
1895     emit_byte(0x85);
1896     emit_long(t-(uae_u32)target-4);
1897     }
1898    
1899     static __inline__ void raw_jnz_l_oponly(void)
1900     {
1901     emit_byte(0x0f);
1902     emit_byte(0x85);
1903     }
1904    
1905     static __inline__ void raw_jcc_l_oponly(int cc)
1906     {
1907     emit_byte(0x0f);
1908     emit_byte(0x80+cc);
1909     }
1910    
1911     static __inline__ void raw_jnz_b_oponly(void)
1912     {
1913     emit_byte(0x75);
1914     }
1915    
1916     static __inline__ void raw_jz_b_oponly(void)
1917     {
1918     emit_byte(0x74);
1919     }
1920    
1921     static __inline__ void raw_jcc_b_oponly(int cc)
1922     {
1923     emit_byte(0x70+cc);
1924     }
1925    
1926     static __inline__ void raw_jmp_l_oponly(void)
1927     {
1928     emit_byte(0xe9);
1929     }
1930    
1931     static __inline__ void raw_jmp_b_oponly(void)
1932     {
1933     emit_byte(0xeb);
1934     }
1935    
1936     static __inline__ void raw_ret(void)
1937     {
1938     emit_byte(0xc3);
1939     }
1940    
1941     static __inline__ void raw_nop(void)
1942     {
1943     emit_byte(0x90);
1944     }
1945    
1946    
1947     /*************************************************************************
1948     * Flag handling, to and fro UAE flag register *
1949     *************************************************************************/
1950    
1951     #ifdef SAHF_SETO_PROFITABLE
1952    
1953     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
1954    
1955     static __inline__ void raw_flags_to_reg(int r)
1956     {
1957     raw_lahf(0); /* Most flags in AH */
1958     //raw_setcc(r,0); /* V flag in AL */
1959     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
1960    
1961     #if 1 /* Let's avoid those nasty partial register stalls */
1962     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
1963     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
1964     //live.state[FLAGTMP].status=CLEAN;
1965     live.state[FLAGTMP].status=INMEM;
1966     live.state[FLAGTMP].realreg=-1;
1967     /* We just "evicted" FLAGTMP. */
1968     if (live.nat[r].nholds!=1) {
1969     /* Huh? */
1970     abort();
1971     }
1972     live.nat[r].nholds=0;
1973     #endif
1974     }
1975    
1976     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
1977     static __inline__ void raw_reg_to_flags(int r)
1978     {
1979     raw_cmp_b_ri(r,-127); /* set V */
1980     raw_sahf(0);
1981     }
1982    
1983     #else
1984    
1985     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
1986     static __inline__ void raw_flags_to_reg(int r)
1987     {
1988     raw_pushfl();
1989     raw_pop_l_r(r);
1990     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
1991     // live.state[FLAGTMP].status=CLEAN;
1992     live.state[FLAGTMP].status=INMEM;
1993     live.state[FLAGTMP].realreg=-1;
1994     /* We just "evicted" FLAGTMP. */
1995     if (live.nat[r].nholds!=1) {
1996     /* Huh? */
1997     abort();
1998     }
1999     live.nat[r].nholds=0;
2000     }
2001    
2002     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2003     static __inline__ void raw_reg_to_flags(int r)
2004     {
2005     raw_push_l_r(r);
2006     raw_popfl();
2007     }
2008    
2009     #endif
2010    
2011     /* Apparently, there are enough instructions between flag store and
2012     flag reload to avoid the partial memory stall */
2013     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2014     {
2015     #if 1
2016     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2017     #else
2018     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2019     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2020     #endif
2021     }
2022    
2023     /* FLAGX is byte sized, and we *do* write it at that size */
2024     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2025     {
2026     if (live.nat[target].canbyte)
2027     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2028     else if (live.nat[target].canword)
2029     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2030     else
2031     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2032     }
2033    
2034    
2035     static __inline__ void raw_inc_sp(int off)
2036     {
2037     raw_add_l_ri(4,off);
2038     }
2039    
2040     /*************************************************************************
2041     * Handling mistaken direct memory access *
2042     *************************************************************************/
2043    
2044     // gb-- I don't need that part for JIT Basilisk II
2045     #if defined(NATMEM_OFFSET) && 0
2046     #include <asm/sigcontext.h>
2047     #include <signal.h>
2048    
2049     #define SIG_READ 1
2050     #define SIG_WRITE 2
2051    
2052     static int in_handler=0;
2053     static uae_u8 veccode[256];
2054    
2055     static void vec(int x, struct sigcontext sc)
2056     {
2057     uae_u8* i=(uae_u8*)sc.eip;
2058     uae_u32 addr=sc.cr2;
2059     int r=-1;
2060     int size=4;
2061     int dir=-1;
2062     int len=0;
2063     int j;
2064    
2065     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2066     if (!canbang)
2067     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2068     if (in_handler)
2069     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2070    
2071     if (canbang && i>=compiled_code && i<=current_compile_p) {
2072     if (*i==0x66) {
2073     i++;
2074     size=2;
2075     len++;
2076     }
2077    
2078     switch(i[0]) {
2079     case 0x8a:
2080     if ((i[1]&0xc0)==0x80) {
2081     r=(i[1]>>3)&7;
2082     dir=SIG_READ;
2083     size=1;
2084     len+=6;
2085     break;
2086     }
2087     break;
2088     case 0x88:
2089     if ((i[1]&0xc0)==0x80) {
2090     r=(i[1]>>3)&7;
2091     dir=SIG_WRITE;
2092     size=1;
2093     len+=6;
2094     break;
2095     }
2096     break;
2097     case 0x8b:
2098     if ((i[1]&0xc0)==0x80) {
2099     r=(i[1]>>3)&7;
2100     dir=SIG_READ;
2101     len+=6;
2102     break;
2103     }
2104     if ((i[1]&0xc0)==0x40) {
2105     r=(i[1]>>3)&7;
2106     dir=SIG_READ;
2107     len+=3;
2108     break;
2109     }
2110     break;
2111     case 0x89:
2112     if ((i[1]&0xc0)==0x80) {
2113     r=(i[1]>>3)&7;
2114     dir=SIG_WRITE;
2115     len+=6;
2116     break;
2117     }
2118     if ((i[1]&0xc0)==0x40) {
2119     r=(i[1]>>3)&7;
2120     dir=SIG_WRITE;
2121     len+=3;
2122     break;
2123     }
2124     break;
2125     }
2126     }
2127    
2128     if (r!=-1) {
2129     void* pr=NULL;
2130     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2131    
2132     switch(r) {
2133     case 0: pr=&(sc.eax); break;
2134     case 1: pr=&(sc.ecx); break;
2135     case 2: pr=&(sc.edx); break;
2136     case 3: pr=&(sc.ebx); break;
2137     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2138     case 5: pr=(size>1)?
2139     (void*)(&(sc.ebp)):
2140     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2141     case 6: pr=(size>1)?
2142     (void*)(&(sc.esi)):
2143     (void*)(((uae_u8*)&(sc.edx))+1); break;
2144     case 7: pr=(size>1)?
2145     (void*)(&(sc.edi)):
2146     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2147     default: abort();
2148     }
2149     if (pr) {
2150     blockinfo* bi;
2151    
2152     if (currprefs.comp_oldsegv) {
2153     addr-=NATMEM_OFFSET;
2154    
2155     if ((addr>=0x10000000 && addr<0x40000000) ||
2156     (addr>=0x50000000)) {
2157     write_log("Suspicious address in %x SEGV handler.\n",addr);
2158     }
2159     if (dir==SIG_READ) {
2160     switch(size) {
2161     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2162     case 2: *((uae_u16*)pr)=get_word(addr); break;
2163     case 4: *((uae_u32*)pr)=get_long(addr); break;
2164     default: abort();
2165     }
2166     }
2167     else { /* write */
2168     switch(size) {
2169     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2170     case 2: put_word(addr,*((uae_u16*)pr)); break;
2171     case 4: put_long(addr,*((uae_u32*)pr)); break;
2172     default: abort();
2173     }
2174     }
2175     write_log("Handled one access!\n");
2176     fflush(stdout);
2177     segvcount++;
2178     sc.eip+=len;
2179     }
2180     else {
2181     void* tmp=target;
2182     int i;
2183     uae_u8 vecbuf[5];
2184    
2185     addr-=NATMEM_OFFSET;
2186    
2187     if ((addr>=0x10000000 && addr<0x40000000) ||
2188     (addr>=0x50000000)) {
2189     write_log("Suspicious address in %x SEGV handler.\n",addr);
2190     }
2191    
2192     target=(uae_u8*)sc.eip;
2193     for (i=0;i<5;i++)
2194     vecbuf[i]=target[i];
2195     emit_byte(0xe9);
2196     emit_long((uae_u32)veccode-(uae_u32)target-4);
2197     write_log("Create jump to %p\n",veccode);
2198    
2199     write_log("Handled one access!\n");
2200     fflush(stdout);
2201     segvcount++;
2202    
2203     target=veccode;
2204    
2205     if (dir==SIG_READ) {
2206     switch(size) {
2207     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2208     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2209     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2210     default: abort();
2211     }
2212     }
2213     else { /* write */
2214     switch(size) {
2215     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2216     case 2: put_word(addr,*((uae_u16*)pr)); break;
2217     case 4: put_long(addr,*((uae_u32*)pr)); break;
2218     default: abort();
2219     }
2220     }
2221     for (i=0;i<5;i++)
2222     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2223     raw_mov_l_mi((uae_u32)&in_handler,0);
2224     emit_byte(0xe9);
2225     emit_long(sc.eip+len-(uae_u32)target-4);
2226     in_handler=1;
2227     target=tmp;
2228     }
2229     bi=active;
2230     while (bi) {
2231     if (bi->handler &&
2232     (uae_u8*)bi->direct_handler<=i &&
2233     (uae_u8*)bi->nexthandler>i) {
2234     write_log("deleted trigger (%p<%p<%p) %p\n",
2235     bi->handler,
2236     i,
2237     bi->nexthandler,
2238     bi->pc_p);
2239     invalidate_block(bi);
2240     raise_in_cl_list(bi);
2241     set_special(0);
2242     return;
2243     }
2244     bi=bi->next;
2245     }
2246     /* Not found in the active list. Might be a rom routine that
2247     is in the dormant list */
2248     bi=dormant;
2249     while (bi) {
2250     if (bi->handler &&
2251     (uae_u8*)bi->direct_handler<=i &&
2252     (uae_u8*)bi->nexthandler>i) {
2253     write_log("deleted trigger (%p<%p<%p) %p\n",
2254     bi->handler,
2255     i,
2256     bi->nexthandler,
2257     bi->pc_p);
2258     invalidate_block(bi);
2259     raise_in_cl_list(bi);
2260     set_special(0);
2261     return;
2262     }
2263     bi=bi->next;
2264     }
2265     write_log("Huh? Could not find trigger!\n");
2266     return;
2267     }
2268     }
2269     write_log("Can't handle access!\n");
2270     for (j=0;j<10;j++) {
2271     write_log("instruction byte %2d is %02x\n",j,i[j]);
2272     }
2273     write_log("Please send the above info (starting at \"fault address\") to\n"
2274     "bmeyer@csse.monash.edu.au\n"
2275     "This shouldn't happen ;-)\n");
2276     fflush(stdout);
2277     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2278     }
2279     #endif
2280    
2281    
2282     /*************************************************************************
2283     * Checking for CPU features *
2284     *************************************************************************/
2285    
2286     typedef struct {
2287     uae_u32 eax;
2288     uae_u32 ecx;
2289     uae_u32 edx;
2290     uae_u32 ebx;
2291     } x86_regs;
2292    
2293    
2294     /* This could be so much easier if it could make assumptions about the
2295     compiler... */
2296    
2297     static uae_u8 cpuid_space[256];
2298     static uae_u32 cpuid_ptr;
2299     static uae_u32 cpuid_level;
2300    
2301     static x86_regs cpuid(uae_u32 level)
2302     {
2303     x86_regs answer;
2304     uae_u8* tmp=get_target();
2305    
2306     cpuid_ptr=(uae_u32)&answer;
2307     cpuid_level=level;
2308    
2309     set_target(cpuid_space);
2310     raw_push_l_r(0); /* eax */
2311     raw_push_l_r(1); /* ecx */
2312     raw_push_l_r(2); /* edx */
2313     raw_push_l_r(3); /* ebx */
2314     raw_push_l_r(7); /* edi */
2315     raw_mov_l_rm(0,(uae_u32)&cpuid_level);
2316     raw_cpuid(0);
2317     raw_mov_l_rm(7,(uae_u32)&cpuid_ptr);
2318     raw_mov_l_Rr(7,0,0);
2319     raw_mov_l_Rr(7,1,4);
2320     raw_mov_l_Rr(7,2,8);
2321     raw_mov_l_Rr(7,3,12);
2322     raw_pop_l_r(7);
2323     raw_pop_l_r(3);
2324     raw_pop_l_r(2);
2325     raw_pop_l_r(1);
2326     raw_pop_l_r(0);
2327     raw_ret();
2328     set_target(tmp);
2329    
2330     ((cpuop_func*)cpuid_space)(0);
2331     return answer;
2332     }
2333    
2334     static void raw_init_cpu(void)
2335     {
2336     x86_regs x;
2337     uae_u32 maxlev;
2338    
2339     x=cpuid(0);
2340     maxlev=x.eax;
2341     write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
2342     maxlev,
2343     x.ebx,
2344     x.ebx>>8,
2345     x.ebx>>16,
2346     x.ebx>>24,
2347     x.edx,
2348     x.edx>>8,
2349     x.edx>>16,
2350     x.edx>>24,
2351     x.ecx,
2352     x.ecx>>8,
2353     x.ecx>>16,
2354     x.ecx>>24
2355     );
2356     have_rat_stall=(x.ecx==0x6c65746e);
2357    
2358     if (maxlev>=1) {
2359     x=cpuid(1);
2360     if (x.edx&(1<<15))
2361     have_cmov=1;
2362     }
2363     if (!have_cmov)
2364     have_rat_stall=0;
2365     #if 0 /* For testing of non-cmov code! */
2366     have_cmov=0;
2367     #endif
2368     #if 1 /* It appears that partial register writes are a bad idea even on
2369     AMD K7 cores, even though they are not supposed to have the
2370     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2371     if (have_cmov)
2372     have_rat_stall=1;
2373     #endif
2374     }
2375    
2376    
2377     /*************************************************************************
2378     * FPU stuff *
2379     *************************************************************************/
2380    
2381    
2382     static __inline__ void raw_fp_init(void)
2383     {
2384     int i;
2385    
2386     for (i=0;i<N_FREGS;i++)
2387     live.spos[i]=-2;
2388     live.tos=-1; /* Stack is empty */
2389     }
2390    
2391     static __inline__ void raw_fp_cleanup_drop(void)
2392     {
2393     #if 0
2394     /* using FINIT instead of popping all the entries.
2395     Seems to have side effects --- there is display corruption in
2396     Quake when this is used */
2397     if (live.tos>1) {
2398     emit_byte(0x9b);
2399     emit_byte(0xdb);
2400     emit_byte(0xe3);
2401     live.tos=-1;
2402     }
2403     #endif
2404     while (live.tos>=1) {
2405     emit_byte(0xde);
2406     emit_byte(0xd9);
2407     live.tos-=2;
2408     }
2409     while (live.tos>=0) {
2410     emit_byte(0xdd);
2411     emit_byte(0xd8);
2412     live.tos--;
2413     }
2414     raw_fp_init();
2415     }
2416    
2417     static __inline__ void make_tos(int r)
2418     {
2419     int p,q;
2420    
2421     if (live.spos[r]<0) { /* Register not yet on stack */
2422     emit_byte(0xd9);
2423     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2424     live.tos++;
2425     live.spos[r]=live.tos;
2426     live.onstack[live.tos]=r;
2427     return;
2428     }
2429     /* Register is on stack */
2430     if (live.tos==live.spos[r])
2431     return;
2432     p=live.spos[r];
2433     q=live.onstack[live.tos];
2434    
2435     emit_byte(0xd9);
2436     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2437     live.onstack[live.tos]=r;
2438     live.spos[r]=live.tos;
2439     live.onstack[p]=q;
2440     live.spos[q]=p;
2441     }
2442    
2443     static __inline__ void make_tos2(int r, int r2)
2444     {
2445     int q;
2446    
2447     make_tos(r2); /* Put the reg that's supposed to end up in position2
2448     on top */
2449    
2450     if (live.spos[r]<0) { /* Register not yet on stack */
2451     make_tos(r); /* This will extend the stack */
2452     return;
2453     }
2454     /* Register is on stack */
2455     emit_byte(0xd9);
2456     emit_byte(0xc9); /* Move r2 into position 2 */
2457    
2458     q=live.onstack[live.tos-1];
2459     live.onstack[live.tos]=q;
2460     live.spos[q]=live.tos;
2461     live.onstack[live.tos-1]=r2;
2462     live.spos[r2]=live.tos-1;
2463    
2464     make_tos(r); /* And r into 1 */
2465     }
2466    
2467     static __inline__ int stackpos(int r)
2468     {
2469     if (live.spos[r]<0)
2470     abort();
2471     if (live.tos<live.spos[r]) {
2472     printf("Looking for spos for fnreg %d\n",r);
2473     abort();
2474     }
2475     return live.tos-live.spos[r];
2476     }
2477    
2478     static __inline__ void usereg(int r)
2479     {
2480     if (live.spos[r]<0)
2481     make_tos(r);
2482     }
2483    
2484     /* This is called with one FP value in a reg *above* tos, which it will
2485     pop off the stack if necessary */
2486     static __inline__ void tos_make(int r)
2487     {
2488     if (live.spos[r]<0) {
2489     live.tos++;
2490     live.spos[r]=live.tos;
2491     live.onstack[live.tos]=r;
2492     return;
2493     }
2494     emit_byte(0xdd);
2495     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2496     and pop it*/
2497     }
2498    
2499    
2500     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2501     {
2502     make_tos(r);
2503     emit_byte(0xdd);
2504     emit_byte(0x15);
2505     emit_long(m);
2506     }
2507     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2508    
2509     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2510     {
2511     make_tos(r);
2512     emit_byte(0xdd);
2513     emit_byte(0x1d);
2514     emit_long(m);
2515     live.onstack[live.tos]=-1;
2516     live.tos--;
2517     live.spos[r]=-2;
2518     }
2519     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2520    
2521     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2522     {
2523     emit_byte(0xdd);
2524     emit_byte(0x05);
2525     emit_long(m);
2526     tos_make(r);
2527     }
2528     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2529    
2530     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2531     {
2532     emit_byte(0xdb);
2533     emit_byte(0x05);
2534     emit_long(m);
2535     tos_make(r);
2536     }
2537     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2538    
2539     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2540     {
2541     make_tos(r);
2542     emit_byte(0xdb);
2543     emit_byte(0x15);
2544     emit_long(m);
2545     }
2546     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2547    
2548     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2549     {
2550     emit_byte(0xd9);
2551     emit_byte(0x05);
2552     emit_long(m);
2553     tos_make(r);
2554     }
2555     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2556    
2557     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2558     {
2559     make_tos(r);
2560     emit_byte(0xd9);
2561     emit_byte(0x15);
2562     emit_long(m);
2563     }
2564     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2565    
2566     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2567     {
2568     int rs;
2569    
2570     /* Stupid x87 can't write a long double to mem without popping the
2571     stack! */
2572     usereg(r);
2573     rs=stackpos(r);
2574     emit_byte(0xd9); /* Get a copy to the top of stack */
2575     emit_byte(0xc0+rs);
2576    
2577     emit_byte(0xdb); /* store and pop it */
2578     emit_byte(0x3d);
2579     emit_long(m);
2580     }
2581     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2582    
2583     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2584     {
2585     int rs;
2586    
2587     make_tos(r);
2588     emit_byte(0xdb); /* store and pop it */
2589     emit_byte(0x3d);
2590     emit_long(m);
2591     live.onstack[live.tos]=-1;
2592     live.tos--;
2593     live.spos[r]=-2;
2594     }
2595     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2596    
2597     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2598     {
2599     emit_byte(0xdb);
2600     emit_byte(0x2d);
2601     emit_long(m);
2602     tos_make(r);
2603     }
2604     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2605    
2606     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2607     {
2608     emit_byte(0xd9);
2609     emit_byte(0xeb);
2610     tos_make(r);
2611     }
2612     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2613    
2614     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2615     {
2616     emit_byte(0xd9);
2617     emit_byte(0xec);
2618     tos_make(r);
2619     }
2620     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2621    
2622     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2623     {
2624     emit_byte(0xd9);
2625     emit_byte(0xea);
2626     tos_make(r);
2627     }
2628     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2629    
2630     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2631     {
2632     emit_byte(0xd9);
2633     emit_byte(0xed);
2634     tos_make(r);
2635     }
2636     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2637    
2638     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2639     {
2640     emit_byte(0xd9);
2641     emit_byte(0xe8);
2642     tos_make(r);
2643     }
2644     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2645    
2646     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2647     {
2648     emit_byte(0xd9);
2649     emit_byte(0xee);
2650     tos_make(r);
2651     }
2652     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2653    
2654     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2655     {
2656     int ds;
2657    
2658     usereg(s);
2659     ds=stackpos(s);
2660     if (ds==0 && live.spos[d]>=0) {
2661     /* source is on top of stack, and we already have the dest */
2662     int dd=stackpos(d);
2663     emit_byte(0xdd);
2664     emit_byte(0xd0+dd);
2665     }
2666     else {
2667     emit_byte(0xd9);
2668     emit_byte(0xc0+ds); /* duplicate source on tos */
2669     tos_make(d); /* store to destination, pop if necessary */
2670     }
2671     }
2672     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2673    
2674     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2675     {
2676     emit_byte(0xd9);
2677     emit_byte(0xa8+index);
2678     emit_long(base);
2679     }
2680     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2681    
2682    
2683     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2684     {
2685     int ds;
2686    
2687     if (d!=s) {
2688     usereg(s);
2689     ds=stackpos(s);
2690     emit_byte(0xd9);
2691     emit_byte(0xc0+ds); /* duplicate source */
2692     emit_byte(0xd9);
2693     emit_byte(0xfa); /* take square root */
2694     tos_make(d); /* store to destination */
2695     }
2696     else {
2697     make_tos(d);
2698     emit_byte(0xd9);
2699     emit_byte(0xfa); /* take square root */
2700     }
2701     }
2702     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2703    
2704     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2705     {
2706     int ds;
2707    
2708     if (d!=s) {
2709     usereg(s);
2710     ds=stackpos(s);
2711     emit_byte(0xd9);
2712     emit_byte(0xc0+ds); /* duplicate source */
2713     emit_byte(0xd9);
2714     emit_byte(0xe1); /* take fabs */
2715     tos_make(d); /* store to destination */
2716     }
2717     else {
2718     make_tos(d);
2719     emit_byte(0xd9);
2720     emit_byte(0xe1); /* take fabs */
2721     }
2722     }
2723     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2724    
2725     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2726     {
2727     int ds;
2728    
2729     if (d!=s) {
2730     usereg(s);
2731     ds=stackpos(s);
2732     emit_byte(0xd9);
2733     emit_byte(0xc0+ds); /* duplicate source */
2734     emit_byte(0xd9);
2735     emit_byte(0xfc); /* take frndint */
2736     tos_make(d); /* store to destination */
2737     }
2738     else {
2739     make_tos(d);
2740     emit_byte(0xd9);
2741     emit_byte(0xfc); /* take frndint */
2742     }
2743     }
2744     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2745    
2746     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2747     {
2748     int ds;
2749    
2750     if (d!=s) {
2751     usereg(s);
2752     ds=stackpos(s);
2753     emit_byte(0xd9);
2754     emit_byte(0xc0+ds); /* duplicate source */
2755     emit_byte(0xd9);
2756     emit_byte(0xff); /* take cos */
2757     tos_make(d); /* store to destination */
2758     }
2759     else {
2760     make_tos(d);
2761     emit_byte(0xd9);
2762     emit_byte(0xff); /* take cos */
2763     }
2764     }
2765     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2766    
2767     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2768     {
2769     int ds;
2770    
2771     if (d!=s) {
2772     usereg(s);
2773     ds=stackpos(s);
2774     emit_byte(0xd9);
2775     emit_byte(0xc0+ds); /* duplicate source */
2776     emit_byte(0xd9);
2777     emit_byte(0xfe); /* take sin */
2778     tos_make(d); /* store to destination */
2779     }
2780     else {
2781     make_tos(d);
2782     emit_byte(0xd9);
2783     emit_byte(0xfe); /* take sin */
2784     }
2785     }
2786     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2787    
2788     double one=1;
2789     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
2790     {
2791     int ds;
2792    
2793     usereg(s);
2794     ds=stackpos(s);
2795     emit_byte(0xd9);
2796     emit_byte(0xc0+ds); /* duplicate source */
2797    
2798     emit_byte(0xd9);
2799     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
2800     emit_byte(0xd9);
2801     emit_byte(0xfc); /* rndint */
2802     emit_byte(0xd9);
2803     emit_byte(0xc9); /* swap top two elements */
2804     emit_byte(0xd8);
2805     emit_byte(0xe1); /* subtract rounded from original */
2806     emit_byte(0xd9);
2807     emit_byte(0xf0); /* f2xm1 */
2808     emit_byte(0xdc);
2809     emit_byte(0x05);
2810     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
2811     emit_byte(0xd9);
2812     emit_byte(0xfd); /* and scale it */
2813     emit_byte(0xdd);
2814     emit_byte(0xd9); /* take he rounded value off */
2815     tos_make(d); /* store to destination */
2816     }
2817     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
2818    
2819     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
2820     {
2821     int ds;
2822    
2823     usereg(s);
2824     ds=stackpos(s);
2825     emit_byte(0xd9);
2826     emit_byte(0xc0+ds); /* duplicate source */
2827     emit_byte(0xd9);
2828     emit_byte(0xea); /* fldl2e */
2829     emit_byte(0xde);
2830     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
2831    
2832     emit_byte(0xd9);
2833     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
2834     emit_byte(0xd9);
2835     emit_byte(0xfc); /* rndint */
2836     emit_byte(0xd9);
2837     emit_byte(0xc9); /* swap top two elements */
2838     emit_byte(0xd8);
2839     emit_byte(0xe1); /* subtract rounded from original */
2840     emit_byte(0xd9);
2841     emit_byte(0xf0); /* f2xm1 */
2842     emit_byte(0xdc);
2843     emit_byte(0x05);
2844     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
2845     emit_byte(0xd9);
2846     emit_byte(0xfd); /* and scale it */
2847     emit_byte(0xdd);
2848     emit_byte(0xd9); /* take he rounded value off */
2849     tos_make(d); /* store to destination */
2850     }
2851     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
2852    
2853     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
2854     {
2855     int ds;
2856    
2857     usereg(s);
2858     ds=stackpos(s);
2859     emit_byte(0xd9);
2860     emit_byte(0xc0+ds); /* duplicate source */
2861     emit_byte(0xd9);
2862     emit_byte(0xe8); /* push '1' */
2863     emit_byte(0xd9);
2864     emit_byte(0xc9); /* swap top two */
2865     emit_byte(0xd9);
2866     emit_byte(0xf1); /* take 1*log2(x) */
2867     tos_make(d); /* store to destination */
2868     }
2869     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
2870    
2871    
2872     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
2873     {
2874     int ds;
2875    
2876     if (d!=s) {
2877     usereg(s);
2878     ds=stackpos(s);
2879     emit_byte(0xd9);
2880     emit_byte(0xc0+ds); /* duplicate source */
2881     emit_byte(0xd9);
2882     emit_byte(0xe0); /* take fchs */
2883     tos_make(d); /* store to destination */
2884     }
2885     else {
2886     make_tos(d);
2887     emit_byte(0xd9);
2888     emit_byte(0xe0); /* take fchs */
2889     }
2890     }
2891     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
2892    
2893     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
2894     {
2895     int ds;
2896    
2897     usereg(s);
2898     usereg(d);
2899    
2900     if (live.spos[s]==live.tos) {
2901     /* Source is on top of stack */
2902     ds=stackpos(d);
2903     emit_byte(0xdc);
2904     emit_byte(0xc0+ds); /* add source to dest*/
2905     }
2906     else {
2907     make_tos(d);
2908     ds=stackpos(s);
2909    
2910     emit_byte(0xd8);
2911     emit_byte(0xc0+ds); /* add source to dest*/
2912     }
2913     }
2914     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
2915    
2916     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
2917     {
2918     int ds;
2919    
2920     usereg(s);
2921     usereg(d);
2922    
2923     if (live.spos[s]==live.tos) {
2924     /* Source is on top of stack */
2925     ds=stackpos(d);
2926     emit_byte(0xdc);
2927     emit_byte(0xe8+ds); /* sub source from dest*/
2928     }
2929     else {
2930     make_tos(d);
2931     ds=stackpos(s);
2932    
2933     emit_byte(0xd8);
2934     emit_byte(0xe0+ds); /* sub src from dest */
2935     }
2936     }
2937     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
2938    
2939     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
2940     {
2941     int ds;
2942    
2943     usereg(s);
2944     usereg(d);
2945    
2946     make_tos(d);
2947     ds=stackpos(s);
2948    
2949     emit_byte(0xdd);
2950     emit_byte(0xe0+ds); /* cmp dest with source*/
2951     }
2952     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
2953    
2954     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
2955     {
2956     int ds;
2957    
2958     usereg(s);
2959     usereg(d);
2960    
2961     if (live.spos[s]==live.tos) {
2962     /* Source is on top of stack */
2963     ds=stackpos(d);
2964     emit_byte(0xdc);
2965     emit_byte(0xc8+ds); /* mul dest by source*/
2966     }
2967     else {
2968     make_tos(d);
2969     ds=stackpos(s);
2970    
2971     emit_byte(0xd8);
2972     emit_byte(0xc8+ds); /* mul dest by source*/
2973     }
2974     }
2975     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
2976    
2977     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
2978     {
2979     int ds;
2980    
2981     usereg(s);
2982     usereg(d);
2983    
2984     if (live.spos[s]==live.tos) {
2985     /* Source is on top of stack */
2986     ds=stackpos(d);
2987     emit_byte(0xdc);
2988     emit_byte(0xf8+ds); /* div dest by source */
2989     }
2990     else {
2991     make_tos(d);
2992     ds=stackpos(s);
2993    
2994     emit_byte(0xd8);
2995     emit_byte(0xf0+ds); /* div dest by source*/
2996     }
2997     }
2998     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
2999    
3000     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3001     {
3002     int ds;
3003    
3004     usereg(s);
3005     usereg(d);
3006    
3007     make_tos2(d,s);
3008     ds=stackpos(s);
3009    
3010     if (ds!=1) {
3011     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3012     abort();
3013     }
3014     emit_byte(0xd9);
3015     emit_byte(0xf8); /* take rem from dest by source */
3016     }
3017     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3018    
3019     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3020     {
3021     int ds;
3022    
3023     usereg(s);
3024     usereg(d);
3025    
3026     make_tos2(d,s);
3027     ds=stackpos(s);
3028    
3029     if (ds!=1) {
3030     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3031     abort();
3032     }
3033     emit_byte(0xd9);
3034     emit_byte(0xf5); /* take rem1 from dest by source */
3035     }
3036     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3037    
3038    
3039     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3040     {
3041     make_tos(r);
3042     emit_byte(0xd9); /* ftst */
3043     emit_byte(0xe4);
3044     }
3045     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3046    
/* raw_fflags_into_flags() clobbers %eax when the target processor
   doesn't support fucomi (it then goes through fnstsw ax / sahf) */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
3050    
3051     static __inline__ void raw_fflags_into_flags(int r)
3052     {
3053     int p;
3054    
3055     usereg(r);
3056     p=stackpos(r);
3057    
3058     emit_byte(0xd9);
3059     emit_byte(0xee); /* Push 0 */
3060     emit_byte(0xd9);
3061     emit_byte(0xc9+p); /* swap top two around */
3062     if (have_cmov) {
3063     // gb-- fucomi is for P6 cores only, not K6-2 then...
3064     emit_byte(0xdb);
3065     emit_byte(0xe9+p); /* fucomi them */
3066     }
3067     else {
3068     emit_byte(0xdd);
3069     emit_byte(0xe1+p); /* fucom them */
3070     emit_byte(0x9b);
3071     emit_byte(0xdf);
3072     emit_byte(0xe0); /* fstsw ax */
3073     raw_sahf(0); /* sahf */
3074     }
3075     emit_byte(0xdd);
3076     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3077     }