ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.2
Committed: 2002-09-18T15:56:17Z (22 years ago) by gbeauche
Branch: MAIN
Changes since 1.1: +75 -15 lines
Log Message:
Optimize runtime assembler with shorter equivalents when the accumulator
(%eax) is referenced along with immediates.

File Contents

# User Rev Content
1 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
2     only target, and it's easier this way... */
3    
4     /*************************************************************************
5     * Some basic information about the target CPU *
6     *************************************************************************/
7    
/* Indices of the eight 32-bit general-purpose registers. The emitters in
   this file add these values directly into opcode and ModRM bytes
   (e.g. 0x50+r in raw_push_l_r), so they must match the hardware
   register numbers. */
8     #define EAX_INDEX 0
9     #define ECX_INDEX 1
10     #define EDX_INDEX 2
11     #define EBX_INDEX 3
12     #define ESP_INDEX 4
13     #define EBP_INDEX 5
14     #define ESI_INDEX 6
15     #define EDI_INDEX 7
16    
17     /* The register in which subroutines return an integer return value
       (0 is EAX_INDEX) */
18     #define REG_RESULT 0
19    
20     /* The registers subroutines take their first and second argument in */
21     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
22     /* Handle the _fastcall parameters of ECX and EDX */
23     #define REG_PAR1 1
24     #define REG_PAR2 2
25     #else
/* Otherwise index 0 (EAX_INDEX) and index 2 (EDX_INDEX) are used.
   NOTE(review): presumably this matches a register-parameter calling
   convention of the non-MSVC build -- confirm against the callers. */
26     #define REG_PAR1 0
27     #define REG_PAR2 2
28     #endif
29    
30     /* Three registers that are not used for any of the above:
       index 6 is ESI_INDEX, 5 is EBP_INDEX and 3 is EBX_INDEX */
31     #define REG_NOPAR1 6
32     #define REG_NOPAR2 5
33     #define REG_NOPAR3 3
34    
35     #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
36     #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): in this configuration REG_PC_TMP has the same value as
   REG_PC_PRE, although the #else branch describes it as a register that
   differs from REG_PC_PRE -- confirm this is intended. */
37     #define REG_PC_TMP 0
38     #else
39     #define REG_PC_TMP 1 /* Another register that is not the above */
40     #endif
41    
/* Fixed-register constraints. Index 1 is ECX_INDEX: the raw_*_rr shift and
   rotate emitters below never encode their count register, they emit the
   forms that take the count from CL. raw_imul_64_32 and raw_mul_64_32
   abort unless called with d==MUL_NREG1 and s==MUL_NREG2. */
42     #define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount.
43     -1 if any reg will do */
44     #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
45     #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
46    
/* Lists of register indices, each terminated by -1.
   always_used contains only 4 (ESP_INDEX); can_byte contains indices 0-3
   (EAX, ECX, EDX, EBX); can_word contains every index except 4.
   NOTE(review): presumably consumed by the register allocator to decide
   which registers are reserved and which may hold byte/word values -- the
   consumer is not visible in this part of the file. */
47     uae_s8 always_used[]={4,-1};
48     uae_s8 can_byte[]={0,1,2,3,-1};
49     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
50    
51     /* cpuopti mutates instruction handlers to assume registers are saved
52     by the caller. One entry per register index; only index 4 (ESP_INDEX)
       is marked with 1. */
53     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
54    
55     /* This *should* be the same as call_saved. But:
56     - We might not really know which registers are saved, and which aren't,
57     so we need to preserve some, but don't want to rely on everyone else
58     also saving those registers
59     - Special registers (such as the stack pointer) should not be "preserved"
60     by pushing, even though they are "saved" across function calls
       One entry per register index; every index except 4 (ESP_INDEX) is
       marked with 1.
61     */
62     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
63    
64     /* Whether classes of instructions do or don't clobber the native flags.
       A macro with an empty expansion marks a class that leaves the flags
       alone; the others expand to a clobber_flags() call.
       NOTE(review): clobber_flags() and the users of these macros are not
       visible in this part of the file. */
65     #define CLOBBER_MOV
66     #define CLOBBER_LEA
67     #define CLOBBER_CMOV
68     #define CLOBBER_POP
69     #define CLOBBER_PUSH
70     #define CLOBBER_SUB clobber_flags()
71     #define CLOBBER_SBB clobber_flags()
72     #define CLOBBER_CMP clobber_flags()
73     #define CLOBBER_ADD clobber_flags()
74     #define CLOBBER_ADC clobber_flags()
75     #define CLOBBER_AND clobber_flags()
76     #define CLOBBER_OR clobber_flags()
77     #define CLOBBER_XOR clobber_flags()
78    
79     #define CLOBBER_ROL clobber_flags()
80     #define CLOBBER_ROR clobber_flags()
81     #define CLOBBER_SHLL clobber_flags()
82     #define CLOBBER_SHRL clobber_flags()
83     #define CLOBBER_SHRA clobber_flags()
84     #define CLOBBER_TEST clobber_flags()
85     #define CLOBBER_CL16
86     #define CLOBBER_CL8
87     #define CLOBBER_SE16
88     #define CLOBBER_SE8
89     #define CLOBBER_ZE16
90     #define CLOBBER_ZE8
91     #define CLOBBER_SW16 clobber_flags()
92     #define CLOBBER_SW32
93     #define CLOBBER_SETCC
94     #define CLOBBER_MUL clobber_flags()
95     #define CLOBBER_BT clobber_flags()
96     #define CLOBBER_BSF clobber_flags()
97    
/* Compile-time switches selecting shorter encodings in the emitters below. */
/* optimize_accum: use the accumulator-specific short form when the operand
   is EAX_INDEX and an immediate is involved (see raw_sub_w_ri). */
98 gbeauche 1.2 const bool optimize_accum = true;
/* optimize_imm8: use an 8-bit displacement when the value passes isbyte()
   (see raw_lea_l_brr). */
99 gbeauche 1.1 const bool optimize_imm8 = true;
/* optimize_shift_once: use the shift/rotate-by-one opcodes (0xd0/0xd1)
   instead of the immediate-count forms when the count is 1. */
100     const bool optimize_shift_once = true;
101    
102     /*************************************************************************
103     * Actual encoding of the instructions on the target CPU *
104     *************************************************************************/
105    
106 gbeauche 1.2 static __inline__ int isaccum(int r)
107     {
108     return (r == EAX_INDEX);
109     }
110    
111 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
112     {
113     return (x>=-128 && x<=127);
114     }
115    
116     static __inline__ int isword(uae_s32 x)
117     {
118     return (x>=-32768 && x<=32767);
119     }
120    
121     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
122     {
123     emit_byte(0x50+r);
124     }
125     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
126    
127     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
128     {
129     emit_byte(0x58+r);
130     }
131     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
132    
133     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
134     {
135     emit_byte(0x0f);
136     emit_byte(0xba);
137     emit_byte(0xe0+r);
138     emit_byte(i);
139     }
140     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
141    
142     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
143     {
144     emit_byte(0x0f);
145     emit_byte(0xa3);
146     emit_byte(0xc0+8*b+r);
147     }
148     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
149    
150     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
151     {
152     emit_byte(0x0f);
153     emit_byte(0xba);
154     emit_byte(0xf8+r);
155     emit_byte(i);
156     }
157     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
158    
159     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
160     {
161     emit_byte(0x0f);
162     emit_byte(0xbb);
163     emit_byte(0xc0+8*b+r);
164     }
165     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
166    
167    
168     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
169     {
170     emit_byte(0x0f);
171     emit_byte(0xba);
172     emit_byte(0xf0+r);
173     emit_byte(i);
174     }
175     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
176    
177     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
178     {
179     emit_byte(0x0f);
180     emit_byte(0xb3);
181     emit_byte(0xc0+8*b+r);
182     }
183     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
184    
185     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
186     {
187     emit_byte(0x0f);
188     emit_byte(0xba);
189     emit_byte(0xe8+r);
190     emit_byte(i);
191     }
192     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
193    
194     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
195     {
196     emit_byte(0x0f);
197     emit_byte(0xab);
198     emit_byte(0xc0+8*b+r);
199     }
200     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
201    
202     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
203     {
204     emit_byte(0x66);
205     if (isbyte(i)) {
206     emit_byte(0x83);
207     emit_byte(0xe8+d);
208     emit_byte(i);
209     }
210     else {
211 gbeauche 1.2 if (optimize_accum && isaccum(d))
212     emit_byte(0x2d);
213     else {
214 gbeauche 1.1 emit_byte(0x81);
215     emit_byte(0xe8+d);
216 gbeauche 1.2 }
217 gbeauche 1.1 emit_word(i);
218     }
219     }
220     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
221    
222    
223     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
224     {
225     emit_byte(0x8b);
226     emit_byte(0x05+8*d);
227     emit_long(s);
228     }
229     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
230    
231     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
232     {
233     emit_byte(0xc7);
234     emit_byte(0x05);
235     emit_long(d);
236     emit_long(s);
237     }
238     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
239    
240     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
241     {
242     emit_byte(0x66);
243     emit_byte(0xc7);
244     emit_byte(0x05);
245     emit_long(d);
246     emit_word(s);
247     }
248     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
249    
250     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
251     {
252     emit_byte(0xc6);
253     emit_byte(0x05);
254     emit_long(d);
255     emit_byte(s);
256     }
257     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
258    
259     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
260     {
261     if (optimize_shift_once && (i == 1)) {
262     emit_byte(0xd0);
263     emit_byte(0x05);
264     emit_long(d);
265     }
266     else {
267     emit_byte(0xc0);
268     emit_byte(0x05);
269     emit_long(d);
270     emit_byte(i);
271     }
272     }
273     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
274    
275     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
276     {
277     if (optimize_shift_once && (i == 1)) {
278     emit_byte(0xd0);
279     emit_byte(0xc0+r);
280     }
281     else {
282     emit_byte(0xc0);
283     emit_byte(0xc0+r);
284     emit_byte(i);
285     }
286     }
287     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
288    
289     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
290     {
291     emit_byte(0x66);
292     emit_byte(0xc1);
293     emit_byte(0xc0+r);
294     emit_byte(i);
295     }
296     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
297    
298     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
299     {
300     if (optimize_shift_once && (i == 1)) {
301     emit_byte(0xd1);
302     emit_byte(0xc0+r);
303     }
304     else {
305     emit_byte(0xc1);
306     emit_byte(0xc0+r);
307     emit_byte(i);
308     }
309     }
310     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
311    
312     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
313     {
314     emit_byte(0xd3);
315     emit_byte(0xc0+d);
316     }
317     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
318    
319     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
320     {
321     emit_byte(0x66);
322     emit_byte(0xd3);
323     emit_byte(0xc0+d);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
328     {
329     emit_byte(0xd2);
330     emit_byte(0xc0+d);
331     }
332     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
333    
334     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
335     {
336     emit_byte(0xd3);
337     emit_byte(0xe0+d);
338     }
339     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
340    
341     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
342     {
343     emit_byte(0x66);
344     emit_byte(0xd3);
345     emit_byte(0xe0+d);
346     }
347     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
348    
349     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350     {
351     emit_byte(0xd2);
352     emit_byte(0xe0+d);
353     }
354     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
355    
356     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
357     {
358     if (optimize_shift_once && (i == 1)) {
359     emit_byte(0xd0);
360     emit_byte(0xc8+r);
361     }
362     else {
363     emit_byte(0xc0);
364     emit_byte(0xc8+r);
365     emit_byte(i);
366     }
367     }
368     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
369    
370     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371     {
372     emit_byte(0x66);
373     emit_byte(0xc1);
374     emit_byte(0xc8+r);
375     emit_byte(i);
376     }
377     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
378    
379     // gb-- used for making an fpcr value in compemu_fpp.cpp
380     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
381     {
382     emit_byte(0x0b);
383     emit_byte(0x05+8*d);
384     emit_long(s);
385     }
386     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
387    
388     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
389     {
390     if (optimize_shift_once && (i == 1)) {
391     emit_byte(0xd1);
392     emit_byte(0xc8+r);
393     }
394     else {
395     emit_byte(0xc1);
396     emit_byte(0xc8+r);
397     emit_byte(i);
398     }
399     }
400     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
401    
402     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
403     {
404     emit_byte(0xd3);
405     emit_byte(0xc8+d);
406     }
407     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
408    
409     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
410     {
411     emit_byte(0x66);
412     emit_byte(0xd3);
413     emit_byte(0xc8+d);
414     }
415     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
416    
417     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
418     {
419     emit_byte(0xd2);
420     emit_byte(0xc8+d);
421     }
422     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
423    
424     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
425     {
426     emit_byte(0xd3);
427     emit_byte(0xe8+d);
428     }
429     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
430    
431     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
432     {
433     emit_byte(0x66);
434     emit_byte(0xd3);
435     emit_byte(0xe8+d);
436     }
437     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
438    
439     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
440     {
441     emit_byte(0xd2);
442     emit_byte(0xe8+d);
443     }
444     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
445    
446     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
447     {
448     emit_byte(0xd3);
449     emit_byte(0xf8+d);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
454     {
455     emit_byte(0x66);
456     emit_byte(0xd3);
457     emit_byte(0xf8+d);
458     }
459     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
460    
461     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
462     {
463     emit_byte(0xd2);
464     emit_byte(0xf8+d);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
469     {
470     if (optimize_shift_once && (i == 1)) {
471     emit_byte(0xd1);
472     emit_byte(0xe0+r);
473     }
474     else {
475     emit_byte(0xc1);
476     emit_byte(0xe0+r);
477     emit_byte(i);
478     }
479     }
480     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
481    
482     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
483     {
484     emit_byte(0x66);
485     emit_byte(0xc1);
486     emit_byte(0xe0+r);
487     emit_byte(i);
488     }
489     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
490    
491     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
492     {
493     if (optimize_shift_once && (i == 1)) {
494     emit_byte(0xd0);
495     emit_byte(0xe0+r);
496     }
497     else {
498     emit_byte(0xc0);
499     emit_byte(0xe0+r);
500     emit_byte(i);
501     }
502     }
503     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
504    
505     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
506     {
507     if (optimize_shift_once && (i == 1)) {
508     emit_byte(0xd1);
509     emit_byte(0xe8+r);
510     }
511     else {
512     emit_byte(0xc1);
513     emit_byte(0xe8+r);
514     emit_byte(i);
515     }
516     }
517     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
518    
519     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
520     {
521     emit_byte(0x66);
522     emit_byte(0xc1);
523     emit_byte(0xe8+r);
524     emit_byte(i);
525     }
526     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
527    
528     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
529     {
530     if (optimize_shift_once && (i == 1)) {
531     emit_byte(0xd0);
532     emit_byte(0xe8+r);
533     }
534     else {
535     emit_byte(0xc0);
536     emit_byte(0xe8+r);
537     emit_byte(i);
538     }
539     }
540     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
541    
542     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
543     {
544     if (optimize_shift_once && (i == 1)) {
545     emit_byte(0xd1);
546     emit_byte(0xf8+r);
547     }
548     else {
549     emit_byte(0xc1);
550     emit_byte(0xf8+r);
551     emit_byte(i);
552     }
553     }
554     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
555    
556     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
557     {
558     emit_byte(0x66);
559     emit_byte(0xc1);
560     emit_byte(0xf8+r);
561     emit_byte(i);
562     }
563     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
564    
565     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
566     {
567     if (optimize_shift_once && (i == 1)) {
568     emit_byte(0xd0);
569     emit_byte(0xf8+r);
570     }
571     else {
572     emit_byte(0xc0);
573     emit_byte(0xf8+r);
574     emit_byte(i);
575     }
576     }
577     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
578    
579     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
580     {
581     emit_byte(0x9e);
582     }
583     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
584    
585     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
586     {
587     emit_byte(0x0f);
588     emit_byte(0xa2);
589     }
590     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
591    
592     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
593     {
594     emit_byte(0x9f);
595     }
596     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
597    
598     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
599     {
600     emit_byte(0x0f);
601     emit_byte(0x90+cc);
602     emit_byte(0xc0+d);
603     }
604     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
605    
606     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
607     {
608     emit_byte(0x0f);
609     emit_byte(0x90+cc);
610     emit_byte(0x05);
611     emit_long(d);
612     }
613     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
614    
615     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
616     {
617     if (have_cmov) {
618     emit_byte(0x0f);
619     emit_byte(0x40+cc);
620     emit_byte(0xc0+8*d+s);
621     }
622     else { /* replacement using branch and mov */
623     int uncc=(cc^1);
624     emit_byte(0x70+uncc);
625     emit_byte(2); /* skip next 2 bytes if not cc=true */
626     emit_byte(0x89);
627     emit_byte(0xc0+8*s+d);
628     }
629     }
630     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
631    
632     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
633     {
634     emit_byte(0x0f);
635     emit_byte(0xbc);
636     emit_byte(0xc0+8*d+s);
637     }
638     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
639    
640     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
641     {
642     emit_byte(0x0f);
643     emit_byte(0xbf);
644     emit_byte(0xc0+8*d+s);
645     }
646     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
647    
648     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
649     {
650     emit_byte(0x0f);
651     emit_byte(0xbe);
652     emit_byte(0xc0+8*d+s);
653     }
654     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
655    
656     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
657     {
658     emit_byte(0x0f);
659     emit_byte(0xb7);
660     emit_byte(0xc0+8*d+s);
661     }
662     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
663    
664     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
665     {
666     emit_byte(0x0f);
667     emit_byte(0xb6);
668     emit_byte(0xc0+8*d+s);
669     }
670     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
671    
672     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
673     {
674     emit_byte(0x0f);
675     emit_byte(0xaf);
676     emit_byte(0xc0+8*d+s);
677     }
678     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
679    
680     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
681     {
682     if (d!=MUL_NREG1 || s!=MUL_NREG2)
683     abort();
684     emit_byte(0xf7);
685     emit_byte(0xea);
686     }
687     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
688    
689     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
690     {
691     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
692     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
693     abort();
694     }
695     emit_byte(0xf7);
696     emit_byte(0xe2);
697     }
698     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
699    
700     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
701     {
702     abort(); /* %^$&%^$%#^ x86! */
703     emit_byte(0x0f);
704     emit_byte(0xaf);
705     emit_byte(0xc0+8*d+s);
706     }
707     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
708    
709     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
710     {
711     emit_byte(0x88);
712     emit_byte(0xc0+8*s+d);
713     }
714     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
715    
716     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
717     {
718     emit_byte(0x66);
719     emit_byte(0x89);
720     emit_byte(0xc0+8*s+d);
721     }
722     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
723    
724     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
725     {
726     int isebp=(baser==5)?0x40:0;
727     int fi;
728    
729     switch(factor) {
730     case 1: fi=0; break;
731     case 2: fi=1; break;
732     case 4: fi=2; break;
733     case 8: fi=3; break;
734     default: abort();
735     }
736    
737    
738     emit_byte(0x8b);
739     emit_byte(0x04+8*d+isebp);
740     emit_byte(baser+8*index+0x40*fi);
741     if (isebp)
742     emit_byte(0x00);
743     }
744     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
745    
746     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
747     {
748     int fi;
749     int isebp;
750    
751     switch(factor) {
752     case 1: fi=0; break;
753     case 2: fi=1; break;
754     case 4: fi=2; break;
755     case 8: fi=3; break;
756     default: abort();
757     }
758     isebp=(baser==5)?0x40:0;
759    
760     emit_byte(0x66);
761     emit_byte(0x8b);
762     emit_byte(0x04+8*d+isebp);
763     emit_byte(baser+8*index+0x40*fi);
764     if (isebp)
765     emit_byte(0x00);
766     }
767     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
768    
769     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
770     {
771     int fi;
772     int isebp;
773    
774     switch(factor) {
775     case 1: fi=0; break;
776     case 2: fi=1; break;
777     case 4: fi=2; break;
778     case 8: fi=3; break;
779     default: abort();
780     }
781     isebp=(baser==5)?0x40:0;
782    
783     emit_byte(0x8a);
784     emit_byte(0x04+8*d+isebp);
785     emit_byte(baser+8*index+0x40*fi);
786     if (isebp)
787     emit_byte(0x00);
788     }
789     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
790    
791     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
792     {
793     int fi;
794     int isebp;
795    
796     switch(factor) {
797     case 1: fi=0; break;
798     case 2: fi=1; break;
799     case 4: fi=2; break;
800     case 8: fi=3; break;
801     default: abort();
802     }
803    
804    
805     isebp=(baser==5)?0x40:0;
806    
807     emit_byte(0x89);
808     emit_byte(0x04+8*s+isebp);
809     emit_byte(baser+8*index+0x40*fi);
810     if (isebp)
811     emit_byte(0x00);
812     }
813     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
814    
815     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
816     {
817     int fi;
818     int isebp;
819    
820     switch(factor) {
821     case 1: fi=0; break;
822     case 2: fi=1; break;
823     case 4: fi=2; break;
824     case 8: fi=3; break;
825     default: abort();
826     }
827     isebp=(baser==5)?0x40:0;
828    
829     emit_byte(0x66);
830     emit_byte(0x89);
831     emit_byte(0x04+8*s+isebp);
832     emit_byte(baser+8*index+0x40*fi);
833     if (isebp)
834     emit_byte(0x00);
835     }
836     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
837    
838     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
839     {
840     int fi;
841     int isebp;
842    
843     switch(factor) {
844     case 1: fi=0; break;
845     case 2: fi=1; break;
846     case 4: fi=2; break;
847     case 8: fi=3; break;
848     default: abort();
849     }
850     isebp=(baser==5)?0x40:0;
851    
852     emit_byte(0x88);
853     emit_byte(0x04+8*s+isebp);
854     emit_byte(baser+8*index+0x40*fi);
855     if (isebp)
856     emit_byte(0x00);
857     }
858     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
859    
860     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
861     {
862     int fi;
863    
864     switch(factor) {
865     case 1: fi=0; break;
866     case 2: fi=1; break;
867     case 4: fi=2; break;
868     case 8: fi=3; break;
869     default: abort();
870     }
871    
872     emit_byte(0x89);
873     emit_byte(0x84+8*s);
874     emit_byte(baser+8*index+0x40*fi);
875     emit_long(base);
876     }
877     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
878    
879     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
880     {
881     int fi;
882    
883     switch(factor) {
884     case 1: fi=0; break;
885     case 2: fi=1; break;
886     case 4: fi=2; break;
887     case 8: fi=3; break;
888     default: abort();
889     }
890    
891     emit_byte(0x66);
892     emit_byte(0x89);
893     emit_byte(0x84+8*s);
894     emit_byte(baser+8*index+0x40*fi);
895     emit_long(base);
896     }
897     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
898    
899     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
900     {
901     int fi;
902    
903     switch(factor) {
904     case 1: fi=0; break;
905     case 2: fi=1; break;
906     case 4: fi=2; break;
907     case 8: fi=3; break;
908     default: abort();
909     }
910    
911     emit_byte(0x88);
912     emit_byte(0x84+8*s);
913     emit_byte(baser+8*index+0x40*fi);
914     emit_long(base);
915     }
916     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
917    
918     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
919     {
920     int fi;
921    
922     switch(factor) {
923     case 1: fi=0; break;
924     case 2: fi=1; break;
925     case 4: fi=2; break;
926     case 8: fi=3; break;
927     default: abort();
928     }
929    
930     emit_byte(0x8b);
931     emit_byte(0x84+8*d);
932     emit_byte(baser+8*index+0x40*fi);
933     emit_long(base);
934     }
935     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
936    
937     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
938     {
939     int fi;
940    
941     switch(factor) {
942     case 1: fi=0; break;
943     case 2: fi=1; break;
944     case 4: fi=2; break;
945     case 8: fi=3; break;
946     default: abort();
947     }
948    
949     emit_byte(0x66);
950     emit_byte(0x8b);
951     emit_byte(0x84+8*d);
952     emit_byte(baser+8*index+0x40*fi);
953     emit_long(base);
954     }
955     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
956    
957     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
958     {
959     int fi;
960    
961     switch(factor) {
962     case 1: fi=0; break;
963     case 2: fi=1; break;
964     case 4: fi=2; break;
965     case 8: fi=3; break;
966     default: abort();
967     }
968    
969     emit_byte(0x8a);
970     emit_byte(0x84+8*d);
971     emit_byte(baser+8*index+0x40*fi);
972     emit_long(base);
973     }
974     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
975    
976     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
977     {
978     int fi;
979     switch(factor) {
980     case 1: fi=0; break;
981     case 2: fi=1; break;
982     case 4: fi=2; break;
983     case 8: fi=3; break;
984     default:
985     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
986     abort();
987     }
988     emit_byte(0x8b);
989     emit_byte(0x04+8*d);
990     emit_byte(0x05+8*index+64*fi);
991     emit_long(base);
992     }
993     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
994    
995     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
996     {
997     int fi;
998     switch(factor) {
999     case 1: fi=0; break;
1000     case 2: fi=1; break;
1001     case 4: fi=2; break;
1002     case 8: fi=3; break;
1003     default:
1004     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1005     abort();
1006     }
1007     if (have_cmov) {
1008     emit_byte(0x0f);
1009     emit_byte(0x40+cond);
1010     emit_byte(0x04+8*d);
1011     emit_byte(0x05+8*index+64*fi);
1012     emit_long(base);
1013     }
1014     else { /* replacement using branch and mov */
1015     int uncc=(cond^1);
1016     emit_byte(0x70+uncc);
1017     emit_byte(7); /* skip next 7 bytes if not cc=true */
1018     emit_byte(0x8b);
1019     emit_byte(0x04+8*d);
1020     emit_byte(0x05+8*index+64*fi);
1021     emit_long(base);
1022     }
1023     }
1024     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1025    
1026     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1027     {
1028     if (have_cmov) {
1029     emit_byte(0x0f);
1030     emit_byte(0x40+cond);
1031     emit_byte(0x05+8*d);
1032     emit_long(mem);
1033     }
1034     else { /* replacement using branch and mov */
1035     int uncc=(cond^1);
1036     emit_byte(0x70+uncc);
1037     emit_byte(6); /* skip next 6 bytes if not cc=true */
1038     emit_byte(0x8b);
1039     emit_byte(0x05+8*d);
1040     emit_long(mem);
1041     }
1042     }
1043     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1044    
1045     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1046     {
1047     emit_byte(0x8b);
1048     emit_byte(0x40+8*d+s);
1049     emit_byte(offset);
1050     }
1051     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1052    
1053     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1054     {
1055     emit_byte(0x66);
1056     emit_byte(0x8b);
1057     emit_byte(0x40+8*d+s);
1058     emit_byte(offset);
1059     }
1060     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1061    
1062     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1063     {
1064     emit_byte(0x8a);
1065     emit_byte(0x40+8*d+s);
1066     emit_byte(offset);
1067     }
1068     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1069    
1070     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1071     {
1072     emit_byte(0x8b);
1073     emit_byte(0x80+8*d+s);
1074     emit_long(offset);
1075     }
1076     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1077    
1078     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1079     {
1080     emit_byte(0x66);
1081     emit_byte(0x8b);
1082     emit_byte(0x80+8*d+s);
1083     emit_long(offset);
1084     }
1085     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1086    
1087     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1088     {
1089     emit_byte(0x8a);
1090     emit_byte(0x80+8*d+s);
1091     emit_long(offset);
1092     }
1093     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1094    
1095     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1096     {
1097     emit_byte(0xc7);
1098     emit_byte(0x40+d);
1099     emit_byte(offset);
1100     emit_long(i);
1101     }
1102     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1103    
1104     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1105     {
1106     emit_byte(0x66);
1107     emit_byte(0xc7);
1108     emit_byte(0x40+d);
1109     emit_byte(offset);
1110     emit_word(i);
1111     }
1112     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1113    
1114     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1115     {
1116     emit_byte(0xc6);
1117     emit_byte(0x40+d);
1118     emit_byte(offset);
1119     emit_byte(i);
1120     }
1121     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1122    
1123     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1124     {
1125     emit_byte(0x89);
1126     emit_byte(0x40+8*s+d);
1127     emit_byte(offset);
1128     }
1129     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1130    
1131     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1132     {
1133     emit_byte(0x66);
1134     emit_byte(0x89);
1135     emit_byte(0x40+8*s+d);
1136     emit_byte(offset);
1137     }
1138     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1139    
1140     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1141     {
1142     emit_byte(0x88);
1143     emit_byte(0x40+8*s+d);
1144     emit_byte(offset);
1145     }
1146     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1147    
1148     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1149     {
1150     if (optimize_imm8 && isbyte(offset)) {
1151     emit_byte(0x8d);
1152     emit_byte(0x40+8*d+s);
1153     emit_byte(offset);
1154     }
1155     else {
1156     emit_byte(0x8d);
1157     emit_byte(0x80+8*d+s);
1158     emit_long(offset);
1159     }
1160     }
1161     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1162    
1163     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1164     {
1165     int fi;
1166    
1167     switch(factor) {
1168     case 1: fi=0; break;
1169     case 2: fi=1; break;
1170     case 4: fi=2; break;
1171     case 8: fi=3; break;
1172     default: abort();
1173     }
1174    
1175     if (optimize_imm8 && isbyte(offset)) {
1176     emit_byte(0x8d);
1177     emit_byte(0x44+8*d);
1178     emit_byte(0x40*fi+8*index+s);
1179     emit_byte(offset);
1180     }
1181     else {
1182     emit_byte(0x8d);
1183     emit_byte(0x84+8*d);
1184     emit_byte(0x40*fi+8*index+s);
1185     emit_long(offset);
1186     }
1187     }
1188     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1189    
1190     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1191     {
1192     int isebp=(s==5)?0x40:0;
1193     int fi;
1194    
1195     switch(factor) {
1196     case 1: fi=0; break;
1197     case 2: fi=1; break;
1198     case 4: fi=2; break;
1199     case 8: fi=3; break;
1200     default: abort();
1201     }
1202    
1203     emit_byte(0x8d);
1204     emit_byte(0x04+8*d+isebp);
1205     emit_byte(0x40*fi+8*index+s);
1206     if (isebp)
1207     emit_byte(0);
1208     }
1209     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1210    
1211     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1212     {
1213     if (optimize_imm8 && isbyte(offset)) {
1214     emit_byte(0x89);
1215     emit_byte(0x40+8*s+d);
1216     emit_byte(offset);
1217     }
1218     else {
1219     emit_byte(0x89);
1220     emit_byte(0x80+8*s+d);
1221     emit_long(offset);
1222     }
1223     }
1224     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1225    
1226     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1227     {
1228     emit_byte(0x66);
1229     emit_byte(0x89);
1230     emit_byte(0x80+8*s+d);
1231     emit_long(offset);
1232     }
1233     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1234    
1235     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1236     {
1237     if (optimize_imm8 && isbyte(offset)) {
1238     emit_byte(0x88);
1239     emit_byte(0x40+8*s+d);
1240     emit_byte(offset);
1241     }
1242     else {
1243     emit_byte(0x88);
1244     emit_byte(0x80+8*s+d);
1245     emit_long(offset);
1246     }
1247     }
1248     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1249    
1250     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1251     {
1252     emit_byte(0x0f);
1253     emit_byte(0xc8+r);
1254     }
1255     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1256    
1257     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1258     {
1259     emit_byte(0x66);
1260     emit_byte(0xc1);
1261     emit_byte(0xc0+r);
1262     emit_byte(0x08);
1263     }
1264     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1265    
1266     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1267     {
1268     emit_byte(0x89);
1269     emit_byte(0xc0+8*s+d);
1270     }
1271     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1272    
1273     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1274     {
1275     emit_byte(0x89);
1276     emit_byte(0x05+8*s);
1277     emit_long(d);
1278     }
1279     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1280    
1281     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1282     {
1283     emit_byte(0x66);
1284     emit_byte(0x89);
1285     emit_byte(0x05+8*s);
1286     emit_long(d);
1287     }
1288     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1289    
1290     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1291     {
1292     emit_byte(0x66);
1293     emit_byte(0x8b);
1294     emit_byte(0x05+8*d);
1295     emit_long(s);
1296     }
1297     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1298    
1299     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1300     {
1301     emit_byte(0x88);
1302     emit_byte(0x05+8*s);
1303     emit_long(d);
1304     }
1305     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1306    
1307     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1308     {
1309     emit_byte(0x8a);
1310     emit_byte(0x05+8*d);
1311     emit_long(s);
1312     }
1313     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1314    
1315     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1316     {
1317     emit_byte(0xb8+d);
1318     emit_long(s);
1319     }
1320     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1321    
1322     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1323     {
1324     emit_byte(0x66);
1325     emit_byte(0xb8+d);
1326     emit_word(s);
1327     }
1328     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1329    
1330     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1331     {
1332     emit_byte(0xb0+d);
1333     emit_byte(s);
1334     }
1335     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1336    
1337     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1338     {
1339     emit_byte(0x81);
1340     emit_byte(0x15);
1341     emit_long(d);
1342     emit_long(s);
1343     }
1344     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1345    
1346     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1347     {
1348     if (optimize_imm8 && isbyte(s)) {
1349     emit_byte(0x83);
1350     emit_byte(0x05);
1351     emit_long(d);
1352     emit_byte(s);
1353     }
1354     else {
1355     emit_byte(0x81);
1356     emit_byte(0x05);
1357     emit_long(d);
1358     emit_long(s);
1359     }
1360     }
1361     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1362    
1363     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1364     {
1365     emit_byte(0x66);
1366     emit_byte(0x81);
1367     emit_byte(0x05);
1368     emit_long(d);
1369     emit_word(s);
1370     }
1371     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1372    
1373     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1374     {
1375     emit_byte(0x80);
1376     emit_byte(0x05);
1377     emit_long(d);
1378     emit_byte(s);
1379     }
1380     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1381    
1382     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1383     {
1384 gbeauche 1.2 if (optimize_accum && isaccum(d))
1385     emit_byte(0xa9);
1386     else {
1387 gbeauche 1.1 emit_byte(0xf7);
1388     emit_byte(0xc0+d);
1389 gbeauche 1.2 }
1390 gbeauche 1.1 emit_long(i);
1391     }
1392     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1393    
1394     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1395     {
1396     emit_byte(0x85);
1397     emit_byte(0xc0+8*s+d);
1398     }
1399     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1400    
1401     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1402     {
1403     emit_byte(0x66);
1404     emit_byte(0x85);
1405     emit_byte(0xc0+8*s+d);
1406     }
1407     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1408    
1409     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1410     {
1411     emit_byte(0x84);
1412     emit_byte(0xc0+8*s+d);
1413     }
1414     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1415    
1416     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1417     {
1418     if (optimize_imm8 && isbyte(i)) {
1419 gbeauche 1.2 emit_byte(0x83);
1420     emit_byte(0xe0+d);
1421     emit_byte(i);
1422 gbeauche 1.1 }
1423     else {
1424 gbeauche 1.2 if (optimize_accum && isaccum(d))
1425     emit_byte(0x25);
1426     else {
1427     emit_byte(0x81);
1428     emit_byte(0xe0+d);
1429     }
1430     emit_long(i);
1431 gbeauche 1.1 }
1432     }
1433     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1434    
1435     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1436     {
1437 gbeauche 1.2 emit_byte(0x66);
1438     if (optimize_imm8 && isbyte(i)) {
1439     emit_byte(0x83);
1440     emit_byte(0xe0+d);
1441     emit_byte(i);
1442     }
1443     else {
1444     if (optimize_accum && isaccum(d))
1445     emit_byte(0x25);
1446     else {
1447     emit_byte(0x81);
1448     emit_byte(0xe0+d);
1449     }
1450     emit_word(i);
1451     }
1452 gbeauche 1.1 }
1453     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1454    
1455     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1456     {
1457     emit_byte(0x21);
1458     emit_byte(0xc0+8*s+d);
1459     }
1460     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1461    
1462     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1463     {
1464     emit_byte(0x66);
1465     emit_byte(0x21);
1466     emit_byte(0xc0+8*s+d);
1467     }
1468     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1469    
1470     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1471     {
1472     emit_byte(0x20);
1473     emit_byte(0xc0+8*s+d);
1474     }
1475     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1476    
1477     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1478     {
1479     if (optimize_imm8 && isbyte(i)) {
1480     emit_byte(0x83);
1481     emit_byte(0xc8+d);
1482     emit_byte(i);
1483     }
1484     else {
1485 gbeauche 1.2 if (optimize_accum && isaccum(d))
1486     emit_byte(0x0d);
1487     else {
1488 gbeauche 1.1 emit_byte(0x81);
1489     emit_byte(0xc8+d);
1490 gbeauche 1.2 }
1491 gbeauche 1.1 emit_long(i);
1492     }
1493     }
1494     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1495    
1496     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1497     {
1498     emit_byte(0x09);
1499     emit_byte(0xc0+8*s+d);
1500     }
1501     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1502    
1503     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1504     {
1505     emit_byte(0x66);
1506     emit_byte(0x09);
1507     emit_byte(0xc0+8*s+d);
1508     }
1509     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1510    
1511     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1512     {
1513     emit_byte(0x08);
1514     emit_byte(0xc0+8*s+d);
1515     }
1516     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1517    
1518     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1519     {
1520     emit_byte(0x11);
1521     emit_byte(0xc0+8*s+d);
1522     }
1523     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1524    
1525     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1526     {
1527     emit_byte(0x66);
1528     emit_byte(0x11);
1529     emit_byte(0xc0+8*s+d);
1530     }
1531     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1532    
1533     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1534     {
1535     emit_byte(0x10);
1536     emit_byte(0xc0+8*s+d);
1537     }
1538     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1539    
1540     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1541     {
1542     emit_byte(0x01);
1543     emit_byte(0xc0+8*s+d);
1544     }
1545     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1546    
1547     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1548     {
1549     emit_byte(0x66);
1550     emit_byte(0x01);
1551     emit_byte(0xc0+8*s+d);
1552     }
1553     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1554    
1555     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1556     {
1557     emit_byte(0x00);
1558     emit_byte(0xc0+8*s+d);
1559     }
1560     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1561    
1562     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1563     {
1564     if (isbyte(i)) {
1565     emit_byte(0x83);
1566     emit_byte(0xe8+d);
1567     emit_byte(i);
1568     }
1569     else {
1570 gbeauche 1.2 if (optimize_accum && isaccum(d))
1571     emit_byte(0x2d);
1572     else {
1573 gbeauche 1.1 emit_byte(0x81);
1574     emit_byte(0xe8+d);
1575 gbeauche 1.2 }
1576 gbeauche 1.1 emit_long(i);
1577     }
1578     }
1579     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1580    
1581     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1582     {
1583 gbeauche 1.2 if (optimize_accum && isaccum(d))
1584     emit_byte(0x2c);
1585     else {
1586 gbeauche 1.1 emit_byte(0x80);
1587     emit_byte(0xe8+d);
1588 gbeauche 1.2 }
1589 gbeauche 1.1 emit_byte(i);
1590     }
1591     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1592    
1593     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1594     {
1595     if (isbyte(i)) {
1596     emit_byte(0x83);
1597     emit_byte(0xc0+d);
1598     emit_byte(i);
1599     }
1600     else {
1601 gbeauche 1.2 if (optimize_accum && isaccum(d))
1602     emit_byte(0x05);
1603     else {
1604 gbeauche 1.1 emit_byte(0x81);
1605     emit_byte(0xc0+d);
1606 gbeauche 1.2 }
1607 gbeauche 1.1 emit_long(i);
1608     }
1609     }
1610     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1611    
1612     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1613     {
1614 gbeauche 1.2 emit_byte(0x66);
1615 gbeauche 1.1 if (isbyte(i)) {
1616     emit_byte(0x83);
1617     emit_byte(0xc0+d);
1618     emit_byte(i);
1619     }
1620     else {
1621 gbeauche 1.2 if (optimize_accum && isaccum(d))
1622     emit_byte(0x05);
1623     else {
1624 gbeauche 1.1 emit_byte(0x81);
1625     emit_byte(0xc0+d);
1626 gbeauche 1.2 }
1627 gbeauche 1.1 emit_word(i);
1628     }
1629     }
1630     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1631    
1632     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1633     {
1634 gbeauche 1.2 if (optimize_accum && isaccum(d))
1635     emit_byte(0x04);
1636     else {
1637     emit_byte(0x80);
1638     emit_byte(0xc0+d);
1639     }
1640 gbeauche 1.1 emit_byte(i);
1641     }
1642     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1643    
1644     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1645     {
1646     emit_byte(0x19);
1647     emit_byte(0xc0+8*s+d);
1648     }
1649     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1650    
1651     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1652     {
1653     emit_byte(0x66);
1654     emit_byte(0x19);
1655     emit_byte(0xc0+8*s+d);
1656     }
1657     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1658    
1659     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1660     {
1661     emit_byte(0x18);
1662     emit_byte(0xc0+8*s+d);
1663     }
1664     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1665    
1666     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1667     {
1668     emit_byte(0x29);
1669     emit_byte(0xc0+8*s+d);
1670     }
1671     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1672    
1673     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1674     {
1675     emit_byte(0x66);
1676     emit_byte(0x29);
1677     emit_byte(0xc0+8*s+d);
1678     }
1679     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1680    
1681     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1682     {
1683     emit_byte(0x28);
1684     emit_byte(0xc0+8*s+d);
1685     }
1686     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1687    
1688     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1689     {
1690     emit_byte(0x39);
1691     emit_byte(0xc0+8*s+d);
1692     }
1693     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1694    
1695     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1696     {
1697     if (optimize_imm8 && isbyte(i)) {
1698     emit_byte(0x83);
1699     emit_byte(0xf8+r);
1700     emit_byte(i);
1701     }
1702     else {
1703 gbeauche 1.2 if (optimize_accum && isaccum(r))
1704     emit_byte(0x3d);
1705     else {
1706 gbeauche 1.1 emit_byte(0x81);
1707     emit_byte(0xf8+r);
1708 gbeauche 1.2 }
1709 gbeauche 1.1 emit_long(i);
1710     }
1711     }
1712     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1713    
1714     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1715     {
1716     emit_byte(0x66);
1717     emit_byte(0x39);
1718     emit_byte(0xc0+8*s+d);
1719     }
1720     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1721    
1722     LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1723     {
1724 gbeauche 1.2 if (optimize_accum && isaccum(d))
1725     emit_byte(0x3c);
1726     else {
1727 gbeauche 1.1 emit_byte(0x80);
1728     emit_byte(0xf8+d);
1729 gbeauche 1.2 }
1730 gbeauche 1.1 emit_byte(i);
1731     }
1732     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1733    
1734     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1735     {
1736     emit_byte(0x38);
1737     emit_byte(0xc0+8*s+d);
1738     }
1739     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1740    
1741     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1742     {
1743     int fi;
1744    
1745     switch(factor) {
1746     case 1: fi=0; break;
1747     case 2: fi=1; break;
1748     case 4: fi=2; break;
1749     case 8: fi=3; break;
1750     default: abort();
1751     }
1752     emit_byte(0x39);
1753     emit_byte(0x04+8*d);
1754     emit_byte(5+8*index+0x40*fi);
1755     emit_long(offset);
1756     }
1757     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1758    
1759     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1760     {
1761     emit_byte(0x31);
1762     emit_byte(0xc0+8*s+d);
1763     }
1764     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1765    
1766     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1767     {
1768     emit_byte(0x66);
1769     emit_byte(0x31);
1770     emit_byte(0xc0+8*s+d);
1771     }
1772     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1773    
1774     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1775     {
1776     emit_byte(0x30);
1777     emit_byte(0xc0+8*s+d);
1778     }
1779     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1780    
1781     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1782     {
1783     if (optimize_imm8 && isbyte(s)) {
1784     emit_byte(0x83);
1785     emit_byte(0x2d);
1786     emit_long(d);
1787     emit_byte(s);
1788     }
1789     else {
1790     emit_byte(0x81);
1791     emit_byte(0x2d);
1792     emit_long(d);
1793     emit_long(s);
1794     }
1795     }
1796     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1797    
1798     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1799     {
1800     if (optimize_imm8 && isbyte(s)) {
1801     emit_byte(0x83);
1802     emit_byte(0x3d);
1803     emit_long(d);
1804     emit_byte(s);
1805     }
1806     else {
1807     emit_byte(0x81);
1808     emit_byte(0x3d);
1809     emit_long(d);
1810     emit_long(s);
1811     }
1812     }
1813     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1814    
1815     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1816     {
1817     emit_byte(0x87);
1818     emit_byte(0xc0+8*r1+r2);
1819     }
1820     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1821    
1822     /*************************************************************************
1823     * FIXME: string-related instructions *
1824     *************************************************************************/
1825    
1826     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1827     {
1828     emit_byte(0xfc);
1829     }
1830     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1831    
1832     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1833     {
1834     emit_byte(0xfd);
1835     }
1836     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1837    
1838     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1839     {
1840     emit_byte(0xa4);
1841     }
1842     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1843    
1844     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1845     {
1846     emit_byte(0xa5);
1847     }
1848     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1849    
1850     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1851     {
1852     emit_byte(0xf3);
1853     }
1854     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1855    
1856     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1857     {
1858     raw_rep();
1859     raw_movs_b();
1860     }
1861     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1862    
1863     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1864     {
1865     raw_rep();
1866     raw_movs_l();
1867     }
1868     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1869    
1870     /*************************************************************************
1871     * FIXME: mem access modes probably wrong *
1872     *************************************************************************/
1873    
1874     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1875     {
1876     emit_byte(0x9c);
1877     }
1878     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1879    
1880     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1881     {
1882     emit_byte(0x9d);
1883     }
1884     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1885    
1886     /*************************************************************************
1887     * Unoptimizable stuff --- jump *
1888     *************************************************************************/
1889    
1890     static __inline__ void raw_call_r(R4 r)
1891     {
1892     emit_byte(0xff);
1893     emit_byte(0xd0+r);
1894     }
1895    
1896     static __inline__ void raw_jmp_r(R4 r)
1897     {
1898     emit_byte(0xff);
1899     emit_byte(0xe0+r);
1900     }
1901    
1902     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1903     {
1904     int mu;
1905     switch(m) {
1906     case 1: mu=0; break;
1907     case 2: mu=1; break;
1908     case 4: mu=2; break;
1909     case 8: mu=3; break;
1910     default: abort();
1911     }
1912     emit_byte(0xff);
1913     emit_byte(0x24);
1914     emit_byte(0x05+8*r+0x40*mu);
1915     emit_long(base);
1916     }
1917    
1918     static __inline__ void raw_jmp_m(uae_u32 base)
1919     {
1920     emit_byte(0xff);
1921     emit_byte(0x25);
1922     emit_long(base);
1923     }
1924    
1925    
1926     static __inline__ void raw_call(uae_u32 t)
1927     {
1928     emit_byte(0xe8);
1929     emit_long(t-(uae_u32)target-4);
1930     }
1931    
1932     static __inline__ void raw_jmp(uae_u32 t)
1933     {
1934     emit_byte(0xe9);
1935     emit_long(t-(uae_u32)target-4);
1936     }
1937    
1938     static __inline__ void raw_jl(uae_u32 t)
1939     {
1940     emit_byte(0x0f);
1941     emit_byte(0x8c);
1942     emit_long(t-(uae_u32)target-4);
1943     }
1944    
1945     static __inline__ void raw_jz(uae_u32 t)
1946     {
1947     emit_byte(0x0f);
1948     emit_byte(0x84);
1949     emit_long(t-(uae_u32)target-4);
1950     }
1951    
1952     static __inline__ void raw_jnz(uae_u32 t)
1953     {
1954     emit_byte(0x0f);
1955     emit_byte(0x85);
1956     emit_long(t-(uae_u32)target-4);
1957     }
1958    
1959     static __inline__ void raw_jnz_l_oponly(void)
1960     {
1961     emit_byte(0x0f);
1962     emit_byte(0x85);
1963     }
1964    
1965     static __inline__ void raw_jcc_l_oponly(int cc)
1966     {
1967     emit_byte(0x0f);
1968     emit_byte(0x80+cc);
1969     }
1970    
1971     static __inline__ void raw_jnz_b_oponly(void)
1972     {
1973     emit_byte(0x75);
1974     }
1975    
1976     static __inline__ void raw_jz_b_oponly(void)
1977     {
1978     emit_byte(0x74);
1979     }
1980    
1981     static __inline__ void raw_jcc_b_oponly(int cc)
1982     {
1983     emit_byte(0x70+cc);
1984     }
1985    
1986     static __inline__ void raw_jmp_l_oponly(void)
1987     {
1988     emit_byte(0xe9);
1989     }
1990    
1991     static __inline__ void raw_jmp_b_oponly(void)
1992     {
1993     emit_byte(0xeb);
1994     }
1995    
1996     static __inline__ void raw_ret(void)
1997     {
1998     emit_byte(0xc3);
1999     }
2000    
2001     static __inline__ void raw_nop(void)
2002     {
2003     emit_byte(0x90);
2004     }
2005    
2006    
2007     /*************************************************************************
 * Flag handling, to and from the UAE flag register                      *
2009     *************************************************************************/
2010    
2011     #ifdef SAHF_SETO_PROFITABLE
2012    
2013     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2014    
2015     static __inline__ void raw_flags_to_reg(int r)
2016     {
2017     raw_lahf(0); /* Most flags in AH */
2018     //raw_setcc(r,0); /* V flag in AL */
2019     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2020    
2021     #if 1 /* Let's avoid those nasty partial register stalls */
2022     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2023     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2024     //live.state[FLAGTMP].status=CLEAN;
2025     live.state[FLAGTMP].status=INMEM;
2026     live.state[FLAGTMP].realreg=-1;
2027     /* We just "evicted" FLAGTMP. */
2028     if (live.nat[r].nholds!=1) {
2029     /* Huh? */
2030     abort();
2031     }
2032     live.nat[r].nholds=0;
2033     #endif
2034     }
2035    
2036     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2037     static __inline__ void raw_reg_to_flags(int r)
2038     {
2039     raw_cmp_b_ri(r,-127); /* set V */
2040     raw_sahf(0);
2041     }
2042    
2043     #else
2044    
2045     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2046     static __inline__ void raw_flags_to_reg(int r)
2047     {
2048     raw_pushfl();
2049     raw_pop_l_r(r);
2050     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2051     // live.state[FLAGTMP].status=CLEAN;
2052     live.state[FLAGTMP].status=INMEM;
2053     live.state[FLAGTMP].realreg=-1;
2054     /* We just "evicted" FLAGTMP. */
2055     if (live.nat[r].nholds!=1) {
2056     /* Huh? */
2057     abort();
2058     }
2059     live.nat[r].nholds=0;
2060     }
2061    
2062     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2063     static __inline__ void raw_reg_to_flags(int r)
2064     {
2065     raw_push_l_r(r);
2066     raw_popfl();
2067     }
2068    
2069     #endif
2070    
2071     /* Apparently, there are enough instructions between flag store and
2072     flag reload to avoid the partial memory stall */
2073     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2074     {
2075     #if 1
2076     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2077     #else
2078     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2079     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2080     #endif
2081     }
2082    
2083     /* FLAGX is byte sized, and we *do* write it at that size */
2084     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2085     {
2086     if (live.nat[target].canbyte)
2087     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2088     else if (live.nat[target].canword)
2089     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2090     else
2091     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2092     }
2093    
2094    
2095     static __inline__ void raw_inc_sp(int off)
2096     {
2097 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2098 gbeauche 1.1 }
2099    
2100     /*************************************************************************
2101     * Handling mistaken direct memory access *
2102     *************************************************************************/
2103    
2104     // gb-- I don't need that part for JIT Basilisk II
2105     #if defined(NATMEM_OFFSET) && 0
2106     #include <asm/sigcontext.h>
2107     #include <signal.h>
2108    
2109     #define SIG_READ 1
2110     #define SIG_WRITE 2
2111    
2112     static int in_handler=0;
2113     static uae_u8 veccode[256];
2114    
2115     static void vec(int x, struct sigcontext sc)
2116     {
2117     uae_u8* i=(uae_u8*)sc.eip;
2118     uae_u32 addr=sc.cr2;
2119     int r=-1;
2120     int size=4;
2121     int dir=-1;
2122     int len=0;
2123     int j;
2124    
2125     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2126     if (!canbang)
2127     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2128     if (in_handler)
2129     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2130    
2131     if (canbang && i>=compiled_code && i<=current_compile_p) {
2132     if (*i==0x66) {
2133     i++;
2134     size=2;
2135     len++;
2136     }
2137    
2138     switch(i[0]) {
2139     case 0x8a:
2140     if ((i[1]&0xc0)==0x80) {
2141     r=(i[1]>>3)&7;
2142     dir=SIG_READ;
2143     size=1;
2144     len+=6;
2145     break;
2146     }
2147     break;
2148     case 0x88:
2149     if ((i[1]&0xc0)==0x80) {
2150     r=(i[1]>>3)&7;
2151     dir=SIG_WRITE;
2152     size=1;
2153     len+=6;
2154     break;
2155     }
2156     break;
2157     case 0x8b:
2158     if ((i[1]&0xc0)==0x80) {
2159     r=(i[1]>>3)&7;
2160     dir=SIG_READ;
2161     len+=6;
2162     break;
2163     }
2164     if ((i[1]&0xc0)==0x40) {
2165     r=(i[1]>>3)&7;
2166     dir=SIG_READ;
2167     len+=3;
2168     break;
2169     }
2170     break;
2171     case 0x89:
2172     if ((i[1]&0xc0)==0x80) {
2173     r=(i[1]>>3)&7;
2174     dir=SIG_WRITE;
2175     len+=6;
2176     break;
2177     }
2178     if ((i[1]&0xc0)==0x40) {
2179     r=(i[1]>>3)&7;
2180     dir=SIG_WRITE;
2181     len+=3;
2182     break;
2183     }
2184     break;
2185     }
2186     }
2187    
2188     if (r!=-1) {
2189     void* pr=NULL;
2190     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2191    
2192     switch(r) {
2193     case 0: pr=&(sc.eax); break;
2194     case 1: pr=&(sc.ecx); break;
2195     case 2: pr=&(sc.edx); break;
2196     case 3: pr=&(sc.ebx); break;
2197     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2198     case 5: pr=(size>1)?
2199     (void*)(&(sc.ebp)):
2200     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2201     case 6: pr=(size>1)?
2202     (void*)(&(sc.esi)):
2203     (void*)(((uae_u8*)&(sc.edx))+1); break;
2204     case 7: pr=(size>1)?
2205     (void*)(&(sc.edi)):
2206     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2207     default: abort();
2208     }
2209     if (pr) {
2210     blockinfo* bi;
2211    
2212     if (currprefs.comp_oldsegv) {
2213     addr-=NATMEM_OFFSET;
2214    
2215     if ((addr>=0x10000000 && addr<0x40000000) ||
2216     (addr>=0x50000000)) {
2217     write_log("Suspicious address in %x SEGV handler.\n",addr);
2218     }
2219     if (dir==SIG_READ) {
2220     switch(size) {
2221     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2222     case 2: *((uae_u16*)pr)=get_word(addr); break;
2223     case 4: *((uae_u32*)pr)=get_long(addr); break;
2224     default: abort();
2225     }
2226     }
2227     else { /* write */
2228     switch(size) {
2229     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2230     case 2: put_word(addr,*((uae_u16*)pr)); break;
2231     case 4: put_long(addr,*((uae_u32*)pr)); break;
2232     default: abort();
2233     }
2234     }
2235     write_log("Handled one access!\n");
2236     fflush(stdout);
2237     segvcount++;
2238     sc.eip+=len;
2239     }
2240     else {
2241     void* tmp=target;
2242     int i;
2243     uae_u8 vecbuf[5];
2244    
2245     addr-=NATMEM_OFFSET;
2246    
2247     if ((addr>=0x10000000 && addr<0x40000000) ||
2248     (addr>=0x50000000)) {
2249     write_log("Suspicious address in %x SEGV handler.\n",addr);
2250     }
2251    
2252     target=(uae_u8*)sc.eip;
2253     for (i=0;i<5;i++)
2254     vecbuf[i]=target[i];
2255     emit_byte(0xe9);
2256     emit_long((uae_u32)veccode-(uae_u32)target-4);
2257     write_log("Create jump to %p\n",veccode);
2258    
2259     write_log("Handled one access!\n");
2260     fflush(stdout);
2261     segvcount++;
2262    
2263     target=veccode;
2264    
2265     if (dir==SIG_READ) {
2266     switch(size) {
2267     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2268     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2269     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2270     default: abort();
2271     }
2272     }
2273     else { /* write */
2274     switch(size) {
2275     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2276     case 2: put_word(addr,*((uae_u16*)pr)); break;
2277     case 4: put_long(addr,*((uae_u32*)pr)); break;
2278     default: abort();
2279     }
2280     }
2281     for (i=0;i<5;i++)
2282     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2283     raw_mov_l_mi((uae_u32)&in_handler,0);
2284     emit_byte(0xe9);
2285     emit_long(sc.eip+len-(uae_u32)target-4);
2286     in_handler=1;
2287     target=tmp;
2288     }
2289     bi=active;
2290     while (bi) {
2291     if (bi->handler &&
2292     (uae_u8*)bi->direct_handler<=i &&
2293     (uae_u8*)bi->nexthandler>i) {
2294     write_log("deleted trigger (%p<%p<%p) %p\n",
2295     bi->handler,
2296     i,
2297     bi->nexthandler,
2298     bi->pc_p);
2299     invalidate_block(bi);
2300     raise_in_cl_list(bi);
2301     set_special(0);
2302     return;
2303     }
2304     bi=bi->next;
2305     }
2306     /* Not found in the active list. Might be a rom routine that
2307     is in the dormant list */
2308     bi=dormant;
2309     while (bi) {
2310     if (bi->handler &&
2311     (uae_u8*)bi->direct_handler<=i &&
2312     (uae_u8*)bi->nexthandler>i) {
2313     write_log("deleted trigger (%p<%p<%p) %p\n",
2314     bi->handler,
2315     i,
2316     bi->nexthandler,
2317     bi->pc_p);
2318     invalidate_block(bi);
2319     raise_in_cl_list(bi);
2320     set_special(0);
2321     return;
2322     }
2323     bi=bi->next;
2324     }
2325     write_log("Huh? Could not find trigger!\n");
2326     return;
2327     }
2328     }
2329     write_log("Can't handle access!\n");
2330     for (j=0;j<10;j++) {
2331     write_log("instruction byte %2d is %02x\n",j,i[j]);
2332     }
2333     write_log("Please send the above info (starting at \"fault address\") to\n"
2334     "bmeyer@csse.monash.edu.au\n"
2335     "This shouldn't happen ;-)\n");
2336     fflush(stdout);
2337     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2338     }
2339     #endif
2340    
2341    
2342     /*************************************************************************
2343     * Checking for CPU features *
2344     *************************************************************************/
2345    
2346     typedef struct {
2347     uae_u32 eax;
2348     uae_u32 ecx;
2349     uae_u32 edx;
2350     uae_u32 ebx;
2351     } x86_regs;
2352    
2353    
2354     /* This could be so much easier if it could make assumptions about the
2355     compiler... */
2356    
2357     static uae_u8 cpuid_space[256];
2358     static uae_u32 cpuid_ptr;
2359     static uae_u32 cpuid_level;
2360    
2361     static x86_regs cpuid(uae_u32 level)
2362     {
2363     x86_regs answer;
2364     uae_u8* tmp=get_target();
2365    
2366     cpuid_ptr=(uae_u32)&answer;
2367     cpuid_level=level;
2368    
2369     set_target(cpuid_space);
2370     raw_push_l_r(0); /* eax */
2371     raw_push_l_r(1); /* ecx */
2372     raw_push_l_r(2); /* edx */
2373     raw_push_l_r(3); /* ebx */
2374     raw_push_l_r(7); /* edi */
2375     raw_mov_l_rm(0,(uae_u32)&cpuid_level);
2376     raw_cpuid(0);
2377     raw_mov_l_rm(7,(uae_u32)&cpuid_ptr);
2378     raw_mov_l_Rr(7,0,0);
2379     raw_mov_l_Rr(7,1,4);
2380     raw_mov_l_Rr(7,2,8);
2381     raw_mov_l_Rr(7,3,12);
2382     raw_pop_l_r(7);
2383     raw_pop_l_r(3);
2384     raw_pop_l_r(2);
2385     raw_pop_l_r(1);
2386     raw_pop_l_r(0);
2387     raw_ret();
2388     set_target(tmp);
2389    
2390     ((cpuop_func*)cpuid_space)(0);
2391     return answer;
2392     }
2393    
2394     static void raw_init_cpu(void)
2395     {
2396     x86_regs x;
2397     uae_u32 maxlev;
2398    
2399     x=cpuid(0);
2400     maxlev=x.eax;
2401     write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
2402     maxlev,
2403     x.ebx,
2404     x.ebx>>8,
2405     x.ebx>>16,
2406     x.ebx>>24,
2407     x.edx,
2408     x.edx>>8,
2409     x.edx>>16,
2410     x.edx>>24,
2411     x.ecx,
2412     x.ecx>>8,
2413     x.ecx>>16,
2414     x.ecx>>24
2415     );
2416     have_rat_stall=(x.ecx==0x6c65746e);
2417    
2418     if (maxlev>=1) {
2419     x=cpuid(1);
2420     if (x.edx&(1<<15))
2421     have_cmov=1;
2422     }
2423     if (!have_cmov)
2424     have_rat_stall=0;
2425     #if 0 /* For testing of non-cmov code! */
2426     have_cmov=0;
2427     #endif
2428     #if 1 /* It appears that partial register writes are a bad idea even on
2429     AMD K7 cores, even though they are not supposed to have the
2430     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2431     if (have_cmov)
2432     have_rat_stall=1;
2433     #endif
2434     }
2435    
2436    
2437     /*************************************************************************
2438     * FPU stuff *
2439     *************************************************************************/
2440    
2441    
2442     static __inline__ void raw_fp_init(void)
2443     {
2444     int i;
2445    
2446     for (i=0;i<N_FREGS;i++)
2447     live.spos[i]=-2;
2448     live.tos=-1; /* Stack is empty */
2449     }
2450    
2451     static __inline__ void raw_fp_cleanup_drop(void)
2452     {
2453     #if 0
2454     /* using FINIT instead of popping all the entries.
2455     Seems to have side effects --- there is display corruption in
2456     Quake when this is used */
2457     if (live.tos>1) {
2458     emit_byte(0x9b);
2459     emit_byte(0xdb);
2460     emit_byte(0xe3);
2461     live.tos=-1;
2462     }
2463     #endif
2464     while (live.tos>=1) {
2465     emit_byte(0xde);
2466     emit_byte(0xd9);
2467     live.tos-=2;
2468     }
2469     while (live.tos>=0) {
2470     emit_byte(0xdd);
2471     emit_byte(0xd8);
2472     live.tos--;
2473     }
2474     raw_fp_init();
2475     }
2476    
2477     static __inline__ void make_tos(int r)
2478     {
2479     int p,q;
2480    
2481     if (live.spos[r]<0) { /* Register not yet on stack */
2482     emit_byte(0xd9);
2483     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2484     live.tos++;
2485     live.spos[r]=live.tos;
2486     live.onstack[live.tos]=r;
2487     return;
2488     }
2489     /* Register is on stack */
2490     if (live.tos==live.spos[r])
2491     return;
2492     p=live.spos[r];
2493     q=live.onstack[live.tos];
2494    
2495     emit_byte(0xd9);
2496     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2497     live.onstack[live.tos]=r;
2498     live.spos[r]=live.tos;
2499     live.onstack[p]=q;
2500     live.spos[q]=p;
2501     }
2502    
2503     static __inline__ void make_tos2(int r, int r2)
2504     {
2505     int q;
2506    
2507     make_tos(r2); /* Put the reg that's supposed to end up in position2
2508     on top */
2509    
2510     if (live.spos[r]<0) { /* Register not yet on stack */
2511     make_tos(r); /* This will extend the stack */
2512     return;
2513     }
2514     /* Register is on stack */
2515     emit_byte(0xd9);
2516     emit_byte(0xc9); /* Move r2 into position 2 */
2517    
2518     q=live.onstack[live.tos-1];
2519     live.onstack[live.tos]=q;
2520     live.spos[q]=live.tos;
2521     live.onstack[live.tos-1]=r2;
2522     live.spos[r2]=live.tos-1;
2523    
2524     make_tos(r); /* And r into 1 */
2525     }
2526    
2527     static __inline__ int stackpos(int r)
2528     {
2529     if (live.spos[r]<0)
2530     abort();
2531     if (live.tos<live.spos[r]) {
2532     printf("Looking for spos for fnreg %d\n",r);
2533     abort();
2534     }
2535     return live.tos-live.spos[r];
2536     }
2537    
2538     static __inline__ void usereg(int r)
2539     {
2540     if (live.spos[r]<0)
2541     make_tos(r);
2542     }
2543    
2544     /* This is called with one FP value in a reg *above* tos, which it will
2545     pop off the stack if necessary */
2546     static __inline__ void tos_make(int r)
2547     {
2548     if (live.spos[r]<0) {
2549     live.tos++;
2550     live.spos[r]=live.tos;
2551     live.onstack[live.tos]=r;
2552     return;
2553     }
2554     emit_byte(0xdd);
2555     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2556     and pop it*/
2557     }
2558    
2559    
2560     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2561     {
2562     make_tos(r);
2563     emit_byte(0xdd);
2564     emit_byte(0x15);
2565     emit_long(m);
2566     }
2567     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2568    
2569     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2570     {
2571     make_tos(r);
2572     emit_byte(0xdd);
2573     emit_byte(0x1d);
2574     emit_long(m);
2575     live.onstack[live.tos]=-1;
2576     live.tos--;
2577     live.spos[r]=-2;
2578     }
2579     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2580    
2581     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2582     {
2583     emit_byte(0xdd);
2584     emit_byte(0x05);
2585     emit_long(m);
2586     tos_make(r);
2587     }
2588     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2589    
2590     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2591     {
2592     emit_byte(0xdb);
2593     emit_byte(0x05);
2594     emit_long(m);
2595     tos_make(r);
2596     }
2597     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2598    
2599     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2600     {
2601     make_tos(r);
2602     emit_byte(0xdb);
2603     emit_byte(0x15);
2604     emit_long(m);
2605     }
2606     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2607    
2608     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2609     {
2610     emit_byte(0xd9);
2611     emit_byte(0x05);
2612     emit_long(m);
2613     tos_make(r);
2614     }
2615     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2616    
2617     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2618     {
2619     make_tos(r);
2620     emit_byte(0xd9);
2621     emit_byte(0x15);
2622     emit_long(m);
2623     }
2624     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2625    
2626     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2627     {
2628     int rs;
2629    
2630     /* Stupid x87 can't write a long double to mem without popping the
2631     stack! */
2632     usereg(r);
2633     rs=stackpos(r);
2634     emit_byte(0xd9); /* Get a copy to the top of stack */
2635     emit_byte(0xc0+rs);
2636    
2637     emit_byte(0xdb); /* store and pop it */
2638     emit_byte(0x3d);
2639     emit_long(m);
2640     }
2641     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2642    
2643     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2644     {
2645     int rs;
2646    
2647     make_tos(r);
2648     emit_byte(0xdb); /* store and pop it */
2649     emit_byte(0x3d);
2650     emit_long(m);
2651     live.onstack[live.tos]=-1;
2652     live.tos--;
2653     live.spos[r]=-2;
2654     }
2655     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2656    
2657     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2658     {
2659     emit_byte(0xdb);
2660     emit_byte(0x2d);
2661     emit_long(m);
2662     tos_make(r);
2663     }
2664     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2665    
2666     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2667     {
2668     emit_byte(0xd9);
2669     emit_byte(0xeb);
2670     tos_make(r);
2671     }
2672     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2673    
2674     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2675     {
2676     emit_byte(0xd9);
2677     emit_byte(0xec);
2678     tos_make(r);
2679     }
2680     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2681    
2682     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2683     {
2684     emit_byte(0xd9);
2685     emit_byte(0xea);
2686     tos_make(r);
2687     }
2688     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2689    
2690     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2691     {
2692     emit_byte(0xd9);
2693     emit_byte(0xed);
2694     tos_make(r);
2695     }
2696     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2697    
2698     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2699     {
2700     emit_byte(0xd9);
2701     emit_byte(0xe8);
2702     tos_make(r);
2703     }
2704     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2705    
2706     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2707     {
2708     emit_byte(0xd9);
2709     emit_byte(0xee);
2710     tos_make(r);
2711     }
2712     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2713    
2714     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2715     {
2716     int ds;
2717    
2718     usereg(s);
2719     ds=stackpos(s);
2720     if (ds==0 && live.spos[d]>=0) {
2721     /* source is on top of stack, and we already have the dest */
2722     int dd=stackpos(d);
2723     emit_byte(0xdd);
2724     emit_byte(0xd0+dd);
2725     }
2726     else {
2727     emit_byte(0xd9);
2728     emit_byte(0xc0+ds); /* duplicate source on tos */
2729     tos_make(d); /* store to destination, pop if necessary */
2730     }
2731     }
2732     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2733    
2734     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2735     {
2736     emit_byte(0xd9);
2737     emit_byte(0xa8+index);
2738     emit_long(base);
2739     }
2740     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2741    
2742    
2743     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2744     {
2745     int ds;
2746    
2747     if (d!=s) {
2748     usereg(s);
2749     ds=stackpos(s);
2750     emit_byte(0xd9);
2751     emit_byte(0xc0+ds); /* duplicate source */
2752     emit_byte(0xd9);
2753     emit_byte(0xfa); /* take square root */
2754     tos_make(d); /* store to destination */
2755     }
2756     else {
2757     make_tos(d);
2758     emit_byte(0xd9);
2759     emit_byte(0xfa); /* take square root */
2760     }
2761     }
2762     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2763    
2764     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2765     {
2766     int ds;
2767    
2768     if (d!=s) {
2769     usereg(s);
2770     ds=stackpos(s);
2771     emit_byte(0xd9);
2772     emit_byte(0xc0+ds); /* duplicate source */
2773     emit_byte(0xd9);
2774     emit_byte(0xe1); /* take fabs */
2775     tos_make(d); /* store to destination */
2776     }
2777     else {
2778     make_tos(d);
2779     emit_byte(0xd9);
2780     emit_byte(0xe1); /* take fabs */
2781     }
2782     }
2783     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2784    
2785     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2786     {
2787     int ds;
2788    
2789     if (d!=s) {
2790     usereg(s);
2791     ds=stackpos(s);
2792     emit_byte(0xd9);
2793     emit_byte(0xc0+ds); /* duplicate source */
2794     emit_byte(0xd9);
2795     emit_byte(0xfc); /* take frndint */
2796     tos_make(d); /* store to destination */
2797     }
2798     else {
2799     make_tos(d);
2800     emit_byte(0xd9);
2801     emit_byte(0xfc); /* take frndint */
2802     }
2803     }
2804     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2805    
2806     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2807     {
2808     int ds;
2809    
2810     if (d!=s) {
2811     usereg(s);
2812     ds=stackpos(s);
2813     emit_byte(0xd9);
2814     emit_byte(0xc0+ds); /* duplicate source */
2815     emit_byte(0xd9);
2816     emit_byte(0xff); /* take cos */
2817     tos_make(d); /* store to destination */
2818     }
2819     else {
2820     make_tos(d);
2821     emit_byte(0xd9);
2822     emit_byte(0xff); /* take cos */
2823     }
2824     }
2825     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2826    
2827     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2828     {
2829     int ds;
2830    
2831     if (d!=s) {
2832     usereg(s);
2833     ds=stackpos(s);
2834     emit_byte(0xd9);
2835     emit_byte(0xc0+ds); /* duplicate source */
2836     emit_byte(0xd9);
2837     emit_byte(0xfe); /* take sin */
2838     tos_make(d); /* store to destination */
2839     }
2840     else {
2841     make_tos(d);
2842     emit_byte(0xd9);
2843     emit_byte(0xfe); /* take sin */
2844     }
2845     }
2846     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2847    
2848     double one=1;
2849     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
2850     {
2851     int ds;
2852    
2853     usereg(s);
2854     ds=stackpos(s);
2855     emit_byte(0xd9);
2856     emit_byte(0xc0+ds); /* duplicate source */
2857    
2858     emit_byte(0xd9);
2859     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
2860     emit_byte(0xd9);
2861     emit_byte(0xfc); /* rndint */
2862     emit_byte(0xd9);
2863     emit_byte(0xc9); /* swap top two elements */
2864     emit_byte(0xd8);
2865     emit_byte(0xe1); /* subtract rounded from original */
2866     emit_byte(0xd9);
2867     emit_byte(0xf0); /* f2xm1 */
2868     emit_byte(0xdc);
2869     emit_byte(0x05);
2870     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
2871     emit_byte(0xd9);
2872     emit_byte(0xfd); /* and scale it */
2873     emit_byte(0xdd);
2874     emit_byte(0xd9); /* take he rounded value off */
2875     tos_make(d); /* store to destination */
2876     }
2877     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
2878    
2879     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
2880     {
2881     int ds;
2882    
2883     usereg(s);
2884     ds=stackpos(s);
2885     emit_byte(0xd9);
2886     emit_byte(0xc0+ds); /* duplicate source */
2887     emit_byte(0xd9);
2888     emit_byte(0xea); /* fldl2e */
2889     emit_byte(0xde);
2890     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
2891    
2892     emit_byte(0xd9);
2893     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
2894     emit_byte(0xd9);
2895     emit_byte(0xfc); /* rndint */
2896     emit_byte(0xd9);
2897     emit_byte(0xc9); /* swap top two elements */
2898     emit_byte(0xd8);
2899     emit_byte(0xe1); /* subtract rounded from original */
2900     emit_byte(0xd9);
2901     emit_byte(0xf0); /* f2xm1 */
2902     emit_byte(0xdc);
2903     emit_byte(0x05);
2904     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
2905     emit_byte(0xd9);
2906     emit_byte(0xfd); /* and scale it */
2907     emit_byte(0xdd);
2908     emit_byte(0xd9); /* take he rounded value off */
2909     tos_make(d); /* store to destination */
2910     }
2911     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
2912    
2913     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
2914     {
2915     int ds;
2916    
2917     usereg(s);
2918     ds=stackpos(s);
2919     emit_byte(0xd9);
2920     emit_byte(0xc0+ds); /* duplicate source */
2921     emit_byte(0xd9);
2922     emit_byte(0xe8); /* push '1' */
2923     emit_byte(0xd9);
2924     emit_byte(0xc9); /* swap top two */
2925     emit_byte(0xd9);
2926     emit_byte(0xf1); /* take 1*log2(x) */
2927     tos_make(d); /* store to destination */
2928     }
2929     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
2930    
2931    
2932     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
2933     {
2934     int ds;
2935    
2936     if (d!=s) {
2937     usereg(s);
2938     ds=stackpos(s);
2939     emit_byte(0xd9);
2940     emit_byte(0xc0+ds); /* duplicate source */
2941     emit_byte(0xd9);
2942     emit_byte(0xe0); /* take fchs */
2943     tos_make(d); /* store to destination */
2944     }
2945     else {
2946     make_tos(d);
2947     emit_byte(0xd9);
2948     emit_byte(0xe0); /* take fchs */
2949     }
2950     }
2951     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
2952    
2953     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
2954     {
2955     int ds;
2956    
2957     usereg(s);
2958     usereg(d);
2959    
2960     if (live.spos[s]==live.tos) {
2961     /* Source is on top of stack */
2962     ds=stackpos(d);
2963     emit_byte(0xdc);
2964     emit_byte(0xc0+ds); /* add source to dest*/
2965     }
2966     else {
2967     make_tos(d);
2968     ds=stackpos(s);
2969    
2970     emit_byte(0xd8);
2971     emit_byte(0xc0+ds); /* add source to dest*/
2972     }
2973     }
2974     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
2975    
2976     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
2977     {
2978     int ds;
2979    
2980     usereg(s);
2981     usereg(d);
2982    
2983     if (live.spos[s]==live.tos) {
2984     /* Source is on top of stack */
2985     ds=stackpos(d);
2986     emit_byte(0xdc);
2987     emit_byte(0xe8+ds); /* sub source from dest*/
2988     }
2989     else {
2990     make_tos(d);
2991     ds=stackpos(s);
2992    
2993     emit_byte(0xd8);
2994     emit_byte(0xe0+ds); /* sub src from dest */
2995     }
2996     }
2997     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
2998    
2999     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3000     {
3001     int ds;
3002    
3003     usereg(s);
3004     usereg(d);
3005    
3006     make_tos(d);
3007     ds=stackpos(s);
3008    
3009     emit_byte(0xdd);
3010     emit_byte(0xe0+ds); /* cmp dest with source*/
3011     }
3012     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3013    
3014     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3015     {
3016     int ds;
3017    
3018     usereg(s);
3019     usereg(d);
3020    
3021     if (live.spos[s]==live.tos) {
3022     /* Source is on top of stack */
3023     ds=stackpos(d);
3024     emit_byte(0xdc);
3025     emit_byte(0xc8+ds); /* mul dest by source*/
3026     }
3027     else {
3028     make_tos(d);
3029     ds=stackpos(s);
3030    
3031     emit_byte(0xd8);
3032     emit_byte(0xc8+ds); /* mul dest by source*/
3033     }
3034     }
3035     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3036    
3037     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3038     {
3039     int ds;
3040    
3041     usereg(s);
3042     usereg(d);
3043    
3044     if (live.spos[s]==live.tos) {
3045     /* Source is on top of stack */
3046     ds=stackpos(d);
3047     emit_byte(0xdc);
3048     emit_byte(0xf8+ds); /* div dest by source */
3049     }
3050     else {
3051     make_tos(d);
3052     ds=stackpos(s);
3053    
3054     emit_byte(0xd8);
3055     emit_byte(0xf0+ds); /* div dest by source*/
3056     }
3057     }
3058     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3059    
3060     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3061     {
3062     int ds;
3063    
3064     usereg(s);
3065     usereg(d);
3066    
3067     make_tos2(d,s);
3068     ds=stackpos(s);
3069    
3070     if (ds!=1) {
3071     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3072     abort();
3073     }
3074     emit_byte(0xd9);
3075     emit_byte(0xf8); /* take rem from dest by source */
3076     }
3077     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3078    
3079     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3080     {
3081     int ds;
3082    
3083     usereg(s);
3084     usereg(d);
3085    
3086     make_tos2(d,s);
3087     ds=stackpos(s);
3088    
3089     if (ds!=1) {
3090     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3091     abort();
3092     }
3093     emit_byte(0xd9);
3094     emit_byte(0xf5); /* take rem1 from dest by source */
3095     }
3096     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3097    
3098    
3099     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3100     {
3101     make_tos(r);
3102     emit_byte(0xd9); /* ftst */
3103     emit_byte(0xe4);
3104     }
3105     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3106    
/* %eax register is clobbered if target processor doesn't support fucomi:
   in that case raw_fflags_into_flags() goes through "fstsw ax". */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
3110    
3111     static __inline__ void raw_fflags_into_flags(int r)
3112     {
3113     int p;
3114    
3115     usereg(r);
3116     p=stackpos(r);
3117    
3118     emit_byte(0xd9);
3119     emit_byte(0xee); /* Push 0 */
3120     emit_byte(0xd9);
3121     emit_byte(0xc9+p); /* swap top two around */
3122     if (have_cmov) {
3123     // gb-- fucomi is for P6 cores only, not K6-2 then...
3124     emit_byte(0xdb);
3125     emit_byte(0xe9+p); /* fucomi them */
3126     }
3127     else {
3128     emit_byte(0xdd);
3129     emit_byte(0xe1+p); /* fucom them */
3130     emit_byte(0x9b);
3131     emit_byte(0xdf);
3132     emit_byte(0xe0); /* fstsw ax */
3133     raw_sahf(0); /* sahf */
3134     }
3135     emit_byte(0xdd);
3136     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3137     }