ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.8
Committed: 2002-10-12T16:27:13Z (21 years, 11 months ago) by gbeauche
Branch: MAIN
Changes since 1.7: +62 -0 lines
Log Message:
Add raw_emit_nop_filler() with more efficient no-op fillers stolen from
GNU binutils 2.12.90.0.15. Speed bump is marginal (less than 6%). Make it
default though, that's conditionalized by tune_nop_fillers constant.

File Contents

# User Rev Content
1 gbeauche 1.6 /*
2     * compiler/codegen_x86.cpp - IA-32 code generator
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10 gbeauche 1.7 *
11     * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 gbeauche 1.6 *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28 gbeauche 1.1 /* This should eventually end up in machdep/, but for now, x86 is the
29     only target, and it's easier this way... */
30    
31 gbeauche 1.5 #include "flags_x86.h"
32    
33 gbeauche 1.1 /*************************************************************************
34     * Some basic information about the target CPU *
35     *************************************************************************/
36    
/* Native register numbers, in x86 encoding order */
#define EAX_INDEX 0
#define ECX_INDEX 1
#define EDX_INDEX 2
#define EBX_INDEX 3
#define ESP_INDEX 4
#define EBP_INDEX 5
#define ESI_INDEX 6
#define EDI_INDEX 7

/* Register in which subroutines hand back an integer return value */
#define REG_RESULT 0

/* Registers in which subroutines receive their first and second argument.
   The second one is the same for both calling conventions; only the first
   one differs. */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* _fastcall passes its parameters in ECX and EDX */
#define REG_PAR1 1
#else
#define REG_PAR1 0
#endif
#define REG_PAR2 2

/* Three registers that serve none of the roles above */
#define REG_NOPAR1 6
#define REG_NOPAR2 5
#define REG_NOPAR3 3

/* Register used for preloading regs.pc_p */
#define REG_PC_PRE 0
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
#define REG_PC_TMP 0
#else
/* Another register, different from REG_PC_PRE */
#define REG_PC_TMP 1
#endif

/* Register usable as a shift count; -1 would mean any register will do */
#define SHIFTCOUNT_NREG 1
/* After a 32x32 multiply, %eax holds the low 32 bits ... */
#define MUL_NREG1 0
/* ... and %edx holds the high 32 bits */
#define MUL_NREG2 2
75    
76     uae_s8 always_used[]={4,-1};
77     uae_s8 can_byte[]={0,1,2,3,-1};
78     uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79    
80     /* cpuopti mutate instruction handlers to assume registers are saved
81     by the caller */
82     uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
83    
84     /* This *should* be the same as call_saved. But:
85     - We might not really know which registers are saved, and which aren't,
86     so we need to preserve some, but don't want to rely on everyone else
87     also saving those registers
88     - Special registers (such like the stack pointer) should not be "preserved"
89     by pushing, even though they are "saved" across function calls
90     */
91     uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
92    
/* Per instruction class: what has to happen to the tracked native flags.
   An empty expansion means the class leaves them alone; the others expand
   to a clobber_flags() call. */

/* Data movement */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH

/* Arithmetic and logic */
#define CLOBBER_SUB  clobber_flags()
#define CLOBBER_SBB  clobber_flags()
#define CLOBBER_CMP  clobber_flags()
#define CLOBBER_ADD  clobber_flags()
#define CLOBBER_ADC  clobber_flags()
#define CLOBBER_AND  clobber_flags()
#define CLOBBER_OR   clobber_flags()
#define CLOBBER_XOR  clobber_flags()

/* Rotates, shifts and test */
#define CLOBBER_ROL  clobber_flags()
#define CLOBBER_ROR  clobber_flags()
#define CLOBBER_SHLL clobber_flags()
#define CLOBBER_SHRL clobber_flags()
#define CLOBBER_SHRA clobber_flags()
#define CLOBBER_TEST clobber_flags()

/* Clears, extensions, swaps, setcc */
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW16 clobber_flags()
#define CLOBBER_SW32
#define CLOBBER_SETCC

/* Multiply and bit operations */
#define CLOBBER_MUL  clobber_flags()
#define CLOBBER_BT   clobber_flags()
#define CLOBBER_BSF  clobber_flags()
126    
/* Encoding tweaks applied by the emitters below */

/* Use the shorter accumulator-only opcode when the operand is EAX/AX */
const bool optimize_accum      = true;
/* NOTE(review): presumably selects imm8 forms where possible - not used in
   the part of the file visible here, confirm. */
const bool optimize_imm8       = true;
/* Use the "by 1" shift/rotate opcodes, which take no immediate byte */
const bool optimize_shift_once = true;
130    
131     /*************************************************************************
132     * Actual encoding of the instructions on the target CPU *
133     *************************************************************************/
134    
135 gbeauche 1.2 static __inline__ int isaccum(int r)
136     {
137     return (r == EAX_INDEX);
138     }
139    
140 gbeauche 1.1 static __inline__ int isbyte(uae_s32 x)
141     {
142     return (x>=-128 && x<=127);
143     }
144    
145     static __inline__ int isword(uae_s32 x)
146     {
147     return (x>=-32768 && x<=32767);
148     }
149    
150     LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
151     {
152     emit_byte(0x50+r);
153     }
154     LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
155    
156     LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
157     {
158     emit_byte(0x58+r);
159     }
160     LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
161    
162     LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
163     {
164     emit_byte(0x0f);
165     emit_byte(0xba);
166     emit_byte(0xe0+r);
167     emit_byte(i);
168     }
169     LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
170    
171     LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
172     {
173     emit_byte(0x0f);
174     emit_byte(0xa3);
175     emit_byte(0xc0+8*b+r);
176     }
177     LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
178    
179     LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
180     {
181     emit_byte(0x0f);
182     emit_byte(0xba);
183     emit_byte(0xf8+r);
184     emit_byte(i);
185     }
186     LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
187    
188     LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
189     {
190     emit_byte(0x0f);
191     emit_byte(0xbb);
192     emit_byte(0xc0+8*b+r);
193     }
194     LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
195    
196    
197     LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
198     {
199     emit_byte(0x0f);
200     emit_byte(0xba);
201     emit_byte(0xf0+r);
202     emit_byte(i);
203     }
204     LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
205    
206     LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
207     {
208     emit_byte(0x0f);
209     emit_byte(0xb3);
210     emit_byte(0xc0+8*b+r);
211     }
212     LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
213    
214     LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
215     {
216     emit_byte(0x0f);
217     emit_byte(0xba);
218     emit_byte(0xe8+r);
219     emit_byte(i);
220     }
221     LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
222    
223     LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
224     {
225     emit_byte(0x0f);
226     emit_byte(0xab);
227     emit_byte(0xc0+8*b+r);
228     }
229     LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
230    
231     LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
232     {
233     emit_byte(0x66);
234     if (isbyte(i)) {
235     emit_byte(0x83);
236     emit_byte(0xe8+d);
237     emit_byte(i);
238     }
239     else {
240 gbeauche 1.2 if (optimize_accum && isaccum(d))
241     emit_byte(0x2d);
242     else {
243 gbeauche 1.1 emit_byte(0x81);
244     emit_byte(0xe8+d);
245 gbeauche 1.2 }
246 gbeauche 1.1 emit_word(i);
247     }
248     }
249     LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
250    
251    
252     LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
253     {
254     emit_byte(0x8b);
255     emit_byte(0x05+8*d);
256     emit_long(s);
257     }
258     LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
259    
260     LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
261     {
262     emit_byte(0xc7);
263     emit_byte(0x05);
264     emit_long(d);
265     emit_long(s);
266     }
267     LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
268    
269     LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
270     {
271     emit_byte(0x66);
272     emit_byte(0xc7);
273     emit_byte(0x05);
274     emit_long(d);
275     emit_word(s);
276     }
277     LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
278    
279     LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
280     {
281     emit_byte(0xc6);
282     emit_byte(0x05);
283     emit_long(d);
284     emit_byte(s);
285     }
286     LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
287    
288     LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
289     {
290     if (optimize_shift_once && (i == 1)) {
291     emit_byte(0xd0);
292     emit_byte(0x05);
293     emit_long(d);
294     }
295     else {
296     emit_byte(0xc0);
297     emit_byte(0x05);
298     emit_long(d);
299     emit_byte(i);
300     }
301     }
302     LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
303    
304     LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
305     {
306     if (optimize_shift_once && (i == 1)) {
307     emit_byte(0xd0);
308     emit_byte(0xc0+r);
309     }
310     else {
311     emit_byte(0xc0);
312     emit_byte(0xc0+r);
313     emit_byte(i);
314     }
315     }
316     LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317    
318     LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319     {
320     emit_byte(0x66);
321     emit_byte(0xc1);
322     emit_byte(0xc0+r);
323     emit_byte(i);
324     }
325     LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
326    
327     LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
328     {
329     if (optimize_shift_once && (i == 1)) {
330     emit_byte(0xd1);
331     emit_byte(0xc0+r);
332     }
333     else {
334     emit_byte(0xc1);
335     emit_byte(0xc0+r);
336     emit_byte(i);
337     }
338     }
339     LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
340    
341     LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
342     {
343     emit_byte(0xd3);
344     emit_byte(0xc0+d);
345     }
346     LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
347    
348     LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
349     {
350     emit_byte(0x66);
351     emit_byte(0xd3);
352     emit_byte(0xc0+d);
353     }
354     LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
355    
356     LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
357     {
358     emit_byte(0xd2);
359     emit_byte(0xc0+d);
360     }
361     LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
362    
363     LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
364     {
365     emit_byte(0xd3);
366     emit_byte(0xe0+d);
367     }
368     LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
369    
370     LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
371     {
372     emit_byte(0x66);
373     emit_byte(0xd3);
374     emit_byte(0xe0+d);
375     }
376     LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
377    
378     LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
379     {
380     emit_byte(0xd2);
381     emit_byte(0xe0+d);
382     }
383     LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
384    
385     LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
386     {
387     if (optimize_shift_once && (i == 1)) {
388     emit_byte(0xd0);
389     emit_byte(0xc8+r);
390     }
391     else {
392     emit_byte(0xc0);
393     emit_byte(0xc8+r);
394     emit_byte(i);
395     }
396     }
397     LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
398    
399     LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
400     {
401     emit_byte(0x66);
402     emit_byte(0xc1);
403     emit_byte(0xc8+r);
404     emit_byte(i);
405     }
406     LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
407    
408     // gb-- used for making an fpcr value in compemu_fpp.cpp
409     LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
410     {
411     emit_byte(0x0b);
412     emit_byte(0x05+8*d);
413     emit_long(s);
414     }
415     LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
416    
417     LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
418     {
419     if (optimize_shift_once && (i == 1)) {
420     emit_byte(0xd1);
421     emit_byte(0xc8+r);
422     }
423     else {
424     emit_byte(0xc1);
425     emit_byte(0xc8+r);
426     emit_byte(i);
427     }
428     }
429     LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
430    
431     LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
432     {
433     emit_byte(0xd3);
434     emit_byte(0xc8+d);
435     }
436     LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
437    
438     LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
439     {
440     emit_byte(0x66);
441     emit_byte(0xd3);
442     emit_byte(0xc8+d);
443     }
444     LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
445    
446     LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
447     {
448     emit_byte(0xd2);
449     emit_byte(0xc8+d);
450     }
451     LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
452    
453     LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
454     {
455     emit_byte(0xd3);
456     emit_byte(0xe8+d);
457     }
458     LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
459    
460     LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
461     {
462     emit_byte(0x66);
463     emit_byte(0xd3);
464     emit_byte(0xe8+d);
465     }
466     LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
467    
468     LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
469     {
470     emit_byte(0xd2);
471     emit_byte(0xe8+d);
472     }
473     LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
474    
475     LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
476     {
477     emit_byte(0xd3);
478     emit_byte(0xf8+d);
479     }
480     LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
481    
482     LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
483     {
484     emit_byte(0x66);
485     emit_byte(0xd3);
486     emit_byte(0xf8+d);
487     }
488     LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
489    
490     LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
491     {
492     emit_byte(0xd2);
493     emit_byte(0xf8+d);
494     }
495     LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
496    
497     LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
498     {
499     if (optimize_shift_once && (i == 1)) {
500     emit_byte(0xd1);
501     emit_byte(0xe0+r);
502     }
503     else {
504     emit_byte(0xc1);
505     emit_byte(0xe0+r);
506     emit_byte(i);
507     }
508     }
509     LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
510    
511     LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
512     {
513     emit_byte(0x66);
514     emit_byte(0xc1);
515     emit_byte(0xe0+r);
516     emit_byte(i);
517     }
518     LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
519    
520     LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
521     {
522     if (optimize_shift_once && (i == 1)) {
523     emit_byte(0xd0);
524     emit_byte(0xe0+r);
525     }
526     else {
527     emit_byte(0xc0);
528     emit_byte(0xe0+r);
529     emit_byte(i);
530     }
531     }
532     LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
533    
534     LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
535     {
536     if (optimize_shift_once && (i == 1)) {
537     emit_byte(0xd1);
538     emit_byte(0xe8+r);
539     }
540     else {
541     emit_byte(0xc1);
542     emit_byte(0xe8+r);
543     emit_byte(i);
544     }
545     }
546     LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
547    
548     LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
549     {
550     emit_byte(0x66);
551     emit_byte(0xc1);
552     emit_byte(0xe8+r);
553     emit_byte(i);
554     }
555     LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
556    
557     LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
558     {
559     if (optimize_shift_once && (i == 1)) {
560     emit_byte(0xd0);
561     emit_byte(0xe8+r);
562     }
563     else {
564     emit_byte(0xc0);
565     emit_byte(0xe8+r);
566     emit_byte(i);
567     }
568     }
569     LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
570    
571     LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
572     {
573     if (optimize_shift_once && (i == 1)) {
574     emit_byte(0xd1);
575     emit_byte(0xf8+r);
576     }
577     else {
578     emit_byte(0xc1);
579     emit_byte(0xf8+r);
580     emit_byte(i);
581     }
582     }
583     LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
584    
585     LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
586     {
587     emit_byte(0x66);
588     emit_byte(0xc1);
589     emit_byte(0xf8+r);
590     emit_byte(i);
591     }
592     LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
593    
594     LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
595     {
596     if (optimize_shift_once && (i == 1)) {
597     emit_byte(0xd0);
598     emit_byte(0xf8+r);
599     }
600     else {
601     emit_byte(0xc0);
602     emit_byte(0xf8+r);
603     emit_byte(i);
604     }
605     }
606     LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
607    
608     LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
609     {
610     emit_byte(0x9e);
611     }
612     LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
613    
614     LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
615     {
616     emit_byte(0x0f);
617     emit_byte(0xa2);
618     }
619     LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
620    
621     LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
622     {
623     emit_byte(0x9f);
624     }
625     LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
626    
627     LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
628     {
629     emit_byte(0x0f);
630     emit_byte(0x90+cc);
631     emit_byte(0xc0+d);
632     }
633     LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
634    
635     LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
636     {
637     emit_byte(0x0f);
638     emit_byte(0x90+cc);
639     emit_byte(0x05);
640     emit_long(d);
641     }
642     LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
643    
644     LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
645     {
646     if (have_cmov) {
647     emit_byte(0x0f);
648     emit_byte(0x40+cc);
649     emit_byte(0xc0+8*d+s);
650     }
651     else { /* replacement using branch and mov */
652     int uncc=(cc^1);
653     emit_byte(0x70+uncc);
654     emit_byte(2); /* skip next 2 bytes if not cc=true */
655     emit_byte(0x89);
656     emit_byte(0xc0+8*s+d);
657     }
658     }
659     LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
660    
661     LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
662     {
663     emit_byte(0x0f);
664     emit_byte(0xbc);
665     emit_byte(0xc0+8*d+s);
666     }
667     LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
668    
669     LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
670     {
671     emit_byte(0x0f);
672     emit_byte(0xbf);
673     emit_byte(0xc0+8*d+s);
674     }
675     LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
676    
677     LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
678     {
679     emit_byte(0x0f);
680     emit_byte(0xbe);
681     emit_byte(0xc0+8*d+s);
682     }
683     LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
684    
685     LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
686     {
687     emit_byte(0x0f);
688     emit_byte(0xb7);
689     emit_byte(0xc0+8*d+s);
690     }
691     LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
692    
693     LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
694     {
695     emit_byte(0x0f);
696     emit_byte(0xb6);
697     emit_byte(0xc0+8*d+s);
698     }
699     LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
700    
701     LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
702     {
703     emit_byte(0x0f);
704     emit_byte(0xaf);
705     emit_byte(0xc0+8*d+s);
706     }
707     LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
708    
709     LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
710     {
711     if (d!=MUL_NREG1 || s!=MUL_NREG2)
712     abort();
713     emit_byte(0xf7);
714     emit_byte(0xea);
715     }
716     LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
717    
718     LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
719     {
720     if (d!=MUL_NREG1 || s!=MUL_NREG2) {
721     printf("Bad register in MUL: d=%d, s=%d\n",d,s);
722     abort();
723     }
724     emit_byte(0xf7);
725     emit_byte(0xe2);
726     }
727     LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
728    
729     LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
730     {
731     abort(); /* %^$&%^$%#^ x86! */
732     emit_byte(0x0f);
733     emit_byte(0xaf);
734     emit_byte(0xc0+8*d+s);
735     }
736     LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
737    
738     LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
739     {
740     emit_byte(0x88);
741     emit_byte(0xc0+8*s+d);
742     }
743     LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
744    
745     LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
746     {
747     emit_byte(0x66);
748     emit_byte(0x89);
749     emit_byte(0xc0+8*s+d);
750     }
751     LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
752    
753     LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
754     {
755     int isebp=(baser==5)?0x40:0;
756     int fi;
757    
758     switch(factor) {
759     case 1: fi=0; break;
760     case 2: fi=1; break;
761     case 4: fi=2; break;
762     case 8: fi=3; break;
763     default: abort();
764     }
765    
766    
767     emit_byte(0x8b);
768     emit_byte(0x04+8*d+isebp);
769     emit_byte(baser+8*index+0x40*fi);
770     if (isebp)
771     emit_byte(0x00);
772     }
773     LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
774    
775     LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
776     {
777     int fi;
778     int isebp;
779    
780     switch(factor) {
781     case 1: fi=0; break;
782     case 2: fi=1; break;
783     case 4: fi=2; break;
784     case 8: fi=3; break;
785     default: abort();
786     }
787     isebp=(baser==5)?0x40:0;
788    
789     emit_byte(0x66);
790     emit_byte(0x8b);
791     emit_byte(0x04+8*d+isebp);
792     emit_byte(baser+8*index+0x40*fi);
793     if (isebp)
794     emit_byte(0x00);
795     }
796     LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
797    
798     LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
799     {
800     int fi;
801     int isebp;
802    
803     switch(factor) {
804     case 1: fi=0; break;
805     case 2: fi=1; break;
806     case 4: fi=2; break;
807     case 8: fi=3; break;
808     default: abort();
809     }
810     isebp=(baser==5)?0x40:0;
811    
812     emit_byte(0x8a);
813     emit_byte(0x04+8*d+isebp);
814     emit_byte(baser+8*index+0x40*fi);
815     if (isebp)
816     emit_byte(0x00);
817     }
818     LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
819    
820     LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
821     {
822     int fi;
823     int isebp;
824    
825     switch(factor) {
826     case 1: fi=0; break;
827     case 2: fi=1; break;
828     case 4: fi=2; break;
829     case 8: fi=3; break;
830     default: abort();
831     }
832    
833    
834     isebp=(baser==5)?0x40:0;
835    
836     emit_byte(0x89);
837     emit_byte(0x04+8*s+isebp);
838     emit_byte(baser+8*index+0x40*fi);
839     if (isebp)
840     emit_byte(0x00);
841     }
842     LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
843    
844     LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
845     {
846     int fi;
847     int isebp;
848    
849     switch(factor) {
850     case 1: fi=0; break;
851     case 2: fi=1; break;
852     case 4: fi=2; break;
853     case 8: fi=3; break;
854     default: abort();
855     }
856     isebp=(baser==5)?0x40:0;
857    
858     emit_byte(0x66);
859     emit_byte(0x89);
860     emit_byte(0x04+8*s+isebp);
861     emit_byte(baser+8*index+0x40*fi);
862     if (isebp)
863     emit_byte(0x00);
864     }
865     LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
866    
867     LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
868     {
869     int fi;
870     int isebp;
871    
872     switch(factor) {
873     case 1: fi=0; break;
874     case 2: fi=1; break;
875     case 4: fi=2; break;
876     case 8: fi=3; break;
877     default: abort();
878     }
879     isebp=(baser==5)?0x40:0;
880    
881     emit_byte(0x88);
882     emit_byte(0x04+8*s+isebp);
883     emit_byte(baser+8*index+0x40*fi);
884     if (isebp)
885     emit_byte(0x00);
886     }
887     LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
888    
889     LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
890     {
891     int fi;
892    
893     switch(factor) {
894     case 1: fi=0; break;
895     case 2: fi=1; break;
896     case 4: fi=2; break;
897     case 8: fi=3; break;
898     default: abort();
899     }
900    
901     emit_byte(0x89);
902     emit_byte(0x84+8*s);
903     emit_byte(baser+8*index+0x40*fi);
904     emit_long(base);
905     }
906     LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
907    
908     LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
909     {
910     int fi;
911    
912     switch(factor) {
913     case 1: fi=0; break;
914     case 2: fi=1; break;
915     case 4: fi=2; break;
916     case 8: fi=3; break;
917     default: abort();
918     }
919    
920     emit_byte(0x66);
921     emit_byte(0x89);
922     emit_byte(0x84+8*s);
923     emit_byte(baser+8*index+0x40*fi);
924     emit_long(base);
925     }
926     LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
927    
928     LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
929     {
930     int fi;
931    
932     switch(factor) {
933     case 1: fi=0; break;
934     case 2: fi=1; break;
935     case 4: fi=2; break;
936     case 8: fi=3; break;
937     default: abort();
938     }
939    
940     emit_byte(0x88);
941     emit_byte(0x84+8*s);
942     emit_byte(baser+8*index+0x40*fi);
943     emit_long(base);
944     }
945     LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
946    
947     LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
948     {
949     int fi;
950    
951     switch(factor) {
952     case 1: fi=0; break;
953     case 2: fi=1; break;
954     case 4: fi=2; break;
955     case 8: fi=3; break;
956     default: abort();
957     }
958    
959     emit_byte(0x8b);
960     emit_byte(0x84+8*d);
961     emit_byte(baser+8*index+0x40*fi);
962     emit_long(base);
963     }
964     LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
965    
966     LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
967     {
968     int fi;
969    
970     switch(factor) {
971     case 1: fi=0; break;
972     case 2: fi=1; break;
973     case 4: fi=2; break;
974     case 8: fi=3; break;
975     default: abort();
976     }
977    
978     emit_byte(0x66);
979     emit_byte(0x8b);
980     emit_byte(0x84+8*d);
981     emit_byte(baser+8*index+0x40*fi);
982     emit_long(base);
983     }
984     LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
985    
986     LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
987     {
988     int fi;
989    
990     switch(factor) {
991     case 1: fi=0; break;
992     case 2: fi=1; break;
993     case 4: fi=2; break;
994     case 8: fi=3; break;
995     default: abort();
996     }
997    
998     emit_byte(0x8a);
999     emit_byte(0x84+8*d);
1000     emit_byte(baser+8*index+0x40*fi);
1001     emit_long(base);
1002     }
1003     LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1004    
1005     LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1006     {
1007     int fi;
1008     switch(factor) {
1009     case 1: fi=0; break;
1010     case 2: fi=1; break;
1011     case 4: fi=2; break;
1012     case 8: fi=3; break;
1013     default:
1014     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1015     abort();
1016     }
1017     emit_byte(0x8b);
1018     emit_byte(0x04+8*d);
1019     emit_byte(0x05+8*index+64*fi);
1020     emit_long(base);
1021     }
1022     LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1023    
1024     LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1025     {
1026     int fi;
1027     switch(factor) {
1028     case 1: fi=0; break;
1029     case 2: fi=1; break;
1030     case 4: fi=2; break;
1031     case 8: fi=3; break;
1032     default:
1033     fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1034     abort();
1035     }
1036     if (have_cmov) {
1037     emit_byte(0x0f);
1038     emit_byte(0x40+cond);
1039     emit_byte(0x04+8*d);
1040     emit_byte(0x05+8*index+64*fi);
1041     emit_long(base);
1042     }
1043     else { /* replacement using branch and mov */
1044     int uncc=(cond^1);
1045     emit_byte(0x70+uncc);
1046     emit_byte(7); /* skip next 7 bytes if not cc=true */
1047     emit_byte(0x8b);
1048     emit_byte(0x04+8*d);
1049     emit_byte(0x05+8*index+64*fi);
1050     emit_long(base);
1051     }
1052     }
1053     LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1054    
1055     LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1056     {
1057     if (have_cmov) {
1058     emit_byte(0x0f);
1059     emit_byte(0x40+cond);
1060     emit_byte(0x05+8*d);
1061     emit_long(mem);
1062     }
1063     else { /* replacement using branch and mov */
1064     int uncc=(cond^1);
1065     emit_byte(0x70+uncc);
1066     emit_byte(6); /* skip next 6 bytes if not cc=true */
1067     emit_byte(0x8b);
1068     emit_byte(0x05+8*d);
1069     emit_long(mem);
1070     }
1071     }
1072     LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1073    
1074     LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1075     {
1076     emit_byte(0x8b);
1077     emit_byte(0x40+8*d+s);
1078     emit_byte(offset);
1079     }
1080     LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1081    
1082     LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1083     {
1084     emit_byte(0x66);
1085     emit_byte(0x8b);
1086     emit_byte(0x40+8*d+s);
1087     emit_byte(offset);
1088     }
1089     LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1090    
1091     LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1092     {
1093     emit_byte(0x8a);
1094     emit_byte(0x40+8*d+s);
1095     emit_byte(offset);
1096     }
1097     LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1098    
1099     LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1100     {
1101     emit_byte(0x8b);
1102     emit_byte(0x80+8*d+s);
1103     emit_long(offset);
1104     }
1105     LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1106    
1107     LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1108     {
1109     emit_byte(0x66);
1110     emit_byte(0x8b);
1111     emit_byte(0x80+8*d+s);
1112     emit_long(offset);
1113     }
1114     LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1115    
1116     LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1117     {
1118     emit_byte(0x8a);
1119     emit_byte(0x80+8*d+s);
1120     emit_long(offset);
1121     }
1122     LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1123    
1124     LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1125     {
1126     emit_byte(0xc7);
1127     emit_byte(0x40+d);
1128     emit_byte(offset);
1129     emit_long(i);
1130     }
1131     LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1132    
1133     LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1134     {
1135     emit_byte(0x66);
1136     emit_byte(0xc7);
1137     emit_byte(0x40+d);
1138     emit_byte(offset);
1139     emit_word(i);
1140     }
1141     LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1142    
1143     LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1144     {
1145     emit_byte(0xc6);
1146     emit_byte(0x40+d);
1147     emit_byte(offset);
1148     emit_byte(i);
1149     }
1150     LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1151    
1152     LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1153     {
1154     emit_byte(0x89);
1155     emit_byte(0x40+8*s+d);
1156     emit_byte(offset);
1157     }
1158     LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1159    
1160     LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1161     {
1162     emit_byte(0x66);
1163     emit_byte(0x89);
1164     emit_byte(0x40+8*s+d);
1165     emit_byte(offset);
1166     }
1167     LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1168    
1169     LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1170     {
1171     emit_byte(0x88);
1172     emit_byte(0x40+8*s+d);
1173     emit_byte(offset);
1174     }
1175     LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1176    
1177     LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1178     {
1179     if (optimize_imm8 && isbyte(offset)) {
1180     emit_byte(0x8d);
1181     emit_byte(0x40+8*d+s);
1182     emit_byte(offset);
1183     }
1184     else {
1185     emit_byte(0x8d);
1186     emit_byte(0x80+8*d+s);
1187     emit_long(offset);
1188     }
1189     }
1190     LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1191    
1192     LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1193     {
1194     int fi;
1195    
1196     switch(factor) {
1197     case 1: fi=0; break;
1198     case 2: fi=1; break;
1199     case 4: fi=2; break;
1200     case 8: fi=3; break;
1201     default: abort();
1202     }
1203    
1204     if (optimize_imm8 && isbyte(offset)) {
1205     emit_byte(0x8d);
1206     emit_byte(0x44+8*d);
1207     emit_byte(0x40*fi+8*index+s);
1208     emit_byte(offset);
1209     }
1210     else {
1211     emit_byte(0x8d);
1212     emit_byte(0x84+8*d);
1213     emit_byte(0x40*fi+8*index+s);
1214     emit_long(offset);
1215     }
1216     }
1217     LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1218    
1219     LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1220     {
1221     int isebp=(s==5)?0x40:0;
1222     int fi;
1223    
1224     switch(factor) {
1225     case 1: fi=0; break;
1226     case 2: fi=1; break;
1227     case 4: fi=2; break;
1228     case 8: fi=3; break;
1229     default: abort();
1230     }
1231    
1232     emit_byte(0x8d);
1233     emit_byte(0x04+8*d+isebp);
1234     emit_byte(0x40*fi+8*index+s);
1235     if (isebp)
1236     emit_byte(0);
1237     }
1238     LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1239    
1240     LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1241     {
1242     if (optimize_imm8 && isbyte(offset)) {
1243     emit_byte(0x89);
1244     emit_byte(0x40+8*s+d);
1245     emit_byte(offset);
1246     }
1247     else {
1248     emit_byte(0x89);
1249     emit_byte(0x80+8*s+d);
1250     emit_long(offset);
1251     }
1252     }
1253     LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1254    
1255     LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1256     {
1257     emit_byte(0x66);
1258     emit_byte(0x89);
1259     emit_byte(0x80+8*s+d);
1260     emit_long(offset);
1261     }
1262     LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1263    
1264     LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1265     {
1266     if (optimize_imm8 && isbyte(offset)) {
1267     emit_byte(0x88);
1268     emit_byte(0x40+8*s+d);
1269     emit_byte(offset);
1270     }
1271     else {
1272     emit_byte(0x88);
1273     emit_byte(0x80+8*s+d);
1274     emit_long(offset);
1275     }
1276     }
1277     LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1278    
1279     LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1280     {
1281     emit_byte(0x0f);
1282     emit_byte(0xc8+r);
1283     }
1284     LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1285    
1286     LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1287     {
1288     emit_byte(0x66);
1289     emit_byte(0xc1);
1290     emit_byte(0xc0+r);
1291     emit_byte(0x08);
1292     }
1293     LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1294    
1295     LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1296     {
1297     emit_byte(0x89);
1298     emit_byte(0xc0+8*s+d);
1299     }
1300     LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1301    
1302     LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1303     {
1304     emit_byte(0x89);
1305     emit_byte(0x05+8*s);
1306     emit_long(d);
1307     }
1308     LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1309    
1310     LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1311     {
1312     emit_byte(0x66);
1313     emit_byte(0x89);
1314     emit_byte(0x05+8*s);
1315     emit_long(d);
1316     }
1317     LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1318    
1319     LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1320     {
1321     emit_byte(0x66);
1322     emit_byte(0x8b);
1323     emit_byte(0x05+8*d);
1324     emit_long(s);
1325     }
1326     LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1327    
1328     LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1329     {
1330     emit_byte(0x88);
1331     emit_byte(0x05+8*s);
1332     emit_long(d);
1333     }
1334     LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1335    
1336     LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1337     {
1338     emit_byte(0x8a);
1339     emit_byte(0x05+8*d);
1340     emit_long(s);
1341     }
1342     LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1343    
1344     LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1345     {
1346     emit_byte(0xb8+d);
1347     emit_long(s);
1348     }
1349     LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1350    
1351     LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1352     {
1353     emit_byte(0x66);
1354     emit_byte(0xb8+d);
1355     emit_word(s);
1356     }
1357     LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1358    
1359     LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1360     {
1361     emit_byte(0xb0+d);
1362     emit_byte(s);
1363     }
1364     LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1365    
1366     LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1367     {
1368     emit_byte(0x81);
1369     emit_byte(0x15);
1370     emit_long(d);
1371     emit_long(s);
1372     }
1373     LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1374    
1375     LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1376     {
1377     if (optimize_imm8 && isbyte(s)) {
1378     emit_byte(0x83);
1379     emit_byte(0x05);
1380     emit_long(d);
1381     emit_byte(s);
1382     }
1383     else {
1384     emit_byte(0x81);
1385     emit_byte(0x05);
1386     emit_long(d);
1387     emit_long(s);
1388     }
1389     }
1390     LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1391    
1392     LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1393     {
1394     emit_byte(0x66);
1395     emit_byte(0x81);
1396     emit_byte(0x05);
1397     emit_long(d);
1398     emit_word(s);
1399     }
1400     LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1401    
1402     LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1403     {
1404     emit_byte(0x80);
1405     emit_byte(0x05);
1406     emit_long(d);
1407     emit_byte(s);
1408     }
1409     LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1410    
1411     LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1412     {
1413 gbeauche 1.2 if (optimize_accum && isaccum(d))
1414     emit_byte(0xa9);
1415     else {
1416 gbeauche 1.1 emit_byte(0xf7);
1417     emit_byte(0xc0+d);
1418 gbeauche 1.2 }
1419 gbeauche 1.1 emit_long(i);
1420     }
1421     LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1422    
1423     LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1424     {
1425     emit_byte(0x85);
1426     emit_byte(0xc0+8*s+d);
1427     }
1428     LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1429    
1430     LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1431     {
1432     emit_byte(0x66);
1433     emit_byte(0x85);
1434     emit_byte(0xc0+8*s+d);
1435     }
1436     LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1437    
1438     LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1439     {
1440     emit_byte(0x84);
1441     emit_byte(0xc0+8*s+d);
1442     }
1443     LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1444    
1445     LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1446     {
1447     if (optimize_imm8 && isbyte(i)) {
1448 gbeauche 1.2 emit_byte(0x83);
1449     emit_byte(0xe0+d);
1450     emit_byte(i);
1451 gbeauche 1.1 }
1452     else {
1453 gbeauche 1.2 if (optimize_accum && isaccum(d))
1454     emit_byte(0x25);
1455     else {
1456     emit_byte(0x81);
1457     emit_byte(0xe0+d);
1458     }
1459     emit_long(i);
1460 gbeauche 1.1 }
1461     }
1462     LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1463    
1464     LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1465     {
1466 gbeauche 1.2 emit_byte(0x66);
1467     if (optimize_imm8 && isbyte(i)) {
1468     emit_byte(0x83);
1469     emit_byte(0xe0+d);
1470     emit_byte(i);
1471     }
1472     else {
1473     if (optimize_accum && isaccum(d))
1474     emit_byte(0x25);
1475     else {
1476     emit_byte(0x81);
1477     emit_byte(0xe0+d);
1478     }
1479     emit_word(i);
1480     }
1481 gbeauche 1.1 }
1482     LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1483    
1484     LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1485     {
1486     emit_byte(0x21);
1487     emit_byte(0xc0+8*s+d);
1488     }
1489     LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1490    
1491     LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1492     {
1493     emit_byte(0x66);
1494     emit_byte(0x21);
1495     emit_byte(0xc0+8*s+d);
1496     }
1497     LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1498    
1499     LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1500     {
1501     emit_byte(0x20);
1502     emit_byte(0xc0+8*s+d);
1503     }
1504     LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1505    
1506     LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1507     {
1508     if (optimize_imm8 && isbyte(i)) {
1509     emit_byte(0x83);
1510     emit_byte(0xc8+d);
1511     emit_byte(i);
1512     }
1513     else {
1514 gbeauche 1.2 if (optimize_accum && isaccum(d))
1515     emit_byte(0x0d);
1516     else {
1517 gbeauche 1.1 emit_byte(0x81);
1518     emit_byte(0xc8+d);
1519 gbeauche 1.2 }
1520 gbeauche 1.1 emit_long(i);
1521     }
1522     }
1523     LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1524    
1525     LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1526     {
1527     emit_byte(0x09);
1528     emit_byte(0xc0+8*s+d);
1529     }
1530     LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1531    
1532     LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1533     {
1534     emit_byte(0x66);
1535     emit_byte(0x09);
1536     emit_byte(0xc0+8*s+d);
1537     }
1538     LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1539    
1540     LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1541     {
1542     emit_byte(0x08);
1543     emit_byte(0xc0+8*s+d);
1544     }
1545     LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1546    
1547     LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1548     {
1549     emit_byte(0x11);
1550     emit_byte(0xc0+8*s+d);
1551     }
1552     LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1553    
1554     LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1555     {
1556     emit_byte(0x66);
1557     emit_byte(0x11);
1558     emit_byte(0xc0+8*s+d);
1559     }
1560     LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1561    
1562     LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1563     {
1564     emit_byte(0x10);
1565     emit_byte(0xc0+8*s+d);
1566     }
1567     LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1568    
1569     LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1570     {
1571     emit_byte(0x01);
1572     emit_byte(0xc0+8*s+d);
1573     }
1574     LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1575    
1576     LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1577     {
1578     emit_byte(0x66);
1579     emit_byte(0x01);
1580     emit_byte(0xc0+8*s+d);
1581     }
1582     LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1583    
1584     LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1585     {
1586     emit_byte(0x00);
1587     emit_byte(0xc0+8*s+d);
1588     }
1589     LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1590    
1591     LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1592     {
1593     if (isbyte(i)) {
1594     emit_byte(0x83);
1595     emit_byte(0xe8+d);
1596     emit_byte(i);
1597     }
1598     else {
1599 gbeauche 1.2 if (optimize_accum && isaccum(d))
1600     emit_byte(0x2d);
1601     else {
1602 gbeauche 1.1 emit_byte(0x81);
1603     emit_byte(0xe8+d);
1604 gbeauche 1.2 }
1605 gbeauche 1.1 emit_long(i);
1606     }
1607     }
1608     LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1609    
1610     LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1611     {
1612 gbeauche 1.2 if (optimize_accum && isaccum(d))
1613     emit_byte(0x2c);
1614     else {
1615 gbeauche 1.1 emit_byte(0x80);
1616     emit_byte(0xe8+d);
1617 gbeauche 1.2 }
1618 gbeauche 1.1 emit_byte(i);
1619     }
1620     LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1621    
1622     LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1623     {
1624     if (isbyte(i)) {
1625     emit_byte(0x83);
1626     emit_byte(0xc0+d);
1627     emit_byte(i);
1628     }
1629     else {
1630 gbeauche 1.2 if (optimize_accum && isaccum(d))
1631     emit_byte(0x05);
1632     else {
1633 gbeauche 1.1 emit_byte(0x81);
1634     emit_byte(0xc0+d);
1635 gbeauche 1.2 }
1636 gbeauche 1.1 emit_long(i);
1637     }
1638     }
1639     LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1640    
1641     LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1642     {
1643 gbeauche 1.2 emit_byte(0x66);
1644 gbeauche 1.1 if (isbyte(i)) {
1645     emit_byte(0x83);
1646     emit_byte(0xc0+d);
1647     emit_byte(i);
1648     }
1649     else {
1650 gbeauche 1.2 if (optimize_accum && isaccum(d))
1651     emit_byte(0x05);
1652     else {
1653 gbeauche 1.1 emit_byte(0x81);
1654     emit_byte(0xc0+d);
1655 gbeauche 1.2 }
1656 gbeauche 1.1 emit_word(i);
1657     }
1658     }
1659     LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1660    
1661     LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1662     {
1663 gbeauche 1.2 if (optimize_accum && isaccum(d))
1664     emit_byte(0x04);
1665     else {
1666     emit_byte(0x80);
1667     emit_byte(0xc0+d);
1668     }
1669 gbeauche 1.1 emit_byte(i);
1670     }
1671     LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1672    
1673     LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1674     {
1675     emit_byte(0x19);
1676     emit_byte(0xc0+8*s+d);
1677     }
1678     LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1679    
1680     LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1681     {
1682     emit_byte(0x66);
1683     emit_byte(0x19);
1684     emit_byte(0xc0+8*s+d);
1685     }
1686     LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1687    
1688     LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1689     {
1690     emit_byte(0x18);
1691     emit_byte(0xc0+8*s+d);
1692     }
1693     LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1694    
1695     LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1696     {
1697     emit_byte(0x29);
1698     emit_byte(0xc0+8*s+d);
1699     }
1700     LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1701    
1702     LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1703     {
1704     emit_byte(0x66);
1705     emit_byte(0x29);
1706     emit_byte(0xc0+8*s+d);
1707     }
1708     LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1709    
1710     LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1711     {
1712     emit_byte(0x28);
1713     emit_byte(0xc0+8*s+d);
1714     }
1715     LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1716    
1717     LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1718     {
1719     emit_byte(0x39);
1720     emit_byte(0xc0+8*s+d);
1721     }
1722     LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1723    
1724     LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1725     {
1726     if (optimize_imm8 && isbyte(i)) {
1727     emit_byte(0x83);
1728     emit_byte(0xf8+r);
1729     emit_byte(i);
1730     }
1731     else {
1732 gbeauche 1.2 if (optimize_accum && isaccum(r))
1733     emit_byte(0x3d);
1734     else {
1735 gbeauche 1.1 emit_byte(0x81);
1736     emit_byte(0xf8+r);
1737 gbeauche 1.2 }
1738 gbeauche 1.1 emit_long(i);
1739     }
1740     }
1741     LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1742    
1743     LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1744     {
1745     emit_byte(0x66);
1746     emit_byte(0x39);
1747     emit_byte(0xc0+8*s+d);
1748     }
1749     LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1750    
1751 gbeauche 1.5 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1752     {
1753     emit_byte(0x80);
1754     emit_byte(0x3d);
1755     emit_long(d);
1756     emit_byte(s);
1757     }
1758     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1759    
1760 gbeauche 1.1 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1761     {
1762 gbeauche 1.2 if (optimize_accum && isaccum(d))
1763     emit_byte(0x3c);
1764     else {
1765 gbeauche 1.1 emit_byte(0x80);
1766     emit_byte(0xf8+d);
1767 gbeauche 1.2 }
1768 gbeauche 1.1 emit_byte(i);
1769     }
1770     LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1771    
1772     LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1773     {
1774     emit_byte(0x38);
1775     emit_byte(0xc0+8*s+d);
1776     }
1777     LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1778    
1779     LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1780     {
1781     int fi;
1782    
1783     switch(factor) {
1784     case 1: fi=0; break;
1785     case 2: fi=1; break;
1786     case 4: fi=2; break;
1787     case 8: fi=3; break;
1788     default: abort();
1789     }
1790     emit_byte(0x39);
1791     emit_byte(0x04+8*d);
1792     emit_byte(5+8*index+0x40*fi);
1793     emit_long(offset);
1794     }
1795     LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1796    
1797     LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1798     {
1799     emit_byte(0x31);
1800     emit_byte(0xc0+8*s+d);
1801     }
1802     LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1803    
1804     LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1805     {
1806     emit_byte(0x66);
1807     emit_byte(0x31);
1808     emit_byte(0xc0+8*s+d);
1809     }
1810     LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1811    
1812     LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1813     {
1814     emit_byte(0x30);
1815     emit_byte(0xc0+8*s+d);
1816     }
1817     LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1818    
1819     LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1820     {
1821     if (optimize_imm8 && isbyte(s)) {
1822     emit_byte(0x83);
1823     emit_byte(0x2d);
1824     emit_long(d);
1825     emit_byte(s);
1826     }
1827     else {
1828     emit_byte(0x81);
1829     emit_byte(0x2d);
1830     emit_long(d);
1831     emit_long(s);
1832     }
1833     }
1834     LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1835    
1836     LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1837     {
1838     if (optimize_imm8 && isbyte(s)) {
1839     emit_byte(0x83);
1840     emit_byte(0x3d);
1841     emit_long(d);
1842     emit_byte(s);
1843     }
1844     else {
1845     emit_byte(0x81);
1846     emit_byte(0x3d);
1847     emit_long(d);
1848     emit_long(s);
1849     }
1850     }
1851     LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1852    
1853     LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1854     {
1855     emit_byte(0x87);
1856     emit_byte(0xc0+8*r1+r2);
1857     }
1858     LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1859    
1860     /*************************************************************************
1861     * FIXME: string-related instructions *
1862     *************************************************************************/
1863    
1864     LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1865     {
1866     emit_byte(0xfc);
1867     }
1868     LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1869    
1870     LOWFUNC(WRITE,NONE,0,raw_std,(void))
1871     {
1872     emit_byte(0xfd);
1873     }
1874     LENDFUNC(WRITE,NONE,0,raw_std,(void))
1875    
1876     LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1877     {
1878     emit_byte(0xa4);
1879     }
1880     LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1881    
1882     LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1883     {
1884     emit_byte(0xa5);
1885     }
1886     LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1887    
1888     LOWFUNC(NONE,RMW,0,raw_rep,(void))
1889     {
1890     emit_byte(0xf3);
1891     }
1892     LENDFUNC(NONE,RMW,0,raw_rep,(void))
1893    
1894     LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1895     {
1896     raw_rep();
1897     raw_movs_b();
1898     }
1899     LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1900    
1901     LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1902     {
1903     raw_rep();
1904     raw_movs_l();
1905     }
1906     LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1907    
1908     /*************************************************************************
1909     * FIXME: mem access modes probably wrong *
1910     *************************************************************************/
1911    
1912     LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1913     {
1914     emit_byte(0x9c);
1915     }
1916     LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1917    
1918     LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1919     {
1920     emit_byte(0x9d);
1921     }
1922     LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1923    
1924     /*************************************************************************
1925     * Unoptimizable stuff --- jump *
1926     *************************************************************************/
1927    
1928     static __inline__ void raw_call_r(R4 r)
1929     {
1930     emit_byte(0xff);
1931     emit_byte(0xd0+r);
1932 gbeauche 1.5 }
1933    
1934     static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1935     {
1936     int mu;
1937     switch(m) {
1938     case 1: mu=0; break;
1939     case 2: mu=1; break;
1940     case 4: mu=2; break;
1941     case 8: mu=3; break;
1942     default: abort();
1943     }
1944     emit_byte(0xff);
1945     emit_byte(0x14);
1946     emit_byte(0x05+8*r+0x40*mu);
1947     emit_long(base);
1948 gbeauche 1.1 }
1949    
1950     static __inline__ void raw_jmp_r(R4 r)
1951     {
1952     emit_byte(0xff);
1953     emit_byte(0xe0+r);
1954     }
1955    
1956     static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1957     {
1958     int mu;
1959     switch(m) {
1960     case 1: mu=0; break;
1961     case 2: mu=1; break;
1962     case 4: mu=2; break;
1963     case 8: mu=3; break;
1964     default: abort();
1965     }
1966     emit_byte(0xff);
1967     emit_byte(0x24);
1968     emit_byte(0x05+8*r+0x40*mu);
1969     emit_long(base);
1970     }
1971    
1972     static __inline__ void raw_jmp_m(uae_u32 base)
1973     {
1974     emit_byte(0xff);
1975     emit_byte(0x25);
1976     emit_long(base);
1977     }
1978    
1979    
1980     static __inline__ void raw_call(uae_u32 t)
1981     {
1982     emit_byte(0xe8);
1983     emit_long(t-(uae_u32)target-4);
1984     }
1985    
1986     static __inline__ void raw_jmp(uae_u32 t)
1987     {
1988     emit_byte(0xe9);
1989     emit_long(t-(uae_u32)target-4);
1990     }
1991    
1992     static __inline__ void raw_jl(uae_u32 t)
1993     {
1994     emit_byte(0x0f);
1995     emit_byte(0x8c);
1996     emit_long(t-(uae_u32)target-4);
1997     }
1998    
1999     static __inline__ void raw_jz(uae_u32 t)
2000     {
2001     emit_byte(0x0f);
2002     emit_byte(0x84);
2003     emit_long(t-(uae_u32)target-4);
2004     }
2005    
2006     static __inline__ void raw_jnz(uae_u32 t)
2007     {
2008     emit_byte(0x0f);
2009     emit_byte(0x85);
2010     emit_long(t-(uae_u32)target-4);
2011     }
2012    
2013     static __inline__ void raw_jnz_l_oponly(void)
2014     {
2015     emit_byte(0x0f);
2016     emit_byte(0x85);
2017     }
2018    
2019     static __inline__ void raw_jcc_l_oponly(int cc)
2020     {
2021     emit_byte(0x0f);
2022     emit_byte(0x80+cc);
2023     }
2024    
2025     static __inline__ void raw_jnz_b_oponly(void)
2026     {
2027     emit_byte(0x75);
2028     }
2029    
2030     static __inline__ void raw_jz_b_oponly(void)
2031     {
2032     emit_byte(0x74);
2033     }
2034    
2035     static __inline__ void raw_jcc_b_oponly(int cc)
2036     {
2037     emit_byte(0x70+cc);
2038     }
2039    
2040     static __inline__ void raw_jmp_l_oponly(void)
2041     {
2042     emit_byte(0xe9);
2043     }
2044    
2045     static __inline__ void raw_jmp_b_oponly(void)
2046     {
2047     emit_byte(0xeb);
2048     }
2049    
2050     static __inline__ void raw_ret(void)
2051     {
2052     emit_byte(0xc3);
2053     }
2054    
2055     static __inline__ void raw_nop(void)
2056     {
2057     emit_byte(0x90);
2058     }
2059    
2060 gbeauche 1.8 static __inline__ void raw_emit_nop_filler(int nbytes)
2061     {
2062     /* Source: GNU Binutils 2.12.90.0.15 */
2063     /* Various efficient no-op patterns for aligning code labels.
2064     Note: Don't try to assemble the instructions in the comments.
2065     0L and 0w are not legal. */
2066     static const uae_u8 f32_1[] =
2067     {0x90}; /* nop */
2068     static const uae_u8 f32_2[] =
2069     {0x89,0xf6}; /* movl %esi,%esi */
2070     static const uae_u8 f32_3[] =
2071     {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
2072     static const uae_u8 f32_4[] =
2073     {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2074     static const uae_u8 f32_5[] =
2075     {0x90, /* nop */
2076     0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2077     static const uae_u8 f32_6[] =
2078     {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
2079     static const uae_u8 f32_7[] =
2080     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2081     static const uae_u8 f32_8[] =
2082     {0x90, /* nop */
2083     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2084     static const uae_u8 f32_9[] =
2085     {0x89,0xf6, /* movl %esi,%esi */
2086     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2087     static const uae_u8 f32_10[] =
2088     {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
2089     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2090     static const uae_u8 f32_11[] =
2091     {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
2092     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2093     static const uae_u8 f32_12[] =
2094     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2095     0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
2096     static const uae_u8 f32_13[] =
2097     {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2098     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2099     static const uae_u8 f32_14[] =
2100     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
2101     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2102     static const uae_u8 f32_15[] =
2103     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2104     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2105     static const uae_u8 f32_16[] =
2106     {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2107     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2108     static const uae_u8 *const f32_patt[] = {
2109     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
2110     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
2111     };
2112    
2113     int nloops = nbytes / 16;
2114     while (nloops-- > 0)
2115     emit_block(f32_16, sizeof(f32_16));
2116    
2117     nbytes %= 16;
2118     if (nbytes)
2119     emit_block(f32_patt[nbytes - 1], nbytes);
2120     }
2121    
2122 gbeauche 1.1
2123     /*************************************************************************
2124     * Flag handling, to and fro UAE flag register *
2125     *************************************************************************/
2126    
2127     #ifdef SAHF_SETO_PROFITABLE
2128    
2129     #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2130    
2131     static __inline__ void raw_flags_to_reg(int r)
2132     {
2133     raw_lahf(0); /* Most flags in AH */
2134     //raw_setcc(r,0); /* V flag in AL */
2135     raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2136    
2137     #if 1 /* Let's avoid those nasty partial register stalls */
2138     //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2139     raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2140     //live.state[FLAGTMP].status=CLEAN;
2141     live.state[FLAGTMP].status=INMEM;
2142     live.state[FLAGTMP].realreg=-1;
2143     /* We just "evicted" FLAGTMP. */
2144     if (live.nat[r].nholds!=1) {
2145     /* Huh? */
2146     abort();
2147     }
2148     live.nat[r].nholds=0;
2149     #endif
2150     }
2151    
2152     #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2153     static __inline__ void raw_reg_to_flags(int r)
2154     {
2155     raw_cmp_b_ri(r,-127); /* set V */
2156     raw_sahf(0);
2157     }
2158    
2159     #else
2160    
2161     #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2162     static __inline__ void raw_flags_to_reg(int r)
2163     {
2164     raw_pushfl();
2165     raw_pop_l_r(r);
2166     raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2167     // live.state[FLAGTMP].status=CLEAN;
2168     live.state[FLAGTMP].status=INMEM;
2169     live.state[FLAGTMP].realreg=-1;
2170     /* We just "evicted" FLAGTMP. */
2171     if (live.nat[r].nholds!=1) {
2172     /* Huh? */
2173     abort();
2174     }
2175     live.nat[r].nholds=0;
2176     }
2177    
2178     #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2179     static __inline__ void raw_reg_to_flags(int r)
2180     {
2181     raw_push_l_r(r);
2182     raw_popfl();
2183     }
2184    
2185     #endif
2186    
2187     /* Apparently, there are enough instructions between flag store and
2188     flag reload to avoid the partial memory stall */
2189     static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2190     {
2191     #if 1
2192     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2193     #else
2194     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2195     raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2196     #endif
2197     }
2198    
2199     /* FLAGX is byte sized, and we *do* write it at that size */
2200     static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2201     {
2202     if (live.nat[target].canbyte)
2203     raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2204     else if (live.nat[target].canword)
2205     raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2206     else
2207     raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2208     }
2209    
2210    
2211     static __inline__ void raw_inc_sp(int off)
2212     {
2213 gbeauche 1.2 raw_add_l_ri(ESP_INDEX,off);
2214 gbeauche 1.1 }
2215    
2216     /*************************************************************************
2217     * Handling mistaken direct memory access *
2218     *************************************************************************/
2219    
2220     // gb-- I don't need that part for JIT Basilisk II
2221     #if defined(NATMEM_OFFSET) && 0
2222     #include <asm/sigcontext.h>
2223     #include <signal.h>
2224    
2225     #define SIG_READ 1
2226     #define SIG_WRITE 2
2227    
2228     static int in_handler=0;
2229     static uae_u8 veccode[256];
2230    
2231     static void vec(int x, struct sigcontext sc)
2232     {
2233     uae_u8* i=(uae_u8*)sc.eip;
2234     uae_u32 addr=sc.cr2;
2235     int r=-1;
2236     int size=4;
2237     int dir=-1;
2238     int len=0;
2239     int j;
2240    
2241     write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2242     if (!canbang)
2243     write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2244     if (in_handler)
2245     write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2246    
2247     if (canbang && i>=compiled_code && i<=current_compile_p) {
2248     if (*i==0x66) {
2249     i++;
2250     size=2;
2251     len++;
2252     }
2253    
2254     switch(i[0]) {
2255     case 0x8a:
2256     if ((i[1]&0xc0)==0x80) {
2257     r=(i[1]>>3)&7;
2258     dir=SIG_READ;
2259     size=1;
2260     len+=6;
2261     break;
2262     }
2263     break;
2264     case 0x88:
2265     if ((i[1]&0xc0)==0x80) {
2266     r=(i[1]>>3)&7;
2267     dir=SIG_WRITE;
2268     size=1;
2269     len+=6;
2270     break;
2271     }
2272     break;
2273     case 0x8b:
2274     if ((i[1]&0xc0)==0x80) {
2275     r=(i[1]>>3)&7;
2276     dir=SIG_READ;
2277     len+=6;
2278     break;
2279     }
2280     if ((i[1]&0xc0)==0x40) {
2281     r=(i[1]>>3)&7;
2282     dir=SIG_READ;
2283     len+=3;
2284     break;
2285     }
2286     break;
2287     case 0x89:
2288     if ((i[1]&0xc0)==0x80) {
2289     r=(i[1]>>3)&7;
2290     dir=SIG_WRITE;
2291     len+=6;
2292     break;
2293     }
2294     if ((i[1]&0xc0)==0x40) {
2295     r=(i[1]>>3)&7;
2296     dir=SIG_WRITE;
2297     len+=3;
2298     break;
2299     }
2300     break;
2301     }
2302     }
2303    
2304     if (r!=-1) {
2305     void* pr=NULL;
2306     write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2307    
2308     switch(r) {
2309     case 0: pr=&(sc.eax); break;
2310     case 1: pr=&(sc.ecx); break;
2311     case 2: pr=&(sc.edx); break;
2312     case 3: pr=&(sc.ebx); break;
2313     case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2314     case 5: pr=(size>1)?
2315     (void*)(&(sc.ebp)):
2316     (void*)(((uae_u8*)&(sc.ecx))+1); break;
2317     case 6: pr=(size>1)?
2318     (void*)(&(sc.esi)):
2319     (void*)(((uae_u8*)&(sc.edx))+1); break;
2320     case 7: pr=(size>1)?
2321     (void*)(&(sc.edi)):
2322     (void*)(((uae_u8*)&(sc.ebx))+1); break;
2323     default: abort();
2324     }
2325     if (pr) {
2326     blockinfo* bi;
2327    
2328     if (currprefs.comp_oldsegv) {
2329     addr-=NATMEM_OFFSET;
2330    
2331     if ((addr>=0x10000000 && addr<0x40000000) ||
2332     (addr>=0x50000000)) {
2333     write_log("Suspicious address in %x SEGV handler.\n",addr);
2334     }
2335     if (dir==SIG_READ) {
2336     switch(size) {
2337     case 1: *((uae_u8*)pr)=get_byte(addr); break;
2338     case 2: *((uae_u16*)pr)=get_word(addr); break;
2339     case 4: *((uae_u32*)pr)=get_long(addr); break;
2340     default: abort();
2341     }
2342     }
2343     else { /* write */
2344     switch(size) {
2345     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2346     case 2: put_word(addr,*((uae_u16*)pr)); break;
2347     case 4: put_long(addr,*((uae_u32*)pr)); break;
2348     default: abort();
2349     }
2350     }
2351     write_log("Handled one access!\n");
2352     fflush(stdout);
2353     segvcount++;
2354     sc.eip+=len;
2355     }
2356     else {
2357     void* tmp=target;
2358     int i;
2359     uae_u8 vecbuf[5];
2360    
2361     addr-=NATMEM_OFFSET;
2362    
2363     if ((addr>=0x10000000 && addr<0x40000000) ||
2364     (addr>=0x50000000)) {
2365     write_log("Suspicious address in %x SEGV handler.\n",addr);
2366     }
2367    
2368     target=(uae_u8*)sc.eip;
2369     for (i=0;i<5;i++)
2370     vecbuf[i]=target[i];
2371     emit_byte(0xe9);
2372     emit_long((uae_u32)veccode-(uae_u32)target-4);
2373     write_log("Create jump to %p\n",veccode);
2374    
2375     write_log("Handled one access!\n");
2376     fflush(stdout);
2377     segvcount++;
2378    
2379     target=veccode;
2380    
2381     if (dir==SIG_READ) {
2382     switch(size) {
2383     case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2384     case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2385     case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2386     default: abort();
2387     }
2388     }
2389     else { /* write */
2390     switch(size) {
2391     case 1: put_byte(addr,*((uae_u8*)pr)); break;
2392     case 2: put_word(addr,*((uae_u16*)pr)); break;
2393     case 4: put_long(addr,*((uae_u32*)pr)); break;
2394     default: abort();
2395     }
2396     }
2397     for (i=0;i<5;i++)
2398     raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2399     raw_mov_l_mi((uae_u32)&in_handler,0);
2400     emit_byte(0xe9);
2401     emit_long(sc.eip+len-(uae_u32)target-4);
2402     in_handler=1;
2403     target=tmp;
2404     }
2405     bi=active;
2406     while (bi) {
2407     if (bi->handler &&
2408     (uae_u8*)bi->direct_handler<=i &&
2409     (uae_u8*)bi->nexthandler>i) {
2410     write_log("deleted trigger (%p<%p<%p) %p\n",
2411     bi->handler,
2412     i,
2413     bi->nexthandler,
2414     bi->pc_p);
2415     invalidate_block(bi);
2416     raise_in_cl_list(bi);
2417     set_special(0);
2418     return;
2419     }
2420     bi=bi->next;
2421     }
2422     /* Not found in the active list. Might be a rom routine that
2423     is in the dormant list */
2424     bi=dormant;
2425     while (bi) {
2426     if (bi->handler &&
2427     (uae_u8*)bi->direct_handler<=i &&
2428     (uae_u8*)bi->nexthandler>i) {
2429     write_log("deleted trigger (%p<%p<%p) %p\n",
2430     bi->handler,
2431     i,
2432     bi->nexthandler,
2433     bi->pc_p);
2434     invalidate_block(bi);
2435     raise_in_cl_list(bi);
2436     set_special(0);
2437     return;
2438     }
2439     bi=bi->next;
2440     }
2441     write_log("Huh? Could not find trigger!\n");
2442     return;
2443     }
2444     }
2445     write_log("Can't handle access!\n");
2446     for (j=0;j<10;j++) {
2447     write_log("instruction byte %2d is %02x\n",j,i[j]);
2448     }
2449     write_log("Please send the above info (starting at \"fault address\") to\n"
2450     "bmeyer@csse.monash.edu.au\n"
2451     "This shouldn't happen ;-)\n");
2452     fflush(stdout);
2453     signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2454     }
2455     #endif
2456    
2457    
2458     /*************************************************************************
2459     * Checking for CPU features *
2460     *************************************************************************/
2461    
2462 gbeauche 1.3 struct cpuinfo_x86 {
2463     uae_u8 x86; // CPU family
2464     uae_u8 x86_vendor; // CPU vendor
2465     uae_u8 x86_processor; // CPU canonical processor type
2466     uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2467     uae_u32 x86_hwcap;
2468     uae_u8 x86_model;
2469     uae_u8 x86_mask;
2470     int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2471     char x86_vendor_id[16];
2472     };
2473     struct cpuinfo_x86 cpuinfo;
2474    
/* CPU vendors recognized by x86_get_cpu_vendor(). */
enum {
    X86_VENDOR_INTEL = 0,
    X86_VENDOR_CYRIX,           /* 1 */
    X86_VENDOR_AMD,             /* 2 */
    X86_VENDOR_UMC,             /* 3 */
    X86_VENDOR_NEXGEN,          /* 4 */
    X86_VENDOR_CENTAUR,         /* 5 */
    X86_VENDOR_RISE,            /* 6 */
    X86_VENDOR_TRANSMETA,       /* 7 */
    X86_VENDOR_NSC,             /* 8 */
    X86_VENDOR_UNKNOWN = 0xff
};
2487    
/* Canonical processor types; X86_PROCESSOR_max is the number of entries
   and doubles as the "unknown" marker. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_max        = 7
};
2498    
2499     static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2500     "80386",
2501     "80486",
2502     "Pentium",
2503     "PentiumPro",
2504     "K6",
2505     "Athlon",
2506     "Pentium4"
2507     };
2508    
2509     static struct ptt {
2510     const int align_loop;
2511     const int align_loop_max_skip;
2512     const int align_jump;
2513     const int align_jump_max_skip;
2514     const int align_func;
2515     }
2516     x86_alignments[X86_PROCESSOR_max] = {
2517     { 4, 3, 4, 3, 4 },
2518     { 16, 15, 16, 15, 16 },
2519     { 16, 7, 16, 7, 16 },
2520     { 16, 15, 16, 7, 16 },
2521     { 32, 7, 32, 7, 32 },
2522 gbeauche 1.4 { 16, 7, 16, 7, 16 },
2523 gbeauche 1.3 { 0, 0, 0, 0, 0 }
2524     };
2525 gbeauche 1.1
2526 gbeauche 1.3 static void
2527     x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2528 gbeauche 1.1 {
2529 gbeauche 1.3 char *v = c->x86_vendor_id;
2530    
2531     if (!strcmp(v, "GenuineIntel"))
2532     c->x86_vendor = X86_VENDOR_INTEL;
2533     else if (!strcmp(v, "AuthenticAMD"))
2534     c->x86_vendor = X86_VENDOR_AMD;
2535     else if (!strcmp(v, "CyrixInstead"))
2536     c->x86_vendor = X86_VENDOR_CYRIX;
2537     else if (!strcmp(v, "Geode by NSC"))
2538     c->x86_vendor = X86_VENDOR_NSC;
2539     else if (!strcmp(v, "UMC UMC UMC "))
2540     c->x86_vendor = X86_VENDOR_UMC;
2541     else if (!strcmp(v, "CentaurHauls"))
2542     c->x86_vendor = X86_VENDOR_CENTAUR;
2543     else if (!strcmp(v, "NexGenDriven"))
2544     c->x86_vendor = X86_VENDOR_NEXGEN;
2545     else if (!strcmp(v, "RiseRiseRise"))
2546     c->x86_vendor = X86_VENDOR_RISE;
2547     else if (!strcmp(v, "GenuineTMx86") ||
2548     !strcmp(v, "TransmetaCPU"))
2549     c->x86_vendor = X86_VENDOR_TRANSMETA;
2550     else
2551     c->x86_vendor = X86_VENDOR_UNKNOWN;
2552     }
2553 gbeauche 1.1
2554 gbeauche 1.3 static void
2555     cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2556     {
2557     static uae_u8 cpuid_space[256];
2558     uae_u8* tmp=get_target();
2559 gbeauche 1.1
2560 gbeauche 1.3 set_target(cpuid_space);
2561     raw_push_l_r(0); /* eax */
2562     raw_push_l_r(1); /* ecx */
2563     raw_push_l_r(2); /* edx */
2564     raw_push_l_r(3); /* ebx */
2565     raw_mov_l_rm(0,(uae_u32)&op);
2566     raw_cpuid(0);
2567     if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2568     if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2569     if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2570     if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2571     raw_pop_l_r(3);
2572     raw_pop_l_r(2);
2573     raw_pop_l_r(1);
2574     raw_pop_l_r(0);
2575     raw_ret();
2576     set_target(tmp);
2577 gbeauche 1.1
2578 gbeauche 1.3 ((cpuop_func*)cpuid_space)(0);
2579 gbeauche 1.1 }
2580    
2581 gbeauche 1.3 static void
2582     raw_init_cpu(void)
2583 gbeauche 1.1 {
2584 gbeauche 1.3 struct cpuinfo_x86 *c = &cpuinfo;
2585    
2586     /* Defaults */
2587     c->x86_vendor = X86_VENDOR_UNKNOWN;
2588     c->cpuid_level = -1; /* CPUID not detected */
2589     c->x86_model = c->x86_mask = 0; /* So far unknown... */
2590     c->x86_vendor_id[0] = '\0'; /* Unset */
2591     c->x86_hwcap = 0;
2592    
2593     /* Get vendor name */
2594     c->x86_vendor_id[12] = '\0';
2595     cpuid(0x00000000,
2596     (uae_u32 *)&c->cpuid_level,
2597     (uae_u32 *)&c->x86_vendor_id[0],
2598     (uae_u32 *)&c->x86_vendor_id[8],
2599     (uae_u32 *)&c->x86_vendor_id[4]);
2600     x86_get_cpu_vendor(c);
2601    
2602     /* Intel-defined flags: level 0x00000001 */
2603     c->x86_brand_id = 0;
2604     if ( c->cpuid_level >= 0x00000001 ) {
2605     uae_u32 tfms, brand_id;
2606     cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2607     c->x86 = (tfms >> 8) & 15;
2608     c->x86_model = (tfms >> 4) & 15;
2609     c->x86_brand_id = brand_id & 0xff;
2610     if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2611     (c->x86 == 0xf)) {
2612     /* AMD Extended Family and Model Values */
2613     c->x86 += (tfms >> 20) & 0xff;
2614     c->x86_model += (tfms >> 12) & 0xf0;
2615     }
2616     c->x86_mask = tfms & 15;
2617     } else {
2618     /* Have CPUID level 0 only - unheard of */
2619     c->x86 = 4;
2620     }
2621    
2622     /* Canonicalize processor ID */
2623     c->x86_processor = X86_PROCESSOR_max;
2624     switch (c->x86) {
2625     case 3:
2626     c->x86_processor = X86_PROCESSOR_I386;
2627     break;
2628     case 4:
2629     c->x86_processor = X86_PROCESSOR_I486;
2630     break;
2631     case 5:
2632     if (c->x86_vendor == X86_VENDOR_AMD)
2633     c->x86_processor = X86_PROCESSOR_K6;
2634     else
2635     c->x86_processor = X86_PROCESSOR_PENTIUM;
2636     break;
2637     case 6:
2638     if (c->x86_vendor == X86_VENDOR_AMD)
2639     c->x86_processor = X86_PROCESSOR_ATHLON;
2640     else
2641     c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2642     break;
2643     case 15:
2644     if (c->x86_vendor == X86_VENDOR_INTEL) {
2645     /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2646     if (c->x86_brand_id >= 8)
2647     c->x86_processor = X86_PROCESSOR_PENTIUM4;
2648     }
2649     break;
2650     }
2651     if (c->x86_processor == X86_PROCESSOR_max) {
2652     fprintf(stderr, "Error: unknown processor type\n");
2653     fprintf(stderr, " Family : %d\n", c->x86);
2654     fprintf(stderr, " Model : %d\n", c->x86_model);
2655     fprintf(stderr, " Mask : %d\n", c->x86_mask);
2656     if (c->x86_brand_id)
2657     fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2658     abort();
2659     }
2660    
2661     /* Have CMOV support? */
2662     have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2663    
2664     /* Can the host CPU suffer from partial register stalls? */
2665     have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2666     #if 1
2667     /* It appears that partial register writes are a bad idea even on
2668 gbeauche 1.1 AMD K7 cores, even though they are not supposed to have the
2669     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2670 gbeauche 1.3 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2671     have_rat_stall = true;
2672 gbeauche 1.1 #endif
2673 gbeauche 1.3
2674     /* Alignments */
2675     if (tune_alignment) {
2676     align_loops = x86_alignments[c->x86_processor].align_loop;
2677     align_jumps = x86_alignments[c->x86_processor].align_jump;
2678     }
2679    
2680     write_log("Max CPUID level=%d Processor is %s [%s]\n",
2681     c->cpuid_level, c->x86_vendor_id,
2682     x86_processor_string_table[c->x86_processor]);
2683 gbeauche 1.1 }
2684    
2685    
2686     /*************************************************************************
2687     * FPU stuff *
2688     *************************************************************************/
2689    
2690    
2691     static __inline__ void raw_fp_init(void)
2692     {
2693     int i;
2694    
2695     for (i=0;i<N_FREGS;i++)
2696     live.spos[i]=-2;
2697     live.tos=-1; /* Stack is empty */
2698     }
2699    
2700     static __inline__ void raw_fp_cleanup_drop(void)
2701     {
2702     #if 0
2703     /* using FINIT instead of popping all the entries.
2704     Seems to have side effects --- there is display corruption in
2705     Quake when this is used */
2706     if (live.tos>1) {
2707     emit_byte(0x9b);
2708     emit_byte(0xdb);
2709     emit_byte(0xe3);
2710     live.tos=-1;
2711     }
2712     #endif
2713     while (live.tos>=1) {
2714     emit_byte(0xde);
2715     emit_byte(0xd9);
2716     live.tos-=2;
2717     }
2718     while (live.tos>=0) {
2719     emit_byte(0xdd);
2720     emit_byte(0xd8);
2721     live.tos--;
2722     }
2723     raw_fp_init();
2724     }
2725    
2726     static __inline__ void make_tos(int r)
2727     {
2728     int p,q;
2729    
2730     if (live.spos[r]<0) { /* Register not yet on stack */
2731     emit_byte(0xd9);
2732     emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2733     live.tos++;
2734     live.spos[r]=live.tos;
2735     live.onstack[live.tos]=r;
2736     return;
2737     }
2738     /* Register is on stack */
2739     if (live.tos==live.spos[r])
2740     return;
2741     p=live.spos[r];
2742     q=live.onstack[live.tos];
2743    
2744     emit_byte(0xd9);
2745     emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2746     live.onstack[live.tos]=r;
2747     live.spos[r]=live.tos;
2748     live.onstack[p]=q;
2749     live.spos[q]=p;
2750     }
2751    
2752     static __inline__ void make_tos2(int r, int r2)
2753     {
2754     int q;
2755    
2756     make_tos(r2); /* Put the reg that's supposed to end up in position2
2757     on top */
2758    
2759     if (live.spos[r]<0) { /* Register not yet on stack */
2760     make_tos(r); /* This will extend the stack */
2761     return;
2762     }
2763     /* Register is on stack */
2764     emit_byte(0xd9);
2765     emit_byte(0xc9); /* Move r2 into position 2 */
2766    
2767     q=live.onstack[live.tos-1];
2768     live.onstack[live.tos]=q;
2769     live.spos[q]=live.tos;
2770     live.onstack[live.tos-1]=r2;
2771     live.spos[r2]=live.tos-1;
2772    
2773     make_tos(r); /* And r into 1 */
2774     }
2775    
2776     static __inline__ int stackpos(int r)
2777     {
2778     if (live.spos[r]<0)
2779     abort();
2780     if (live.tos<live.spos[r]) {
2781     printf("Looking for spos for fnreg %d\n",r);
2782     abort();
2783     }
2784     return live.tos-live.spos[r];
2785     }
2786    
2787     static __inline__ void usereg(int r)
2788     {
2789     if (live.spos[r]<0)
2790     make_tos(r);
2791     }
2792    
2793     /* This is called with one FP value in a reg *above* tos, which it will
2794     pop off the stack if necessary */
2795     static __inline__ void tos_make(int r)
2796     {
2797     if (live.spos[r]<0) {
2798     live.tos++;
2799     live.spos[r]=live.tos;
2800     live.onstack[live.tos]=r;
2801     return;
2802     }
2803     emit_byte(0xdd);
2804     emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2805     and pop it*/
2806     }
2807    
2808    
2809     LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2810     {
2811     make_tos(r);
2812     emit_byte(0xdd);
2813     emit_byte(0x15);
2814     emit_long(m);
2815     }
2816     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2817    
2818     LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2819     {
2820     make_tos(r);
2821     emit_byte(0xdd);
2822     emit_byte(0x1d);
2823     emit_long(m);
2824     live.onstack[live.tos]=-1;
2825     live.tos--;
2826     live.spos[r]=-2;
2827     }
2828     LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2829    
2830     LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2831     {
2832     emit_byte(0xdd);
2833     emit_byte(0x05);
2834     emit_long(m);
2835     tos_make(r);
2836     }
2837     LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2838    
2839     LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2840     {
2841     emit_byte(0xdb);
2842     emit_byte(0x05);
2843     emit_long(m);
2844     tos_make(r);
2845     }
2846     LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2847    
2848     LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2849     {
2850     make_tos(r);
2851     emit_byte(0xdb);
2852     emit_byte(0x15);
2853     emit_long(m);
2854     }
2855     LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2856    
2857     LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2858     {
2859     emit_byte(0xd9);
2860     emit_byte(0x05);
2861     emit_long(m);
2862     tos_make(r);
2863     }
2864     LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2865    
2866     LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2867     {
2868     make_tos(r);
2869     emit_byte(0xd9);
2870     emit_byte(0x15);
2871     emit_long(m);
2872     }
2873     LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2874    
2875     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2876     {
2877     int rs;
2878    
2879     /* Stupid x87 can't write a long double to mem without popping the
2880     stack! */
2881     usereg(r);
2882     rs=stackpos(r);
2883     emit_byte(0xd9); /* Get a copy to the top of stack */
2884     emit_byte(0xc0+rs);
2885    
2886     emit_byte(0xdb); /* store and pop it */
2887     emit_byte(0x3d);
2888     emit_long(m);
2889     }
2890     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2891    
2892     LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2893     {
2894     int rs;
2895    
2896     make_tos(r);
2897     emit_byte(0xdb); /* store and pop it */
2898     emit_byte(0x3d);
2899     emit_long(m);
2900     live.onstack[live.tos]=-1;
2901     live.tos--;
2902     live.spos[r]=-2;
2903     }
2904     LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2905    
2906     LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2907     {
2908     emit_byte(0xdb);
2909     emit_byte(0x2d);
2910     emit_long(m);
2911     tos_make(r);
2912     }
2913     LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2914    
2915     LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2916     {
2917     emit_byte(0xd9);
2918     emit_byte(0xeb);
2919     tos_make(r);
2920     }
2921     LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2922    
2923     LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2924     {
2925     emit_byte(0xd9);
2926     emit_byte(0xec);
2927     tos_make(r);
2928     }
2929     LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2930    
2931     LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2932     {
2933     emit_byte(0xd9);
2934     emit_byte(0xea);
2935     tos_make(r);
2936     }
2937     LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2938    
2939     LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2940     {
2941     emit_byte(0xd9);
2942     emit_byte(0xed);
2943     tos_make(r);
2944     }
2945     LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2946    
2947     LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2948     {
2949     emit_byte(0xd9);
2950     emit_byte(0xe8);
2951     tos_make(r);
2952     }
2953     LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2954    
2955     LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2956     {
2957     emit_byte(0xd9);
2958     emit_byte(0xee);
2959     tos_make(r);
2960     }
2961     LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2962    
2963     LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2964     {
2965     int ds;
2966    
2967     usereg(s);
2968     ds=stackpos(s);
2969     if (ds==0 && live.spos[d]>=0) {
2970     /* source is on top of stack, and we already have the dest */
2971     int dd=stackpos(d);
2972     emit_byte(0xdd);
2973     emit_byte(0xd0+dd);
2974     }
2975     else {
2976     emit_byte(0xd9);
2977     emit_byte(0xc0+ds); /* duplicate source on tos */
2978     tos_make(d); /* store to destination, pop if necessary */
2979     }
2980     }
2981     LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2982    
2983     LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2984     {
2985     emit_byte(0xd9);
2986     emit_byte(0xa8+index);
2987     emit_long(base);
2988     }
2989     LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2990    
2991    
2992     LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2993     {
2994     int ds;
2995    
2996     if (d!=s) {
2997     usereg(s);
2998     ds=stackpos(s);
2999     emit_byte(0xd9);
3000     emit_byte(0xc0+ds); /* duplicate source */
3001     emit_byte(0xd9);
3002     emit_byte(0xfa); /* take square root */
3003     tos_make(d); /* store to destination */
3004     }
3005     else {
3006     make_tos(d);
3007     emit_byte(0xd9);
3008     emit_byte(0xfa); /* take square root */
3009     }
3010     }
3011     LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3012    
3013     LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3014     {
3015     int ds;
3016    
3017     if (d!=s) {
3018     usereg(s);
3019     ds=stackpos(s);
3020     emit_byte(0xd9);
3021     emit_byte(0xc0+ds); /* duplicate source */
3022     emit_byte(0xd9);
3023     emit_byte(0xe1); /* take fabs */
3024     tos_make(d); /* store to destination */
3025     }
3026     else {
3027     make_tos(d);
3028     emit_byte(0xd9);
3029     emit_byte(0xe1); /* take fabs */
3030     }
3031     }
3032     LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3033    
3034     LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3035     {
3036     int ds;
3037    
3038     if (d!=s) {
3039     usereg(s);
3040     ds=stackpos(s);
3041     emit_byte(0xd9);
3042     emit_byte(0xc0+ds); /* duplicate source */
3043     emit_byte(0xd9);
3044     emit_byte(0xfc); /* take frndint */
3045     tos_make(d); /* store to destination */
3046     }
3047     else {
3048     make_tos(d);
3049     emit_byte(0xd9);
3050     emit_byte(0xfc); /* take frndint */
3051     }
3052     }
3053     LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3054    
3055     LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3056     {
3057     int ds;
3058    
3059     if (d!=s) {
3060     usereg(s);
3061     ds=stackpos(s);
3062     emit_byte(0xd9);
3063     emit_byte(0xc0+ds); /* duplicate source */
3064     emit_byte(0xd9);
3065     emit_byte(0xff); /* take cos */
3066     tos_make(d); /* store to destination */
3067     }
3068     else {
3069     make_tos(d);
3070     emit_byte(0xd9);
3071     emit_byte(0xff); /* take cos */
3072     }
3073     }
3074     LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3075    
3076     LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3077     {
3078     int ds;
3079    
3080     if (d!=s) {
3081     usereg(s);
3082     ds=stackpos(s);
3083     emit_byte(0xd9);
3084     emit_byte(0xc0+ds); /* duplicate source */
3085     emit_byte(0xd9);
3086     emit_byte(0xfe); /* take sin */
3087     tos_make(d); /* store to destination */
3088     }
3089     else {
3090     make_tos(d);
3091     emit_byte(0xd9);
3092     emit_byte(0xfe); /* take sin */
3093     }
3094     }
3095     LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3096    
3097     double one=1;
3098     LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3099     {
3100     int ds;
3101    
3102     usereg(s);
3103     ds=stackpos(s);
3104     emit_byte(0xd9);
3105     emit_byte(0xc0+ds); /* duplicate source */
3106    
3107     emit_byte(0xd9);
3108     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3109     emit_byte(0xd9);
3110     emit_byte(0xfc); /* rndint */
3111     emit_byte(0xd9);
3112     emit_byte(0xc9); /* swap top two elements */
3113     emit_byte(0xd8);
3114     emit_byte(0xe1); /* subtract rounded from original */
3115     emit_byte(0xd9);
3116     emit_byte(0xf0); /* f2xm1 */
3117     emit_byte(0xdc);
3118     emit_byte(0x05);
3119     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3120     emit_byte(0xd9);
3121     emit_byte(0xfd); /* and scale it */
3122     emit_byte(0xdd);
3123     emit_byte(0xd9); /* take he rounded value off */
3124     tos_make(d); /* store to destination */
3125     }
3126     LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3127    
3128     LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3129     {
3130     int ds;
3131    
3132     usereg(s);
3133     ds=stackpos(s);
3134     emit_byte(0xd9);
3135     emit_byte(0xc0+ds); /* duplicate source */
3136     emit_byte(0xd9);
3137     emit_byte(0xea); /* fldl2e */
3138     emit_byte(0xde);
3139     emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3140    
3141     emit_byte(0xd9);
3142     emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3143     emit_byte(0xd9);
3144     emit_byte(0xfc); /* rndint */
3145     emit_byte(0xd9);
3146     emit_byte(0xc9); /* swap top two elements */
3147     emit_byte(0xd8);
3148     emit_byte(0xe1); /* subtract rounded from original */
3149     emit_byte(0xd9);
3150     emit_byte(0xf0); /* f2xm1 */
3151     emit_byte(0xdc);
3152     emit_byte(0x05);
3153     emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3154     emit_byte(0xd9);
3155     emit_byte(0xfd); /* and scale it */
3156     emit_byte(0xdd);
3157     emit_byte(0xd9); /* take he rounded value off */
3158     tos_make(d); /* store to destination */
3159     }
3160     LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3161    
3162     LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3163     {
3164     int ds;
3165    
3166     usereg(s);
3167     ds=stackpos(s);
3168     emit_byte(0xd9);
3169     emit_byte(0xc0+ds); /* duplicate source */
3170     emit_byte(0xd9);
3171     emit_byte(0xe8); /* push '1' */
3172     emit_byte(0xd9);
3173     emit_byte(0xc9); /* swap top two */
3174     emit_byte(0xd9);
3175     emit_byte(0xf1); /* take 1*log2(x) */
3176     tos_make(d); /* store to destination */
3177     }
3178     LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3179    
3180    
3181     LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3182     {
3183     int ds;
3184    
3185     if (d!=s) {
3186     usereg(s);
3187     ds=stackpos(s);
3188     emit_byte(0xd9);
3189     emit_byte(0xc0+ds); /* duplicate source */
3190     emit_byte(0xd9);
3191     emit_byte(0xe0); /* take fchs */
3192     tos_make(d); /* store to destination */
3193     }
3194     else {
3195     make_tos(d);
3196     emit_byte(0xd9);
3197     emit_byte(0xe0); /* take fchs */
3198     }
3199     }
3200     LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3201    
3202     LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3203     {
3204     int ds;
3205    
3206     usereg(s);
3207     usereg(d);
3208    
3209     if (live.spos[s]==live.tos) {
3210     /* Source is on top of stack */
3211     ds=stackpos(d);
3212     emit_byte(0xdc);
3213     emit_byte(0xc0+ds); /* add source to dest*/
3214     }
3215     else {
3216     make_tos(d);
3217     ds=stackpos(s);
3218    
3219     emit_byte(0xd8);
3220     emit_byte(0xc0+ds); /* add source to dest*/
3221     }
3222     }
3223     LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3224    
3225     LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3226     {
3227     int ds;
3228    
3229     usereg(s);
3230     usereg(d);
3231    
3232     if (live.spos[s]==live.tos) {
3233     /* Source is on top of stack */
3234     ds=stackpos(d);
3235     emit_byte(0xdc);
3236     emit_byte(0xe8+ds); /* sub source from dest*/
3237     }
3238     else {
3239     make_tos(d);
3240     ds=stackpos(s);
3241    
3242     emit_byte(0xd8);
3243     emit_byte(0xe0+ds); /* sub src from dest */
3244     }
3245     }
3246     LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3247    
3248     LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3249     {
3250     int ds;
3251    
3252     usereg(s);
3253     usereg(d);
3254    
3255     make_tos(d);
3256     ds=stackpos(s);
3257    
3258     emit_byte(0xdd);
3259     emit_byte(0xe0+ds); /* cmp dest with source*/
3260     }
3261     LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3262    
3263     LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3264     {
3265     int ds;
3266    
3267     usereg(s);
3268     usereg(d);
3269    
3270     if (live.spos[s]==live.tos) {
3271     /* Source is on top of stack */
3272     ds=stackpos(d);
3273     emit_byte(0xdc);
3274     emit_byte(0xc8+ds); /* mul dest by source*/
3275     }
3276     else {
3277     make_tos(d);
3278     ds=stackpos(s);
3279    
3280     emit_byte(0xd8);
3281     emit_byte(0xc8+ds); /* mul dest by source*/
3282     }
3283     }
3284     LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3285    
3286     LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3287     {
3288     int ds;
3289    
3290     usereg(s);
3291     usereg(d);
3292    
3293     if (live.spos[s]==live.tos) {
3294     /* Source is on top of stack */
3295     ds=stackpos(d);
3296     emit_byte(0xdc);
3297     emit_byte(0xf8+ds); /* div dest by source */
3298     }
3299     else {
3300     make_tos(d);
3301     ds=stackpos(s);
3302    
3303     emit_byte(0xd8);
3304     emit_byte(0xf0+ds); /* div dest by source*/
3305     }
3306     }
3307     LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3308    
3309     LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3310     {
3311     int ds;
3312    
3313     usereg(s);
3314     usereg(d);
3315    
3316     make_tos2(d,s);
3317     ds=stackpos(s);
3318    
3319     if (ds!=1) {
3320     printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3321     abort();
3322     }
3323     emit_byte(0xd9);
3324     emit_byte(0xf8); /* take rem from dest by source */
3325     }
3326     LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3327    
3328     LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3329     {
3330     int ds;
3331    
3332     usereg(s);
3333     usereg(d);
3334    
3335     make_tos2(d,s);
3336     ds=stackpos(s);
3337    
3338     if (ds!=1) {
3339     printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3340     abort();
3341     }
3342     emit_byte(0xd9);
3343     emit_byte(0xf5); /* take rem1 from dest by source */
3344     }
3345     LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3346    
3347    
3348     LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3349     {
3350     make_tos(r);
3351     emit_byte(0xd9); /* ftst */
3352     emit_byte(0xe4);
3353     }
3354     LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3355    
/* raw_fflags_into_flags() clobbers %eax when the target processor doesn't
   support fucomi (detected through have_cmov): it then has to go through
   fstsw ax + sahf.  The replacement list is parenthesized so the condition
   always expands as a single operand. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
3359    
3360     static __inline__ void raw_fflags_into_flags(int r)
3361     {
3362     int p;
3363    
3364     usereg(r);
3365     p=stackpos(r);
3366    
3367     emit_byte(0xd9);
3368     emit_byte(0xee); /* Push 0 */
3369     emit_byte(0xd9);
3370     emit_byte(0xc9+p); /* swap top two around */
3371     if (have_cmov) {
3372     // gb-- fucomi is for P6 cores only, not K6-2 then...
3373     emit_byte(0xdb);
3374     emit_byte(0xe9+p); /* fucomi them */
3375     }
3376     else {
3377     emit_byte(0xdd);
3378     emit_byte(0xe1+p); /* fucom them */
3379     emit_byte(0x9b);
3380     emit_byte(0xdf);
3381     emit_byte(0xe0); /* fstsw ax */
3382     raw_sahf(0); /* sahf */
3383     }
3384     emit_byte(0xdd);
3385     emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3386     }