ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.5
Committed: 2002-10-01T09:37:03Z (21 years, 9 months ago) by gbeauche
Branch: MAIN
Changes since 1.4: +27 -0 lines
Log Message:
- #include "flags_x86.h" here to get NATIVE_CC_?? helper macros
- Add raw_cmp_b_mi() and raw_call_m_indexed() for generated
  m68k_compile_execute() function

File Contents

# Content
1 /* This should eventually end up in machdep/, but for now, x86 is the
2 only target, and it's easier this way... */
3
4 #include "flags_x86.h"
5
6 /*************************************************************************
7 * Some basic information about the target CPU *
8 *************************************************************************/
9
/* x86 register numbers, as they appear in instruction encodings */
#define EAX_INDEX   0
#define ECX_INDEX   1
#define EDX_INDEX   2
#define EBX_INDEX   3
#define ESP_INDEX   4
#define EBP_INDEX   5
#define ESI_INDEX   6
#define EDI_INDEX   7

/* The register in which subroutines return an integer return value */
#define REG_RESULT  0

/* The registers subroutines take their first and second argument in */
#if defined(_MSC_VER) && !defined(USE_NORMAL_CALLING_CONVENTION)
/* Handle the _fastcall parameters of ECX and EDX */
#  define REG_PAR1  1
#  define REG_PAR2  2
#else
#  define REG_PAR1  0
#  define REG_PAR2  2
#endif

/* Three registers that are not used for any of the above */
#define REG_NOPAR1  6
#define REG_NOPAR2  5
#define REG_NOPAR3  3

/* The register we use for preloading regs.pc_p */
#define REG_PC_PRE  0
#if defined(_MSC_VER) && !defined(USE_NORMAL_CALLING_CONVENTION)
#  define REG_PC_TMP  0
#else
#  define REG_PC_TMP  1   /* Another register that is not REG_PC_PRE */
#endif

/* Register that can be used for the shift count; -1 if any reg will do */
#define SHIFTCOUNT_NREG  1
/* %eax will hold the low 32 bits after a 32x32 mul */
#define MUL_NREG1        0
/* %edx will hold the high 32 bits */
#define MUL_NREG2        2
48
49 uae_s8 always_used[]={4,-1};
50 uae_s8 can_byte[]={0,1,2,3,-1};
51 uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
52
53 /* cpuopti mutate instruction handlers to assume registers are saved
54 by the caller */
55 uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
56
57 /* This *should* be the same as call_saved. But:
58 - We might not really know which registers are saved, and which aren't,
59 so we need to preserve some, but don't want to rely on everyone else
60 also saving those registers
61 - Special registers (such like the stack pointer) should not be "preserved"
62 by pushing, even though they are "saved" across function calls
63 */
64 uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
65
/* Whether classes of instructions do or don't clobber the native flags.
   An empty expansion means "flags untouched"; the others call clobber_flags(). */

/* Data movement */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH

/* Arithmetic and logic */
#define CLOBBER_SUB   clobber_flags()
#define CLOBBER_SBB   clobber_flags()
#define CLOBBER_CMP   clobber_flags()
#define CLOBBER_ADD   clobber_flags()
#define CLOBBER_ADC   clobber_flags()
#define CLOBBER_AND   clobber_flags()
#define CLOBBER_OR    clobber_flags()
#define CLOBBER_XOR   clobber_flags()

/* Rotates, shifts and tests */
#define CLOBBER_ROL   clobber_flags()
#define CLOBBER_ROR   clobber_flags()
#define CLOBBER_SHLL  clobber_flags()
#define CLOBBER_SHRL  clobber_flags()
#define CLOBBER_SHRA  clobber_flags()
#define CLOBBER_TEST  clobber_flags()

/* Clears, extensions, swaps and the rest */
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE16
#define CLOBBER_ZE8
#define CLOBBER_SW16  clobber_flags()
#define CLOBBER_SW32
#define CLOBBER_SETCC
#define CLOBBER_MUL   clobber_flags()
#define CLOBBER_BT    clobber_flags()
#define CLOBBER_BSF   clobber_flags()

/* Encoding shortcuts the emitters below may take */
const bool optimize_accum      = true;  /* short accumulator-only opcodes */
const bool optimize_imm8       = true;  /* 8-bit displacements where they fit */
const bool optimize_shift_once = true;  /* dedicated shift/rotate-by-1 opcodes */
103
104 /*************************************************************************
105 * Actual encoding of the instructions on the target CPU *
106 *************************************************************************/
107
108 static __inline__ int isaccum(int r)
109 {
110 return (r == EAX_INDEX);
111 }
112
113 static __inline__ int isbyte(uae_s32 x)
114 {
115 return (x>=-128 && x<=127);
116 }
117
118 static __inline__ int isword(uae_s32 x)
119 {
120 return (x>=-32768 && x<=32767);
121 }
122
123 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
124 {
125 emit_byte(0x50+r);
126 }
127 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
128
129 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
130 {
131 emit_byte(0x58+r);
132 }
133 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
134
135 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
136 {
137 emit_byte(0x0f);
138 emit_byte(0xba);
139 emit_byte(0xe0+r);
140 emit_byte(i);
141 }
142 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
143
144 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
145 {
146 emit_byte(0x0f);
147 emit_byte(0xa3);
148 emit_byte(0xc0+8*b+r);
149 }
150 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
151
152 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
153 {
154 emit_byte(0x0f);
155 emit_byte(0xba);
156 emit_byte(0xf8+r);
157 emit_byte(i);
158 }
159 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
160
161 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
162 {
163 emit_byte(0x0f);
164 emit_byte(0xbb);
165 emit_byte(0xc0+8*b+r);
166 }
167 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
168
169
170 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
171 {
172 emit_byte(0x0f);
173 emit_byte(0xba);
174 emit_byte(0xf0+r);
175 emit_byte(i);
176 }
177 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
178
179 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
180 {
181 emit_byte(0x0f);
182 emit_byte(0xb3);
183 emit_byte(0xc0+8*b+r);
184 }
185 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
186
187 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
188 {
189 emit_byte(0x0f);
190 emit_byte(0xba);
191 emit_byte(0xe8+r);
192 emit_byte(i);
193 }
194 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
195
196 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
197 {
198 emit_byte(0x0f);
199 emit_byte(0xab);
200 emit_byte(0xc0+8*b+r);
201 }
202 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
203
204 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
205 {
206 emit_byte(0x66);
207 if (isbyte(i)) {
208 emit_byte(0x83);
209 emit_byte(0xe8+d);
210 emit_byte(i);
211 }
212 else {
213 if (optimize_accum && isaccum(d))
214 emit_byte(0x2d);
215 else {
216 emit_byte(0x81);
217 emit_byte(0xe8+d);
218 }
219 emit_word(i);
220 }
221 }
222 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
223
224
225 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
226 {
227 emit_byte(0x8b);
228 emit_byte(0x05+8*d);
229 emit_long(s);
230 }
231 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
232
233 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
234 {
235 emit_byte(0xc7);
236 emit_byte(0x05);
237 emit_long(d);
238 emit_long(s);
239 }
240 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
241
242 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
243 {
244 emit_byte(0x66);
245 emit_byte(0xc7);
246 emit_byte(0x05);
247 emit_long(d);
248 emit_word(s);
249 }
250 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
251
252 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
253 {
254 emit_byte(0xc6);
255 emit_byte(0x05);
256 emit_long(d);
257 emit_byte(s);
258 }
259 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
260
261 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
262 {
263 if (optimize_shift_once && (i == 1)) {
264 emit_byte(0xd0);
265 emit_byte(0x05);
266 emit_long(d);
267 }
268 else {
269 emit_byte(0xc0);
270 emit_byte(0x05);
271 emit_long(d);
272 emit_byte(i);
273 }
274 }
275 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
276
277 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
278 {
279 if (optimize_shift_once && (i == 1)) {
280 emit_byte(0xd0);
281 emit_byte(0xc0+r);
282 }
283 else {
284 emit_byte(0xc0);
285 emit_byte(0xc0+r);
286 emit_byte(i);
287 }
288 }
289 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
290
291 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
292 {
293 emit_byte(0x66);
294 emit_byte(0xc1);
295 emit_byte(0xc0+r);
296 emit_byte(i);
297 }
298 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
299
300 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
301 {
302 if (optimize_shift_once && (i == 1)) {
303 emit_byte(0xd1);
304 emit_byte(0xc0+r);
305 }
306 else {
307 emit_byte(0xc1);
308 emit_byte(0xc0+r);
309 emit_byte(i);
310 }
311 }
312 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
313
314 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
315 {
316 emit_byte(0xd3);
317 emit_byte(0xc0+d);
318 }
319 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320
321 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
322 {
323 emit_byte(0x66);
324 emit_byte(0xd3);
325 emit_byte(0xc0+d);
326 }
327 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
328
329 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
330 {
331 emit_byte(0xd2);
332 emit_byte(0xc0+d);
333 }
334 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
335
336 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
337 {
338 emit_byte(0xd3);
339 emit_byte(0xe0+d);
340 }
341 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
342
343 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344 {
345 emit_byte(0x66);
346 emit_byte(0xd3);
347 emit_byte(0xe0+d);
348 }
349 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
350
351 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
352 {
353 emit_byte(0xd2);
354 emit_byte(0xe0+d);
355 }
356 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
357
358 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
359 {
360 if (optimize_shift_once && (i == 1)) {
361 emit_byte(0xd0);
362 emit_byte(0xc8+r);
363 }
364 else {
365 emit_byte(0xc0);
366 emit_byte(0xc8+r);
367 emit_byte(i);
368 }
369 }
370 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
371
372 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
373 {
374 emit_byte(0x66);
375 emit_byte(0xc1);
376 emit_byte(0xc8+r);
377 emit_byte(i);
378 }
379 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
380
381 // gb-- used for making an fpcr value in compemu_fpp.cpp
382 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
383 {
384 emit_byte(0x0b);
385 emit_byte(0x05+8*d);
386 emit_long(s);
387 }
388 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
389
390 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
391 {
392 if (optimize_shift_once && (i == 1)) {
393 emit_byte(0xd1);
394 emit_byte(0xc8+r);
395 }
396 else {
397 emit_byte(0xc1);
398 emit_byte(0xc8+r);
399 emit_byte(i);
400 }
401 }
402 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
403
404 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
405 {
406 emit_byte(0xd3);
407 emit_byte(0xc8+d);
408 }
409 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
410
411 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
412 {
413 emit_byte(0x66);
414 emit_byte(0xd3);
415 emit_byte(0xc8+d);
416 }
417 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
418
419 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
420 {
421 emit_byte(0xd2);
422 emit_byte(0xc8+d);
423 }
424 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
425
426 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
427 {
428 emit_byte(0xd3);
429 emit_byte(0xe8+d);
430 }
431 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
432
433 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
434 {
435 emit_byte(0x66);
436 emit_byte(0xd3);
437 emit_byte(0xe8+d);
438 }
439 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
440
441 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
442 {
443 emit_byte(0xd2);
444 emit_byte(0xe8+d);
445 }
446 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
447
448 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
449 {
450 emit_byte(0xd3);
451 emit_byte(0xf8+d);
452 }
453 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
454
455 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
456 {
457 emit_byte(0x66);
458 emit_byte(0xd3);
459 emit_byte(0xf8+d);
460 }
461 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
462
463 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
464 {
465 emit_byte(0xd2);
466 emit_byte(0xf8+d);
467 }
468 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
469
470 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
471 {
472 if (optimize_shift_once && (i == 1)) {
473 emit_byte(0xd1);
474 emit_byte(0xe0+r);
475 }
476 else {
477 emit_byte(0xc1);
478 emit_byte(0xe0+r);
479 emit_byte(i);
480 }
481 }
482 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
483
484 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
485 {
486 emit_byte(0x66);
487 emit_byte(0xc1);
488 emit_byte(0xe0+r);
489 emit_byte(i);
490 }
491 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
492
493 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
494 {
495 if (optimize_shift_once && (i == 1)) {
496 emit_byte(0xd0);
497 emit_byte(0xe0+r);
498 }
499 else {
500 emit_byte(0xc0);
501 emit_byte(0xe0+r);
502 emit_byte(i);
503 }
504 }
505 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
506
507 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
508 {
509 if (optimize_shift_once && (i == 1)) {
510 emit_byte(0xd1);
511 emit_byte(0xe8+r);
512 }
513 else {
514 emit_byte(0xc1);
515 emit_byte(0xe8+r);
516 emit_byte(i);
517 }
518 }
519 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
520
521 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
522 {
523 emit_byte(0x66);
524 emit_byte(0xc1);
525 emit_byte(0xe8+r);
526 emit_byte(i);
527 }
528 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
529
530 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
531 {
532 if (optimize_shift_once && (i == 1)) {
533 emit_byte(0xd0);
534 emit_byte(0xe8+r);
535 }
536 else {
537 emit_byte(0xc0);
538 emit_byte(0xe8+r);
539 emit_byte(i);
540 }
541 }
542 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
543
544 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
545 {
546 if (optimize_shift_once && (i == 1)) {
547 emit_byte(0xd1);
548 emit_byte(0xf8+r);
549 }
550 else {
551 emit_byte(0xc1);
552 emit_byte(0xf8+r);
553 emit_byte(i);
554 }
555 }
556 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
557
558 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
559 {
560 emit_byte(0x66);
561 emit_byte(0xc1);
562 emit_byte(0xf8+r);
563 emit_byte(i);
564 }
565 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
566
567 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
568 {
569 if (optimize_shift_once && (i == 1)) {
570 emit_byte(0xd0);
571 emit_byte(0xf8+r);
572 }
573 else {
574 emit_byte(0xc0);
575 emit_byte(0xf8+r);
576 emit_byte(i);
577 }
578 }
579 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
580
581 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
582 {
583 emit_byte(0x9e);
584 }
585 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
586
587 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
588 {
589 emit_byte(0x0f);
590 emit_byte(0xa2);
591 }
592 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
593
594 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
595 {
596 emit_byte(0x9f);
597 }
598 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
599
600 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
601 {
602 emit_byte(0x0f);
603 emit_byte(0x90+cc);
604 emit_byte(0xc0+d);
605 }
606 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
607
608 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
609 {
610 emit_byte(0x0f);
611 emit_byte(0x90+cc);
612 emit_byte(0x05);
613 emit_long(d);
614 }
615 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
616
617 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
618 {
619 if (have_cmov) {
620 emit_byte(0x0f);
621 emit_byte(0x40+cc);
622 emit_byte(0xc0+8*d+s);
623 }
624 else { /* replacement using branch and mov */
625 int uncc=(cc^1);
626 emit_byte(0x70+uncc);
627 emit_byte(2); /* skip next 2 bytes if not cc=true */
628 emit_byte(0x89);
629 emit_byte(0xc0+8*s+d);
630 }
631 }
632 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
633
634 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
635 {
636 emit_byte(0x0f);
637 emit_byte(0xbc);
638 emit_byte(0xc0+8*d+s);
639 }
640 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
641
642 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
643 {
644 emit_byte(0x0f);
645 emit_byte(0xbf);
646 emit_byte(0xc0+8*d+s);
647 }
648 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
649
650 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
651 {
652 emit_byte(0x0f);
653 emit_byte(0xbe);
654 emit_byte(0xc0+8*d+s);
655 }
656 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
657
658 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
659 {
660 emit_byte(0x0f);
661 emit_byte(0xb7);
662 emit_byte(0xc0+8*d+s);
663 }
664 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
665
666 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
667 {
668 emit_byte(0x0f);
669 emit_byte(0xb6);
670 emit_byte(0xc0+8*d+s);
671 }
672 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
673
674 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
675 {
676 emit_byte(0x0f);
677 emit_byte(0xaf);
678 emit_byte(0xc0+8*d+s);
679 }
680 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
681
682 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
683 {
684 if (d!=MUL_NREG1 || s!=MUL_NREG2)
685 abort();
686 emit_byte(0xf7);
687 emit_byte(0xea);
688 }
689 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
690
691 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
692 {
693 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
694 printf("Bad register in MUL: d=%d, s=%d\n",d,s);
695 abort();
696 }
697 emit_byte(0xf7);
698 emit_byte(0xe2);
699 }
700 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
701
702 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
703 {
704 abort(); /* %^$&%^$%#^ x86! */
705 emit_byte(0x0f);
706 emit_byte(0xaf);
707 emit_byte(0xc0+8*d+s);
708 }
709 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
710
711 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
712 {
713 emit_byte(0x88);
714 emit_byte(0xc0+8*s+d);
715 }
716 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
717
718 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
719 {
720 emit_byte(0x66);
721 emit_byte(0x89);
722 emit_byte(0xc0+8*s+d);
723 }
724 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
725
726 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
727 {
728 int isebp=(baser==5)?0x40:0;
729 int fi;
730
731 switch(factor) {
732 case 1: fi=0; break;
733 case 2: fi=1; break;
734 case 4: fi=2; break;
735 case 8: fi=3; break;
736 default: abort();
737 }
738
739
740 emit_byte(0x8b);
741 emit_byte(0x04+8*d+isebp);
742 emit_byte(baser+8*index+0x40*fi);
743 if (isebp)
744 emit_byte(0x00);
745 }
746 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
747
748 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
749 {
750 int fi;
751 int isebp;
752
753 switch(factor) {
754 case 1: fi=0; break;
755 case 2: fi=1; break;
756 case 4: fi=2; break;
757 case 8: fi=3; break;
758 default: abort();
759 }
760 isebp=(baser==5)?0x40:0;
761
762 emit_byte(0x66);
763 emit_byte(0x8b);
764 emit_byte(0x04+8*d+isebp);
765 emit_byte(baser+8*index+0x40*fi);
766 if (isebp)
767 emit_byte(0x00);
768 }
769 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
770
771 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
772 {
773 int fi;
774 int isebp;
775
776 switch(factor) {
777 case 1: fi=0; break;
778 case 2: fi=1; break;
779 case 4: fi=2; break;
780 case 8: fi=3; break;
781 default: abort();
782 }
783 isebp=(baser==5)?0x40:0;
784
785 emit_byte(0x8a);
786 emit_byte(0x04+8*d+isebp);
787 emit_byte(baser+8*index+0x40*fi);
788 if (isebp)
789 emit_byte(0x00);
790 }
791 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
792
793 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
794 {
795 int fi;
796 int isebp;
797
798 switch(factor) {
799 case 1: fi=0; break;
800 case 2: fi=1; break;
801 case 4: fi=2; break;
802 case 8: fi=3; break;
803 default: abort();
804 }
805
806
807 isebp=(baser==5)?0x40:0;
808
809 emit_byte(0x89);
810 emit_byte(0x04+8*s+isebp);
811 emit_byte(baser+8*index+0x40*fi);
812 if (isebp)
813 emit_byte(0x00);
814 }
815 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
816
817 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
818 {
819 int fi;
820 int isebp;
821
822 switch(factor) {
823 case 1: fi=0; break;
824 case 2: fi=1; break;
825 case 4: fi=2; break;
826 case 8: fi=3; break;
827 default: abort();
828 }
829 isebp=(baser==5)?0x40:0;
830
831 emit_byte(0x66);
832 emit_byte(0x89);
833 emit_byte(0x04+8*s+isebp);
834 emit_byte(baser+8*index+0x40*fi);
835 if (isebp)
836 emit_byte(0x00);
837 }
838 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
839
840 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
841 {
842 int fi;
843 int isebp;
844
845 switch(factor) {
846 case 1: fi=0; break;
847 case 2: fi=1; break;
848 case 4: fi=2; break;
849 case 8: fi=3; break;
850 default: abort();
851 }
852 isebp=(baser==5)?0x40:0;
853
854 emit_byte(0x88);
855 emit_byte(0x04+8*s+isebp);
856 emit_byte(baser+8*index+0x40*fi);
857 if (isebp)
858 emit_byte(0x00);
859 }
860 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
861
862 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
863 {
864 int fi;
865
866 switch(factor) {
867 case 1: fi=0; break;
868 case 2: fi=1; break;
869 case 4: fi=2; break;
870 case 8: fi=3; break;
871 default: abort();
872 }
873
874 emit_byte(0x89);
875 emit_byte(0x84+8*s);
876 emit_byte(baser+8*index+0x40*fi);
877 emit_long(base);
878 }
879 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
880
881 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
882 {
883 int fi;
884
885 switch(factor) {
886 case 1: fi=0; break;
887 case 2: fi=1; break;
888 case 4: fi=2; break;
889 case 8: fi=3; break;
890 default: abort();
891 }
892
893 emit_byte(0x66);
894 emit_byte(0x89);
895 emit_byte(0x84+8*s);
896 emit_byte(baser+8*index+0x40*fi);
897 emit_long(base);
898 }
899 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
900
901 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
902 {
903 int fi;
904
905 switch(factor) {
906 case 1: fi=0; break;
907 case 2: fi=1; break;
908 case 4: fi=2; break;
909 case 8: fi=3; break;
910 default: abort();
911 }
912
913 emit_byte(0x88);
914 emit_byte(0x84+8*s);
915 emit_byte(baser+8*index+0x40*fi);
916 emit_long(base);
917 }
918 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
919
920 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
921 {
922 int fi;
923
924 switch(factor) {
925 case 1: fi=0; break;
926 case 2: fi=1; break;
927 case 4: fi=2; break;
928 case 8: fi=3; break;
929 default: abort();
930 }
931
932 emit_byte(0x8b);
933 emit_byte(0x84+8*d);
934 emit_byte(baser+8*index+0x40*fi);
935 emit_long(base);
936 }
937 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
938
939 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
940 {
941 int fi;
942
943 switch(factor) {
944 case 1: fi=0; break;
945 case 2: fi=1; break;
946 case 4: fi=2; break;
947 case 8: fi=3; break;
948 default: abort();
949 }
950
951 emit_byte(0x66);
952 emit_byte(0x8b);
953 emit_byte(0x84+8*d);
954 emit_byte(baser+8*index+0x40*fi);
955 emit_long(base);
956 }
957 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
958
959 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
960 {
961 int fi;
962
963 switch(factor) {
964 case 1: fi=0; break;
965 case 2: fi=1; break;
966 case 4: fi=2; break;
967 case 8: fi=3; break;
968 default: abort();
969 }
970
971 emit_byte(0x8a);
972 emit_byte(0x84+8*d);
973 emit_byte(baser+8*index+0x40*fi);
974 emit_long(base);
975 }
976 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
977
978 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
979 {
980 int fi;
981 switch(factor) {
982 case 1: fi=0; break;
983 case 2: fi=1; break;
984 case 4: fi=2; break;
985 case 8: fi=3; break;
986 default:
987 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
988 abort();
989 }
990 emit_byte(0x8b);
991 emit_byte(0x04+8*d);
992 emit_byte(0x05+8*index+64*fi);
993 emit_long(base);
994 }
995 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
996
997 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
998 {
999 int fi;
1000 switch(factor) {
1001 case 1: fi=0; break;
1002 case 2: fi=1; break;
1003 case 4: fi=2; break;
1004 case 8: fi=3; break;
1005 default:
1006 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1007 abort();
1008 }
1009 if (have_cmov) {
1010 emit_byte(0x0f);
1011 emit_byte(0x40+cond);
1012 emit_byte(0x04+8*d);
1013 emit_byte(0x05+8*index+64*fi);
1014 emit_long(base);
1015 }
1016 else { /* replacement using branch and mov */
1017 int uncc=(cond^1);
1018 emit_byte(0x70+uncc);
1019 emit_byte(7); /* skip next 7 bytes if not cc=true */
1020 emit_byte(0x8b);
1021 emit_byte(0x04+8*d);
1022 emit_byte(0x05+8*index+64*fi);
1023 emit_long(base);
1024 }
1025 }
1026 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1027
1028 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1029 {
1030 if (have_cmov) {
1031 emit_byte(0x0f);
1032 emit_byte(0x40+cond);
1033 emit_byte(0x05+8*d);
1034 emit_long(mem);
1035 }
1036 else { /* replacement using branch and mov */
1037 int uncc=(cond^1);
1038 emit_byte(0x70+uncc);
1039 emit_byte(6); /* skip next 6 bytes if not cc=true */
1040 emit_byte(0x8b);
1041 emit_byte(0x05+8*d);
1042 emit_long(mem);
1043 }
1044 }
1045 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1046
1047 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1048 {
1049 emit_byte(0x8b);
1050 emit_byte(0x40+8*d+s);
1051 emit_byte(offset);
1052 }
1053 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1054
1055 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1056 {
1057 emit_byte(0x66);
1058 emit_byte(0x8b);
1059 emit_byte(0x40+8*d+s);
1060 emit_byte(offset);
1061 }
1062 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1063
1064 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1065 {
1066 emit_byte(0x8a);
1067 emit_byte(0x40+8*d+s);
1068 emit_byte(offset);
1069 }
1070 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1071
1072 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1073 {
1074 emit_byte(0x8b);
1075 emit_byte(0x80+8*d+s);
1076 emit_long(offset);
1077 }
1078 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1079
1080 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1081 {
1082 emit_byte(0x66);
1083 emit_byte(0x8b);
1084 emit_byte(0x80+8*d+s);
1085 emit_long(offset);
1086 }
1087 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1088
1089 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1090 {
1091 emit_byte(0x8a);
1092 emit_byte(0x80+8*d+s);
1093 emit_long(offset);
1094 }
1095 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1096
1097 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1098 {
1099 emit_byte(0xc7);
1100 emit_byte(0x40+d);
1101 emit_byte(offset);
1102 emit_long(i);
1103 }
1104 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1105
1106 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1107 {
1108 emit_byte(0x66);
1109 emit_byte(0xc7);
1110 emit_byte(0x40+d);
1111 emit_byte(offset);
1112 emit_word(i);
1113 }
1114 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1115
1116 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1117 {
1118 emit_byte(0xc6);
1119 emit_byte(0x40+d);
1120 emit_byte(offset);
1121 emit_byte(i);
1122 }
1123 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1124
1125 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1126 {
1127 emit_byte(0x89);
1128 emit_byte(0x40+8*s+d);
1129 emit_byte(offset);
1130 }
1131 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1132
1133 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1134 {
1135 emit_byte(0x66);
1136 emit_byte(0x89);
1137 emit_byte(0x40+8*s+d);
1138 emit_byte(offset);
1139 }
1140 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1141
1142 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1143 {
1144 emit_byte(0x88);
1145 emit_byte(0x40+8*s+d);
1146 emit_byte(offset);
1147 }
1148 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1149
1150 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1151 {
1152 if (optimize_imm8 && isbyte(offset)) {
1153 emit_byte(0x8d);
1154 emit_byte(0x40+8*d+s);
1155 emit_byte(offset);
1156 }
1157 else {
1158 emit_byte(0x8d);
1159 emit_byte(0x80+8*d+s);
1160 emit_long(offset);
1161 }
1162 }
1163 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1164
1165 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1166 {
1167 int fi;
1168
1169 switch(factor) {
1170 case 1: fi=0; break;
1171 case 2: fi=1; break;
1172 case 4: fi=2; break;
1173 case 8: fi=3; break;
1174 default: abort();
1175 }
1176
1177 if (optimize_imm8 && isbyte(offset)) {
1178 emit_byte(0x8d);
1179 emit_byte(0x44+8*d);
1180 emit_byte(0x40*fi+8*index+s);
1181 emit_byte(offset);
1182 }
1183 else {
1184 emit_byte(0x8d);
1185 emit_byte(0x84+8*d);
1186 emit_byte(0x40*fi+8*index+s);
1187 emit_long(offset);
1188 }
1189 }
1190 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1191
1192 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1193 {
1194 int isebp=(s==5)?0x40:0;
1195 int fi;
1196
1197 switch(factor) {
1198 case 1: fi=0; break;
1199 case 2: fi=1; break;
1200 case 4: fi=2; break;
1201 case 8: fi=3; break;
1202 default: abort();
1203 }
1204
1205 emit_byte(0x8d);
1206 emit_byte(0x04+8*d+isebp);
1207 emit_byte(0x40*fi+8*index+s);
1208 if (isebp)
1209 emit_byte(0);
1210 }
1211 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1212
1213 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1214 {
1215 if (optimize_imm8 && isbyte(offset)) {
1216 emit_byte(0x89);
1217 emit_byte(0x40+8*s+d);
1218 emit_byte(offset);
1219 }
1220 else {
1221 emit_byte(0x89);
1222 emit_byte(0x80+8*s+d);
1223 emit_long(offset);
1224 }
1225 }
1226 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1227
1228 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1229 {
1230 emit_byte(0x66);
1231 emit_byte(0x89);
1232 emit_byte(0x80+8*s+d);
1233 emit_long(offset);
1234 }
1235 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1236
1237 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1238 {
1239 if (optimize_imm8 && isbyte(offset)) {
1240 emit_byte(0x88);
1241 emit_byte(0x40+8*s+d);
1242 emit_byte(offset);
1243 }
1244 else {
1245 emit_byte(0x88);
1246 emit_byte(0x80+8*s+d);
1247 emit_long(offset);
1248 }
1249 }
1250 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1251
1252 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1253 {
1254 emit_byte(0x0f);
1255 emit_byte(0xc8+r);
1256 }
1257 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1258
1259 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1260 {
1261 emit_byte(0x66);
1262 emit_byte(0xc1);
1263 emit_byte(0xc0+r);
1264 emit_byte(0x08);
1265 }
1266 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1267
1268 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1269 {
1270 emit_byte(0x89);
1271 emit_byte(0xc0+8*s+d);
1272 }
1273 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1274
1275 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1276 {
1277 emit_byte(0x89);
1278 emit_byte(0x05+8*s);
1279 emit_long(d);
1280 }
1281 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1282
1283 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1284 {
1285 emit_byte(0x66);
1286 emit_byte(0x89);
1287 emit_byte(0x05+8*s);
1288 emit_long(d);
1289 }
1290 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1291
1292 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1293 {
1294 emit_byte(0x66);
1295 emit_byte(0x8b);
1296 emit_byte(0x05+8*d);
1297 emit_long(s);
1298 }
1299 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1300
1301 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1302 {
1303 emit_byte(0x88);
1304 emit_byte(0x05+8*s);
1305 emit_long(d);
1306 }
1307 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1308
1309 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1310 {
1311 emit_byte(0x8a);
1312 emit_byte(0x05+8*d);
1313 emit_long(s);
1314 }
1315 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1316
1317 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1318 {
1319 emit_byte(0xb8+d);
1320 emit_long(s);
1321 }
1322 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1323
1324 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1325 {
1326 emit_byte(0x66);
1327 emit_byte(0xb8+d);
1328 emit_word(s);
1329 }
1330 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1331
1332 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1333 {
1334 emit_byte(0xb0+d);
1335 emit_byte(s);
1336 }
1337 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1338
1339 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1340 {
1341 emit_byte(0x81);
1342 emit_byte(0x15);
1343 emit_long(d);
1344 emit_long(s);
1345 }
1346 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1347
1348 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1349 {
1350 if (optimize_imm8 && isbyte(s)) {
1351 emit_byte(0x83);
1352 emit_byte(0x05);
1353 emit_long(d);
1354 emit_byte(s);
1355 }
1356 else {
1357 emit_byte(0x81);
1358 emit_byte(0x05);
1359 emit_long(d);
1360 emit_long(s);
1361 }
1362 }
1363 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1364
1365 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1366 {
1367 emit_byte(0x66);
1368 emit_byte(0x81);
1369 emit_byte(0x05);
1370 emit_long(d);
1371 emit_word(s);
1372 }
1373 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1374
1375 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1376 {
1377 emit_byte(0x80);
1378 emit_byte(0x05);
1379 emit_long(d);
1380 emit_byte(s);
1381 }
1382 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1383
1384 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1385 {
1386 if (optimize_accum && isaccum(d))
1387 emit_byte(0xa9);
1388 else {
1389 emit_byte(0xf7);
1390 emit_byte(0xc0+d);
1391 }
1392 emit_long(i);
1393 }
1394 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1395
1396 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1397 {
1398 emit_byte(0x85);
1399 emit_byte(0xc0+8*s+d);
1400 }
1401 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1402
1403 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1404 {
1405 emit_byte(0x66);
1406 emit_byte(0x85);
1407 emit_byte(0xc0+8*s+d);
1408 }
1409 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1410
1411 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1412 {
1413 emit_byte(0x84);
1414 emit_byte(0xc0+8*s+d);
1415 }
1416 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1417
1418 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1419 {
1420 if (optimize_imm8 && isbyte(i)) {
1421 emit_byte(0x83);
1422 emit_byte(0xe0+d);
1423 emit_byte(i);
1424 }
1425 else {
1426 if (optimize_accum && isaccum(d))
1427 emit_byte(0x25);
1428 else {
1429 emit_byte(0x81);
1430 emit_byte(0xe0+d);
1431 }
1432 emit_long(i);
1433 }
1434 }
1435 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1436
1437 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1438 {
1439 emit_byte(0x66);
1440 if (optimize_imm8 && isbyte(i)) {
1441 emit_byte(0x83);
1442 emit_byte(0xe0+d);
1443 emit_byte(i);
1444 }
1445 else {
1446 if (optimize_accum && isaccum(d))
1447 emit_byte(0x25);
1448 else {
1449 emit_byte(0x81);
1450 emit_byte(0xe0+d);
1451 }
1452 emit_word(i);
1453 }
1454 }
1455 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1456
1457 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1458 {
1459 emit_byte(0x21);
1460 emit_byte(0xc0+8*s+d);
1461 }
1462 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1463
1464 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1465 {
1466 emit_byte(0x66);
1467 emit_byte(0x21);
1468 emit_byte(0xc0+8*s+d);
1469 }
1470 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1471
1472 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1473 {
1474 emit_byte(0x20);
1475 emit_byte(0xc0+8*s+d);
1476 }
1477 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1478
1479 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1480 {
1481 if (optimize_imm8 && isbyte(i)) {
1482 emit_byte(0x83);
1483 emit_byte(0xc8+d);
1484 emit_byte(i);
1485 }
1486 else {
1487 if (optimize_accum && isaccum(d))
1488 emit_byte(0x0d);
1489 else {
1490 emit_byte(0x81);
1491 emit_byte(0xc8+d);
1492 }
1493 emit_long(i);
1494 }
1495 }
1496 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1497
1498 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1499 {
1500 emit_byte(0x09);
1501 emit_byte(0xc0+8*s+d);
1502 }
1503 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1504
1505 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1506 {
1507 emit_byte(0x66);
1508 emit_byte(0x09);
1509 emit_byte(0xc0+8*s+d);
1510 }
1511 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1512
1513 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1514 {
1515 emit_byte(0x08);
1516 emit_byte(0xc0+8*s+d);
1517 }
1518 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1519
1520 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1521 {
1522 emit_byte(0x11);
1523 emit_byte(0xc0+8*s+d);
1524 }
1525 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1526
1527 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1528 {
1529 emit_byte(0x66);
1530 emit_byte(0x11);
1531 emit_byte(0xc0+8*s+d);
1532 }
1533 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1534
1535 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1536 {
1537 emit_byte(0x10);
1538 emit_byte(0xc0+8*s+d);
1539 }
1540 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1541
1542 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1543 {
1544 emit_byte(0x01);
1545 emit_byte(0xc0+8*s+d);
1546 }
1547 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1548
1549 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1550 {
1551 emit_byte(0x66);
1552 emit_byte(0x01);
1553 emit_byte(0xc0+8*s+d);
1554 }
1555 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1556
1557 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1558 {
1559 emit_byte(0x00);
1560 emit_byte(0xc0+8*s+d);
1561 }
1562 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1563
1564 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1565 {
1566 if (isbyte(i)) {
1567 emit_byte(0x83);
1568 emit_byte(0xe8+d);
1569 emit_byte(i);
1570 }
1571 else {
1572 if (optimize_accum && isaccum(d))
1573 emit_byte(0x2d);
1574 else {
1575 emit_byte(0x81);
1576 emit_byte(0xe8+d);
1577 }
1578 emit_long(i);
1579 }
1580 }
1581 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1582
1583 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1584 {
1585 if (optimize_accum && isaccum(d))
1586 emit_byte(0x2c);
1587 else {
1588 emit_byte(0x80);
1589 emit_byte(0xe8+d);
1590 }
1591 emit_byte(i);
1592 }
1593 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1594
1595 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1596 {
1597 if (isbyte(i)) {
1598 emit_byte(0x83);
1599 emit_byte(0xc0+d);
1600 emit_byte(i);
1601 }
1602 else {
1603 if (optimize_accum && isaccum(d))
1604 emit_byte(0x05);
1605 else {
1606 emit_byte(0x81);
1607 emit_byte(0xc0+d);
1608 }
1609 emit_long(i);
1610 }
1611 }
1612 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1613
1614 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1615 {
1616 emit_byte(0x66);
1617 if (isbyte(i)) {
1618 emit_byte(0x83);
1619 emit_byte(0xc0+d);
1620 emit_byte(i);
1621 }
1622 else {
1623 if (optimize_accum && isaccum(d))
1624 emit_byte(0x05);
1625 else {
1626 emit_byte(0x81);
1627 emit_byte(0xc0+d);
1628 }
1629 emit_word(i);
1630 }
1631 }
1632 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1633
1634 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1635 {
1636 if (optimize_accum && isaccum(d))
1637 emit_byte(0x04);
1638 else {
1639 emit_byte(0x80);
1640 emit_byte(0xc0+d);
1641 }
1642 emit_byte(i);
1643 }
1644 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1645
1646 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1647 {
1648 emit_byte(0x19);
1649 emit_byte(0xc0+8*s+d);
1650 }
1651 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1652
1653 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1654 {
1655 emit_byte(0x66);
1656 emit_byte(0x19);
1657 emit_byte(0xc0+8*s+d);
1658 }
1659 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1660
1661 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1662 {
1663 emit_byte(0x18);
1664 emit_byte(0xc0+8*s+d);
1665 }
1666 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1667
1668 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1669 {
1670 emit_byte(0x29);
1671 emit_byte(0xc0+8*s+d);
1672 }
1673 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1674
1675 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1676 {
1677 emit_byte(0x66);
1678 emit_byte(0x29);
1679 emit_byte(0xc0+8*s+d);
1680 }
1681 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1682
1683 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1684 {
1685 emit_byte(0x28);
1686 emit_byte(0xc0+8*s+d);
1687 }
1688 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1689
1690 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1691 {
1692 emit_byte(0x39);
1693 emit_byte(0xc0+8*s+d);
1694 }
1695 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1696
1697 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1698 {
1699 if (optimize_imm8 && isbyte(i)) {
1700 emit_byte(0x83);
1701 emit_byte(0xf8+r);
1702 emit_byte(i);
1703 }
1704 else {
1705 if (optimize_accum && isaccum(r))
1706 emit_byte(0x3d);
1707 else {
1708 emit_byte(0x81);
1709 emit_byte(0xf8+r);
1710 }
1711 emit_long(i);
1712 }
1713 }
1714 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1715
1716 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1717 {
1718 emit_byte(0x66);
1719 emit_byte(0x39);
1720 emit_byte(0xc0+8*s+d);
1721 }
1722 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1723
1724 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1725 {
1726 emit_byte(0x80);
1727 emit_byte(0x3d);
1728 emit_long(d);
1729 emit_byte(s);
1730 }
1731 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1732
1733 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1734 {
1735 if (optimize_accum && isaccum(d))
1736 emit_byte(0x3c);
1737 else {
1738 emit_byte(0x80);
1739 emit_byte(0xf8+d);
1740 }
1741 emit_byte(i);
1742 }
1743 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1744
1745 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1746 {
1747 emit_byte(0x38);
1748 emit_byte(0xc0+8*s+d);
1749 }
1750 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1751
1752 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1753 {
1754 int fi;
1755
1756 switch(factor) {
1757 case 1: fi=0; break;
1758 case 2: fi=1; break;
1759 case 4: fi=2; break;
1760 case 8: fi=3; break;
1761 default: abort();
1762 }
1763 emit_byte(0x39);
1764 emit_byte(0x04+8*d);
1765 emit_byte(5+8*index+0x40*fi);
1766 emit_long(offset);
1767 }
1768 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1769
1770 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1771 {
1772 emit_byte(0x31);
1773 emit_byte(0xc0+8*s+d);
1774 }
1775 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1776
1777 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1778 {
1779 emit_byte(0x66);
1780 emit_byte(0x31);
1781 emit_byte(0xc0+8*s+d);
1782 }
1783 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1784
1785 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1786 {
1787 emit_byte(0x30);
1788 emit_byte(0xc0+8*s+d);
1789 }
1790 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1791
1792 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1793 {
1794 if (optimize_imm8 && isbyte(s)) {
1795 emit_byte(0x83);
1796 emit_byte(0x2d);
1797 emit_long(d);
1798 emit_byte(s);
1799 }
1800 else {
1801 emit_byte(0x81);
1802 emit_byte(0x2d);
1803 emit_long(d);
1804 emit_long(s);
1805 }
1806 }
1807 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1808
1809 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1810 {
1811 if (optimize_imm8 && isbyte(s)) {
1812 emit_byte(0x83);
1813 emit_byte(0x3d);
1814 emit_long(d);
1815 emit_byte(s);
1816 }
1817 else {
1818 emit_byte(0x81);
1819 emit_byte(0x3d);
1820 emit_long(d);
1821 emit_long(s);
1822 }
1823 }
1824 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1825
1826 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1827 {
1828 emit_byte(0x87);
1829 emit_byte(0xc0+8*r1+r2);
1830 }
1831 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1832
1833 /*************************************************************************
1834 * FIXME: string-related instructions *
1835 *************************************************************************/
1836
1837 LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1838 {
1839 emit_byte(0xfc);
1840 }
1841 LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1842
1843 LOWFUNC(WRITE,NONE,0,raw_std,(void))
1844 {
1845 emit_byte(0xfd);
1846 }
1847 LENDFUNC(WRITE,NONE,0,raw_std,(void))
1848
1849 LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1850 {
1851 emit_byte(0xa4);
1852 }
1853 LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1854
1855 LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1856 {
1857 emit_byte(0xa5);
1858 }
1859 LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1860
1861 LOWFUNC(NONE,RMW,0,raw_rep,(void))
1862 {
1863 emit_byte(0xf3);
1864 }
1865 LENDFUNC(NONE,RMW,0,raw_rep,(void))
1866
1867 LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1868 {
1869 raw_rep();
1870 raw_movs_b();
1871 }
1872 LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1873
1874 LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1875 {
1876 raw_rep();
1877 raw_movs_l();
1878 }
1879 LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1880
1881 /*************************************************************************
1882 * FIXME: mem access modes probably wrong *
1883 *************************************************************************/
1884
1885 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1886 {
1887 emit_byte(0x9c);
1888 }
1889 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1890
1891 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1892 {
1893 emit_byte(0x9d);
1894 }
1895 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1896
1897 /*************************************************************************
1898 * Unoptimizable stuff --- jump *
1899 *************************************************************************/
1900
1901 static __inline__ void raw_call_r(R4 r)
1902 {
1903 emit_byte(0xff);
1904 emit_byte(0xd0+r);
1905 }
1906
1907 static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1908 {
1909 int mu;
1910 switch(m) {
1911 case 1: mu=0; break;
1912 case 2: mu=1; break;
1913 case 4: mu=2; break;
1914 case 8: mu=3; break;
1915 default: abort();
1916 }
1917 emit_byte(0xff);
1918 emit_byte(0x14);
1919 emit_byte(0x05+8*r+0x40*mu);
1920 emit_long(base);
1921 }
1922
1923 static __inline__ void raw_jmp_r(R4 r)
1924 {
1925 emit_byte(0xff);
1926 emit_byte(0xe0+r);
1927 }
1928
1929 static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1930 {
1931 int mu;
1932 switch(m) {
1933 case 1: mu=0; break;
1934 case 2: mu=1; break;
1935 case 4: mu=2; break;
1936 case 8: mu=3; break;
1937 default: abort();
1938 }
1939 emit_byte(0xff);
1940 emit_byte(0x24);
1941 emit_byte(0x05+8*r+0x40*mu);
1942 emit_long(base);
1943 }
1944
1945 static __inline__ void raw_jmp_m(uae_u32 base)
1946 {
1947 emit_byte(0xff);
1948 emit_byte(0x25);
1949 emit_long(base);
1950 }
1951
1952
1953 static __inline__ void raw_call(uae_u32 t)
1954 {
1955 emit_byte(0xe8);
1956 emit_long(t-(uae_u32)target-4);
1957 }
1958
1959 static __inline__ void raw_jmp(uae_u32 t)
1960 {
1961 emit_byte(0xe9);
1962 emit_long(t-(uae_u32)target-4);
1963 }
1964
1965 static __inline__ void raw_jl(uae_u32 t)
1966 {
1967 emit_byte(0x0f);
1968 emit_byte(0x8c);
1969 emit_long(t-(uae_u32)target-4);
1970 }
1971
1972 static __inline__ void raw_jz(uae_u32 t)
1973 {
1974 emit_byte(0x0f);
1975 emit_byte(0x84);
1976 emit_long(t-(uae_u32)target-4);
1977 }
1978
1979 static __inline__ void raw_jnz(uae_u32 t)
1980 {
1981 emit_byte(0x0f);
1982 emit_byte(0x85);
1983 emit_long(t-(uae_u32)target-4);
1984 }
1985
1986 static __inline__ void raw_jnz_l_oponly(void)
1987 {
1988 emit_byte(0x0f);
1989 emit_byte(0x85);
1990 }
1991
1992 static __inline__ void raw_jcc_l_oponly(int cc)
1993 {
1994 emit_byte(0x0f);
1995 emit_byte(0x80+cc);
1996 }
1997
1998 static __inline__ void raw_jnz_b_oponly(void)
1999 {
2000 emit_byte(0x75);
2001 }
2002
2003 static __inline__ void raw_jz_b_oponly(void)
2004 {
2005 emit_byte(0x74);
2006 }
2007
2008 static __inline__ void raw_jcc_b_oponly(int cc)
2009 {
2010 emit_byte(0x70+cc);
2011 }
2012
2013 static __inline__ void raw_jmp_l_oponly(void)
2014 {
2015 emit_byte(0xe9);
2016 }
2017
2018 static __inline__ void raw_jmp_b_oponly(void)
2019 {
2020 emit_byte(0xeb);
2021 }
2022
2023 static __inline__ void raw_ret(void)
2024 {
2025 emit_byte(0xc3);
2026 }
2027
2028 static __inline__ void raw_nop(void)
2029 {
2030 emit_byte(0x90);
2031 }
2032
2033
2034 /*************************************************************************
2035 * Flag handling, to and fro UAE flag register *
2036 *************************************************************************/
2037
2038 #ifdef SAHF_SETO_PROFITABLE
2039
2040 #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2041
2042 static __inline__ void raw_flags_to_reg(int r)
2043 {
2044 raw_lahf(0); /* Most flags in AH */
2045 //raw_setcc(r,0); /* V flag in AL */
2046 raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2047
2048 #if 1 /* Let's avoid those nasty partial register stalls */
2049 //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2050 raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2051 //live.state[FLAGTMP].status=CLEAN;
2052 live.state[FLAGTMP].status=INMEM;
2053 live.state[FLAGTMP].realreg=-1;
2054 /* We just "evicted" FLAGTMP. */
2055 if (live.nat[r].nholds!=1) {
2056 /* Huh? */
2057 abort();
2058 }
2059 live.nat[r].nholds=0;
2060 #endif
2061 }
2062
2063 #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2064 static __inline__ void raw_reg_to_flags(int r)
2065 {
2066 raw_cmp_b_ri(r,-127); /* set V */
2067 raw_sahf(0);
2068 }
2069
2070 #else
2071
2072 #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2073 static __inline__ void raw_flags_to_reg(int r)
2074 {
2075 raw_pushfl();
2076 raw_pop_l_r(r);
2077 raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2078 // live.state[FLAGTMP].status=CLEAN;
2079 live.state[FLAGTMP].status=INMEM;
2080 live.state[FLAGTMP].realreg=-1;
2081 /* We just "evicted" FLAGTMP. */
2082 if (live.nat[r].nholds!=1) {
2083 /* Huh? */
2084 abort();
2085 }
2086 live.nat[r].nholds=0;
2087 }
2088
2089 #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2090 static __inline__ void raw_reg_to_flags(int r)
2091 {
2092 raw_push_l_r(r);
2093 raw_popfl();
2094 }
2095
2096 #endif
2097
2098 /* Apparently, there are enough instructions between flag store and
2099 flag reload to avoid the partial memory stall */
2100 static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2101 {
2102 #if 1
2103 raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2104 #else
2105 raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2106 raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2107 #endif
2108 }
2109
2110 /* FLAGX is byte sized, and we *do* write it at that size */
2111 static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2112 {
2113 if (live.nat[target].canbyte)
2114 raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2115 else if (live.nat[target].canword)
2116 raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2117 else
2118 raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2119 }
2120
2121
2122 static __inline__ void raw_inc_sp(int off)
2123 {
2124 raw_add_l_ri(ESP_INDEX,off);
2125 }
2126
2127 /*************************************************************************
2128 * Handling mistaken direct memory access *
2129 *************************************************************************/
2130
2131 // gb-- I don't need that part for JIT Basilisk II
2132 #if defined(NATMEM_OFFSET) && 0
2133 #include <asm/sigcontext.h>
2134 #include <signal.h>
2135
2136 #define SIG_READ 1
2137 #define SIG_WRITE 2
2138
2139 static int in_handler=0;
2140 static uae_u8 veccode[256];
2141
2142 static void vec(int x, struct sigcontext sc)
2143 {
2144 uae_u8* i=(uae_u8*)sc.eip;
2145 uae_u32 addr=sc.cr2;
2146 int r=-1;
2147 int size=4;
2148 int dir=-1;
2149 int len=0;
2150 int j;
2151
2152 write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2153 if (!canbang)
2154 write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2155 if (in_handler)
2156 write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2157
2158 if (canbang && i>=compiled_code && i<=current_compile_p) {
2159 if (*i==0x66) {
2160 i++;
2161 size=2;
2162 len++;
2163 }
2164
2165 switch(i[0]) {
2166 case 0x8a:
2167 if ((i[1]&0xc0)==0x80) {
2168 r=(i[1]>>3)&7;
2169 dir=SIG_READ;
2170 size=1;
2171 len+=6;
2172 break;
2173 }
2174 break;
2175 case 0x88:
2176 if ((i[1]&0xc0)==0x80) {
2177 r=(i[1]>>3)&7;
2178 dir=SIG_WRITE;
2179 size=1;
2180 len+=6;
2181 break;
2182 }
2183 break;
2184 case 0x8b:
2185 if ((i[1]&0xc0)==0x80) {
2186 r=(i[1]>>3)&7;
2187 dir=SIG_READ;
2188 len+=6;
2189 break;
2190 }
2191 if ((i[1]&0xc0)==0x40) {
2192 r=(i[1]>>3)&7;
2193 dir=SIG_READ;
2194 len+=3;
2195 break;
2196 }
2197 break;
2198 case 0x89:
2199 if ((i[1]&0xc0)==0x80) {
2200 r=(i[1]>>3)&7;
2201 dir=SIG_WRITE;
2202 len+=6;
2203 break;
2204 }
2205 if ((i[1]&0xc0)==0x40) {
2206 r=(i[1]>>3)&7;
2207 dir=SIG_WRITE;
2208 len+=3;
2209 break;
2210 }
2211 break;
2212 }
2213 }
2214
2215 if (r!=-1) {
2216 void* pr=NULL;
2217 write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2218
2219 switch(r) {
2220 case 0: pr=&(sc.eax); break;
2221 case 1: pr=&(sc.ecx); break;
2222 case 2: pr=&(sc.edx); break;
2223 case 3: pr=&(sc.ebx); break;
2224 case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2225 case 5: pr=(size>1)?
2226 (void*)(&(sc.ebp)):
2227 (void*)(((uae_u8*)&(sc.ecx))+1); break;
2228 case 6: pr=(size>1)?
2229 (void*)(&(sc.esi)):
2230 (void*)(((uae_u8*)&(sc.edx))+1); break;
2231 case 7: pr=(size>1)?
2232 (void*)(&(sc.edi)):
2233 (void*)(((uae_u8*)&(sc.ebx))+1); break;
2234 default: abort();
2235 }
2236 if (pr) {
2237 blockinfo* bi;
2238
2239 if (currprefs.comp_oldsegv) {
2240 addr-=NATMEM_OFFSET;
2241
2242 if ((addr>=0x10000000 && addr<0x40000000) ||
2243 (addr>=0x50000000)) {
2244 write_log("Suspicious address in %x SEGV handler.\n",addr);
2245 }
2246 if (dir==SIG_READ) {
2247 switch(size) {
2248 case 1: *((uae_u8*)pr)=get_byte(addr); break;
2249 case 2: *((uae_u16*)pr)=get_word(addr); break;
2250 case 4: *((uae_u32*)pr)=get_long(addr); break;
2251 default: abort();
2252 }
2253 }
2254 else { /* write */
2255 switch(size) {
2256 case 1: put_byte(addr,*((uae_u8*)pr)); break;
2257 case 2: put_word(addr,*((uae_u16*)pr)); break;
2258 case 4: put_long(addr,*((uae_u32*)pr)); break;
2259 default: abort();
2260 }
2261 }
2262 write_log("Handled one access!\n");
2263 fflush(stdout);
2264 segvcount++;
2265 sc.eip+=len;
2266 }
2267 else {
2268 void* tmp=target;
2269 int i;
2270 uae_u8 vecbuf[5];
2271
2272 addr-=NATMEM_OFFSET;
2273
2274 if ((addr>=0x10000000 && addr<0x40000000) ||
2275 (addr>=0x50000000)) {
2276 write_log("Suspicious address in %x SEGV handler.\n",addr);
2277 }
2278
2279 target=(uae_u8*)sc.eip;
2280 for (i=0;i<5;i++)
2281 vecbuf[i]=target[i];
2282 emit_byte(0xe9);
2283 emit_long((uae_u32)veccode-(uae_u32)target-4);
2284 write_log("Create jump to %p\n",veccode);
2285
2286 write_log("Handled one access!\n");
2287 fflush(stdout);
2288 segvcount++;
2289
2290 target=veccode;
2291
2292 if (dir==SIG_READ) {
2293 switch(size) {
2294 case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2295 case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2296 case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2297 default: abort();
2298 }
2299 }
2300 else { /* write */
2301 switch(size) {
2302 case 1: put_byte(addr,*((uae_u8*)pr)); break;
2303 case 2: put_word(addr,*((uae_u16*)pr)); break;
2304 case 4: put_long(addr,*((uae_u32*)pr)); break;
2305 default: abort();
2306 }
2307 }
2308 for (i=0;i<5;i++)
2309 raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2310 raw_mov_l_mi((uae_u32)&in_handler,0);
2311 emit_byte(0xe9);
2312 emit_long(sc.eip+len-(uae_u32)target-4);
2313 in_handler=1;
2314 target=tmp;
2315 }
2316 bi=active;
2317 while (bi) {
2318 if (bi->handler &&
2319 (uae_u8*)bi->direct_handler<=i &&
2320 (uae_u8*)bi->nexthandler>i) {
2321 write_log("deleted trigger (%p<%p<%p) %p\n",
2322 bi->handler,
2323 i,
2324 bi->nexthandler,
2325 bi->pc_p);
2326 invalidate_block(bi);
2327 raise_in_cl_list(bi);
2328 set_special(0);
2329 return;
2330 }
2331 bi=bi->next;
2332 }
2333 /* Not found in the active list. Might be a rom routine that
2334 is in the dormant list */
2335 bi=dormant;
2336 while (bi) {
2337 if (bi->handler &&
2338 (uae_u8*)bi->direct_handler<=i &&
2339 (uae_u8*)bi->nexthandler>i) {
2340 write_log("deleted trigger (%p<%p<%p) %p\n",
2341 bi->handler,
2342 i,
2343 bi->nexthandler,
2344 bi->pc_p);
2345 invalidate_block(bi);
2346 raise_in_cl_list(bi);
2347 set_special(0);
2348 return;
2349 }
2350 bi=bi->next;
2351 }
2352 write_log("Huh? Could not find trigger!\n");
2353 return;
2354 }
2355 }
2356 write_log("Can't handle access!\n");
2357 for (j=0;j<10;j++) {
2358 write_log("instruction byte %2d is %02x\n",j,i[j]);
2359 }
2360 write_log("Please send the above info (starting at \"fault address\") to\n"
2361 "bmeyer@csse.monash.edu.au\n"
2362 "This shouldn't happen ;-)\n");
2363 fflush(stdout);
2364 signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2365 }
2366 #endif
2367
2368
2369 /*************************************************************************
2370 * Checking for CPU features *
2371 *************************************************************************/
2372
2373 struct cpuinfo_x86 {
2374 uae_u8 x86; // CPU family
2375 uae_u8 x86_vendor; // CPU vendor
2376 uae_u8 x86_processor; // CPU canonical processor type
2377 uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2378 uae_u32 x86_hwcap;
2379 uae_u8 x86_model;
2380 uae_u8 x86_mask;
2381 int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2382 char x86_vendor_id[16];
2383 };
2384 struct cpuinfo_x86 cpuinfo;
2385
/* Vendor codes stored in cpuinfo_x86.x86_vendor */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
2398
/* Canonical processor types.  The values index x86_processor_string_table
   and x86_alignments, so the order here must match both tables. */
enum {
    X86_PROCESSOR_I386,         /* 80386 */
    X86_PROCESSOR_I486,         /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_max           /* number of entries; also used as "unknown" */
};
2409
2410 static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2411 "80386",
2412 "80486",
2413 "Pentium",
2414 "PentiumPro",
2415 "K6",
2416 "Athlon",
2417 "Pentium4"
2418 };
2419
2420 static struct ptt {
2421 const int align_loop;
2422 const int align_loop_max_skip;
2423 const int align_jump;
2424 const int align_jump_max_skip;
2425 const int align_func;
2426 }
2427 x86_alignments[X86_PROCESSOR_max] = {
2428 { 4, 3, 4, 3, 4 },
2429 { 16, 15, 16, 15, 16 },
2430 { 16, 7, 16, 7, 16 },
2431 { 16, 15, 16, 7, 16 },
2432 { 32, 7, 32, 7, 32 },
2433 { 16, 7, 16, 7, 16 },
2434 { 0, 0, 0, 0, 0 }
2435 };
2436
2437 static void
2438 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2439 {
2440 char *v = c->x86_vendor_id;
2441
2442 if (!strcmp(v, "GenuineIntel"))
2443 c->x86_vendor = X86_VENDOR_INTEL;
2444 else if (!strcmp(v, "AuthenticAMD"))
2445 c->x86_vendor = X86_VENDOR_AMD;
2446 else if (!strcmp(v, "CyrixInstead"))
2447 c->x86_vendor = X86_VENDOR_CYRIX;
2448 else if (!strcmp(v, "Geode by NSC"))
2449 c->x86_vendor = X86_VENDOR_NSC;
2450 else if (!strcmp(v, "UMC UMC UMC "))
2451 c->x86_vendor = X86_VENDOR_UMC;
2452 else if (!strcmp(v, "CentaurHauls"))
2453 c->x86_vendor = X86_VENDOR_CENTAUR;
2454 else if (!strcmp(v, "NexGenDriven"))
2455 c->x86_vendor = X86_VENDOR_NEXGEN;
2456 else if (!strcmp(v, "RiseRiseRise"))
2457 c->x86_vendor = X86_VENDOR_RISE;
2458 else if (!strcmp(v, "GenuineTMx86") ||
2459 !strcmp(v, "TransmetaCPU"))
2460 c->x86_vendor = X86_VENDOR_TRANSMETA;
2461 else
2462 c->x86_vendor = X86_VENDOR_UNKNOWN;
2463 }
2464
2465 static void
2466 cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2467 {
2468 static uae_u8 cpuid_space[256];
2469 uae_u8* tmp=get_target();
2470
2471 set_target(cpuid_space);
2472 raw_push_l_r(0); /* eax */
2473 raw_push_l_r(1); /* ecx */
2474 raw_push_l_r(2); /* edx */
2475 raw_push_l_r(3); /* ebx */
2476 raw_mov_l_rm(0,(uae_u32)&op);
2477 raw_cpuid(0);
2478 if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2479 if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2480 if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2481 if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2482 raw_pop_l_r(3);
2483 raw_pop_l_r(2);
2484 raw_pop_l_r(1);
2485 raw_pop_l_r(0);
2486 raw_ret();
2487 set_target(tmp);
2488
2489 ((cpuop_func*)cpuid_space)(0);
2490 }
2491
2492 static void
2493 raw_init_cpu(void)
2494 {
2495 struct cpuinfo_x86 *c = &cpuinfo;
2496
2497 /* Defaults */
2498 c->x86_vendor = X86_VENDOR_UNKNOWN;
2499 c->cpuid_level = -1; /* CPUID not detected */
2500 c->x86_model = c->x86_mask = 0; /* So far unknown... */
2501 c->x86_vendor_id[0] = '\0'; /* Unset */
2502 c->x86_hwcap = 0;
2503
2504 /* Get vendor name */
2505 c->x86_vendor_id[12] = '\0';
2506 cpuid(0x00000000,
2507 (uae_u32 *)&c->cpuid_level,
2508 (uae_u32 *)&c->x86_vendor_id[0],
2509 (uae_u32 *)&c->x86_vendor_id[8],
2510 (uae_u32 *)&c->x86_vendor_id[4]);
2511 x86_get_cpu_vendor(c);
2512
2513 /* Intel-defined flags: level 0x00000001 */
2514 c->x86_brand_id = 0;
2515 if ( c->cpuid_level >= 0x00000001 ) {
2516 uae_u32 tfms, brand_id;
2517 cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2518 c->x86 = (tfms >> 8) & 15;
2519 c->x86_model = (tfms >> 4) & 15;
2520 c->x86_brand_id = brand_id & 0xff;
2521 if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2522 (c->x86 == 0xf)) {
2523 /* AMD Extended Family and Model Values */
2524 c->x86 += (tfms >> 20) & 0xff;
2525 c->x86_model += (tfms >> 12) & 0xf0;
2526 }
2527 c->x86_mask = tfms & 15;
2528 } else {
2529 /* Have CPUID level 0 only - unheard of */
2530 c->x86 = 4;
2531 }
2532
2533 /* Canonicalize processor ID */
2534 c->x86_processor = X86_PROCESSOR_max;
2535 switch (c->x86) {
2536 case 3:
2537 c->x86_processor = X86_PROCESSOR_I386;
2538 break;
2539 case 4:
2540 c->x86_processor = X86_PROCESSOR_I486;
2541 break;
2542 case 5:
2543 if (c->x86_vendor == X86_VENDOR_AMD)
2544 c->x86_processor = X86_PROCESSOR_K6;
2545 else
2546 c->x86_processor = X86_PROCESSOR_PENTIUM;
2547 break;
2548 case 6:
2549 if (c->x86_vendor == X86_VENDOR_AMD)
2550 c->x86_processor = X86_PROCESSOR_ATHLON;
2551 else
2552 c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2553 break;
2554 case 15:
2555 if (c->x86_vendor == X86_VENDOR_INTEL) {
2556 /* Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
2557 if (c->x86_brand_id >= 8)
2558 c->x86_processor = X86_PROCESSOR_PENTIUM4;
2559 }
2560 break;
2561 }
2562 if (c->x86_processor == X86_PROCESSOR_max) {
2563 fprintf(stderr, "Error: unknown processor type\n");
2564 fprintf(stderr, " Family : %d\n", c->x86);
2565 fprintf(stderr, " Model : %d\n", c->x86_model);
2566 fprintf(stderr, " Mask : %d\n", c->x86_mask);
2567 if (c->x86_brand_id)
2568 fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
2569 abort();
2570 }
2571
2572 /* Have CMOV support? */
2573 have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2574
2575 /* Can the host CPU suffer from partial register stalls? */
2576 have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2577 #if 1
2578 /* It appears that partial register writes are a bad idea even on
2579 AMD K7 cores, even though they are not supposed to have the
2580 dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2581 if (c->x86_processor == X86_PROCESSOR_ATHLON)
2582 have_rat_stall = true;
2583 #endif
2584
2585 /* Alignments */
2586 if (tune_alignment) {
2587 align_loops = x86_alignments[c->x86_processor].align_loop;
2588 align_jumps = x86_alignments[c->x86_processor].align_jump;
2589 }
2590
2591 write_log("Max CPUID level=%d Processor is %s [%s]\n",
2592 c->cpuid_level, c->x86_vendor_id,
2593 x86_processor_string_table[c->x86_processor]);
2594 }
2595
2596
2597 /*************************************************************************
2598 * FPU stuff *
2599 *************************************************************************/
2600
2601
2602 static __inline__ void raw_fp_init(void)
2603 {
2604 int i;
2605
2606 for (i=0;i<N_FREGS;i++)
2607 live.spos[i]=-2;
2608 live.tos=-1; /* Stack is empty */
2609 }
2610
2611 static __inline__ void raw_fp_cleanup_drop(void)
2612 {
2613 #if 0
2614 /* using FINIT instead of popping all the entries.
2615 Seems to have side effects --- there is display corruption in
2616 Quake when this is used */
2617 if (live.tos>1) {
2618 emit_byte(0x9b);
2619 emit_byte(0xdb);
2620 emit_byte(0xe3);
2621 live.tos=-1;
2622 }
2623 #endif
2624 while (live.tos>=1) {
2625 emit_byte(0xde);
2626 emit_byte(0xd9);
2627 live.tos-=2;
2628 }
2629 while (live.tos>=0) {
2630 emit_byte(0xdd);
2631 emit_byte(0xd8);
2632 live.tos--;
2633 }
2634 raw_fp_init();
2635 }
2636
2637 static __inline__ void make_tos(int r)
2638 {
2639 int p,q;
2640
2641 if (live.spos[r]<0) { /* Register not yet on stack */
2642 emit_byte(0xd9);
2643 emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2644 live.tos++;
2645 live.spos[r]=live.tos;
2646 live.onstack[live.tos]=r;
2647 return;
2648 }
2649 /* Register is on stack */
2650 if (live.tos==live.spos[r])
2651 return;
2652 p=live.spos[r];
2653 q=live.onstack[live.tos];
2654
2655 emit_byte(0xd9);
2656 emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2657 live.onstack[live.tos]=r;
2658 live.spos[r]=live.tos;
2659 live.onstack[p]=q;
2660 live.spos[q]=p;
2661 }
2662
2663 static __inline__ void make_tos2(int r, int r2)
2664 {
2665 int q;
2666
2667 make_tos(r2); /* Put the reg that's supposed to end up in position2
2668 on top */
2669
2670 if (live.spos[r]<0) { /* Register not yet on stack */
2671 make_tos(r); /* This will extend the stack */
2672 return;
2673 }
2674 /* Register is on stack */
2675 emit_byte(0xd9);
2676 emit_byte(0xc9); /* Move r2 into position 2 */
2677
2678 q=live.onstack[live.tos-1];
2679 live.onstack[live.tos]=q;
2680 live.spos[q]=live.tos;
2681 live.onstack[live.tos-1]=r2;
2682 live.spos[r2]=live.tos-1;
2683
2684 make_tos(r); /* And r into 1 */
2685 }
2686
2687 static __inline__ int stackpos(int r)
2688 {
2689 if (live.spos[r]<0)
2690 abort();
2691 if (live.tos<live.spos[r]) {
2692 printf("Looking for spos for fnreg %d\n",r);
2693 abort();
2694 }
2695 return live.tos-live.spos[r];
2696 }
2697
2698 static __inline__ void usereg(int r)
2699 {
2700 if (live.spos[r]<0)
2701 make_tos(r);
2702 }
2703
2704 /* This is called with one FP value in a reg *above* tos, which it will
2705 pop off the stack if necessary */
2706 static __inline__ void tos_make(int r)
2707 {
2708 if (live.spos[r]<0) {
2709 live.tos++;
2710 live.spos[r]=live.tos;
2711 live.onstack[live.tos]=r;
2712 return;
2713 }
2714 emit_byte(0xdd);
2715 emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2716 and pop it*/
2717 }
2718
2719
2720 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2721 {
2722 make_tos(r);
2723 emit_byte(0xdd);
2724 emit_byte(0x15);
2725 emit_long(m);
2726 }
2727 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2728
2729 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2730 {
2731 make_tos(r);
2732 emit_byte(0xdd);
2733 emit_byte(0x1d);
2734 emit_long(m);
2735 live.onstack[live.tos]=-1;
2736 live.tos--;
2737 live.spos[r]=-2;
2738 }
2739 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2740
2741 LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2742 {
2743 emit_byte(0xdd);
2744 emit_byte(0x05);
2745 emit_long(m);
2746 tos_make(r);
2747 }
2748 LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2749
2750 LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2751 {
2752 emit_byte(0xdb);
2753 emit_byte(0x05);
2754 emit_long(m);
2755 tos_make(r);
2756 }
2757 LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2758
2759 LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2760 {
2761 make_tos(r);
2762 emit_byte(0xdb);
2763 emit_byte(0x15);
2764 emit_long(m);
2765 }
2766 LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2767
2768 LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2769 {
2770 emit_byte(0xd9);
2771 emit_byte(0x05);
2772 emit_long(m);
2773 tos_make(r);
2774 }
2775 LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2776
2777 LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2778 {
2779 make_tos(r);
2780 emit_byte(0xd9);
2781 emit_byte(0x15);
2782 emit_long(m);
2783 }
2784 LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2785
2786 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2787 {
2788 int rs;
2789
2790 /* Stupid x87 can't write a long double to mem without popping the
2791 stack! */
2792 usereg(r);
2793 rs=stackpos(r);
2794 emit_byte(0xd9); /* Get a copy to the top of stack */
2795 emit_byte(0xc0+rs);
2796
2797 emit_byte(0xdb); /* store and pop it */
2798 emit_byte(0x3d);
2799 emit_long(m);
2800 }
2801 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2802
2803 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2804 {
2805 int rs;
2806
2807 make_tos(r);
2808 emit_byte(0xdb); /* store and pop it */
2809 emit_byte(0x3d);
2810 emit_long(m);
2811 live.onstack[live.tos]=-1;
2812 live.tos--;
2813 live.spos[r]=-2;
2814 }
2815 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2816
2817 LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2818 {
2819 emit_byte(0xdb);
2820 emit_byte(0x2d);
2821 emit_long(m);
2822 tos_make(r);
2823 }
2824 LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2825
2826 LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2827 {
2828 emit_byte(0xd9);
2829 emit_byte(0xeb);
2830 tos_make(r);
2831 }
2832 LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2833
2834 LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2835 {
2836 emit_byte(0xd9);
2837 emit_byte(0xec);
2838 tos_make(r);
2839 }
2840 LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2841
2842 LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2843 {
2844 emit_byte(0xd9);
2845 emit_byte(0xea);
2846 tos_make(r);
2847 }
2848 LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2849
2850 LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2851 {
2852 emit_byte(0xd9);
2853 emit_byte(0xed);
2854 tos_make(r);
2855 }
2856 LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2857
2858 LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2859 {
2860 emit_byte(0xd9);
2861 emit_byte(0xe8);
2862 tos_make(r);
2863 }
2864 LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2865
2866 LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2867 {
2868 emit_byte(0xd9);
2869 emit_byte(0xee);
2870 tos_make(r);
2871 }
2872 LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2873
2874 LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2875 {
2876 int ds;
2877
2878 usereg(s);
2879 ds=stackpos(s);
2880 if (ds==0 && live.spos[d]>=0) {
2881 /* source is on top of stack, and we already have the dest */
2882 int dd=stackpos(d);
2883 emit_byte(0xdd);
2884 emit_byte(0xd0+dd);
2885 }
2886 else {
2887 emit_byte(0xd9);
2888 emit_byte(0xc0+ds); /* duplicate source on tos */
2889 tos_make(d); /* store to destination, pop if necessary */
2890 }
2891 }
2892 LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2893
2894 LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2895 {
2896 emit_byte(0xd9);
2897 emit_byte(0xa8+index);
2898 emit_long(base);
2899 }
2900 LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2901
2902
2903 LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2904 {
2905 int ds;
2906
2907 if (d!=s) {
2908 usereg(s);
2909 ds=stackpos(s);
2910 emit_byte(0xd9);
2911 emit_byte(0xc0+ds); /* duplicate source */
2912 emit_byte(0xd9);
2913 emit_byte(0xfa); /* take square root */
2914 tos_make(d); /* store to destination */
2915 }
2916 else {
2917 make_tos(d);
2918 emit_byte(0xd9);
2919 emit_byte(0xfa); /* take square root */
2920 }
2921 }
2922 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2923
2924 LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2925 {
2926 int ds;
2927
2928 if (d!=s) {
2929 usereg(s);
2930 ds=stackpos(s);
2931 emit_byte(0xd9);
2932 emit_byte(0xc0+ds); /* duplicate source */
2933 emit_byte(0xd9);
2934 emit_byte(0xe1); /* take fabs */
2935 tos_make(d); /* store to destination */
2936 }
2937 else {
2938 make_tos(d);
2939 emit_byte(0xd9);
2940 emit_byte(0xe1); /* take fabs */
2941 }
2942 }
2943 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
2944
2945 LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2946 {
2947 int ds;
2948
2949 if (d!=s) {
2950 usereg(s);
2951 ds=stackpos(s);
2952 emit_byte(0xd9);
2953 emit_byte(0xc0+ds); /* duplicate source */
2954 emit_byte(0xd9);
2955 emit_byte(0xfc); /* take frndint */
2956 tos_make(d); /* store to destination */
2957 }
2958 else {
2959 make_tos(d);
2960 emit_byte(0xd9);
2961 emit_byte(0xfc); /* take frndint */
2962 }
2963 }
2964 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
2965
2966 LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2967 {
2968 int ds;
2969
2970 if (d!=s) {
2971 usereg(s);
2972 ds=stackpos(s);
2973 emit_byte(0xd9);
2974 emit_byte(0xc0+ds); /* duplicate source */
2975 emit_byte(0xd9);
2976 emit_byte(0xff); /* take cos */
2977 tos_make(d); /* store to destination */
2978 }
2979 else {
2980 make_tos(d);
2981 emit_byte(0xd9);
2982 emit_byte(0xff); /* take cos */
2983 }
2984 }
2985 LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
2986
2987 LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
2988 {
2989 int ds;
2990
2991 if (d!=s) {
2992 usereg(s);
2993 ds=stackpos(s);
2994 emit_byte(0xd9);
2995 emit_byte(0xc0+ds); /* duplicate source */
2996 emit_byte(0xd9);
2997 emit_byte(0xfe); /* take sin */
2998 tos_make(d); /* store to destination */
2999 }
3000 else {
3001 make_tos(d);
3002 emit_byte(0xd9);
3003 emit_byte(0xfe); /* take sin */
3004 }
3005 }
3006 LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3007
3008 double one=1;
3009 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3010 {
3011 int ds;
3012
3013 usereg(s);
3014 ds=stackpos(s);
3015 emit_byte(0xd9);
3016 emit_byte(0xc0+ds); /* duplicate source */
3017
3018 emit_byte(0xd9);
3019 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3020 emit_byte(0xd9);
3021 emit_byte(0xfc); /* rndint */
3022 emit_byte(0xd9);
3023 emit_byte(0xc9); /* swap top two elements */
3024 emit_byte(0xd8);
3025 emit_byte(0xe1); /* subtract rounded from original */
3026 emit_byte(0xd9);
3027 emit_byte(0xf0); /* f2xm1 */
3028 emit_byte(0xdc);
3029 emit_byte(0x05);
3030 emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3031 emit_byte(0xd9);
3032 emit_byte(0xfd); /* and scale it */
3033 emit_byte(0xdd);
3034 emit_byte(0xd9); /* take he rounded value off */
3035 tos_make(d); /* store to destination */
3036 }
3037 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3038
3039 LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3040 {
3041 int ds;
3042
3043 usereg(s);
3044 ds=stackpos(s);
3045 emit_byte(0xd9);
3046 emit_byte(0xc0+ds); /* duplicate source */
3047 emit_byte(0xd9);
3048 emit_byte(0xea); /* fldl2e */
3049 emit_byte(0xde);
3050 emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3051
3052 emit_byte(0xd9);
3053 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3054 emit_byte(0xd9);
3055 emit_byte(0xfc); /* rndint */
3056 emit_byte(0xd9);
3057 emit_byte(0xc9); /* swap top two elements */
3058 emit_byte(0xd8);
3059 emit_byte(0xe1); /* subtract rounded from original */
3060 emit_byte(0xd9);
3061 emit_byte(0xf0); /* f2xm1 */
3062 emit_byte(0xdc);
3063 emit_byte(0x05);
3064 emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3065 emit_byte(0xd9);
3066 emit_byte(0xfd); /* and scale it */
3067 emit_byte(0xdd);
3068 emit_byte(0xd9); /* take he rounded value off */
3069 tos_make(d); /* store to destination */
3070 }
3071 LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3072
3073 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3074 {
3075 int ds;
3076
3077 usereg(s);
3078 ds=stackpos(s);
3079 emit_byte(0xd9);
3080 emit_byte(0xc0+ds); /* duplicate source */
3081 emit_byte(0xd9);
3082 emit_byte(0xe8); /* push '1' */
3083 emit_byte(0xd9);
3084 emit_byte(0xc9); /* swap top two */
3085 emit_byte(0xd9);
3086 emit_byte(0xf1); /* take 1*log2(x) */
3087 tos_make(d); /* store to destination */
3088 }
3089 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3090
3091
3092 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3093 {
3094 int ds;
3095
3096 if (d!=s) {
3097 usereg(s);
3098 ds=stackpos(s);
3099 emit_byte(0xd9);
3100 emit_byte(0xc0+ds); /* duplicate source */
3101 emit_byte(0xd9);
3102 emit_byte(0xe0); /* take fchs */
3103 tos_make(d); /* store to destination */
3104 }
3105 else {
3106 make_tos(d);
3107 emit_byte(0xd9);
3108 emit_byte(0xe0); /* take fchs */
3109 }
3110 }
3111 LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3112
3113 LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3114 {
3115 int ds;
3116
3117 usereg(s);
3118 usereg(d);
3119
3120 if (live.spos[s]==live.tos) {
3121 /* Source is on top of stack */
3122 ds=stackpos(d);
3123 emit_byte(0xdc);
3124 emit_byte(0xc0+ds); /* add source to dest*/
3125 }
3126 else {
3127 make_tos(d);
3128 ds=stackpos(s);
3129
3130 emit_byte(0xd8);
3131 emit_byte(0xc0+ds); /* add source to dest*/
3132 }
3133 }
3134 LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3135
3136 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3137 {
3138 int ds;
3139
3140 usereg(s);
3141 usereg(d);
3142
3143 if (live.spos[s]==live.tos) {
3144 /* Source is on top of stack */
3145 ds=stackpos(d);
3146 emit_byte(0xdc);
3147 emit_byte(0xe8+ds); /* sub source from dest*/
3148 }
3149 else {
3150 make_tos(d);
3151 ds=stackpos(s);
3152
3153 emit_byte(0xd8);
3154 emit_byte(0xe0+ds); /* sub src from dest */
3155 }
3156 }
3157 LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3158
3159 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3160 {
3161 int ds;
3162
3163 usereg(s);
3164 usereg(d);
3165
3166 make_tos(d);
3167 ds=stackpos(s);
3168
3169 emit_byte(0xdd);
3170 emit_byte(0xe0+ds); /* cmp dest with source*/
3171 }
3172 LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3173
3174 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3175 {
3176 int ds;
3177
3178 usereg(s);
3179 usereg(d);
3180
3181 if (live.spos[s]==live.tos) {
3182 /* Source is on top of stack */
3183 ds=stackpos(d);
3184 emit_byte(0xdc);
3185 emit_byte(0xc8+ds); /* mul dest by source*/
3186 }
3187 else {
3188 make_tos(d);
3189 ds=stackpos(s);
3190
3191 emit_byte(0xd8);
3192 emit_byte(0xc8+ds); /* mul dest by source*/
3193 }
3194 }
3195 LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3196
3197 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3198 {
3199 int ds;
3200
3201 usereg(s);
3202 usereg(d);
3203
3204 if (live.spos[s]==live.tos) {
3205 /* Source is on top of stack */
3206 ds=stackpos(d);
3207 emit_byte(0xdc);
3208 emit_byte(0xf8+ds); /* div dest by source */
3209 }
3210 else {
3211 make_tos(d);
3212 ds=stackpos(s);
3213
3214 emit_byte(0xd8);
3215 emit_byte(0xf0+ds); /* div dest by source*/
3216 }
3217 }
3218 LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3219
3220 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3221 {
3222 int ds;
3223
3224 usereg(s);
3225 usereg(d);
3226
3227 make_tos2(d,s);
3228 ds=stackpos(s);
3229
3230 if (ds!=1) {
3231 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3232 abort();
3233 }
3234 emit_byte(0xd9);
3235 emit_byte(0xf8); /* take rem from dest by source */
3236 }
3237 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3238
3239 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3240 {
3241 int ds;
3242
3243 usereg(s);
3244 usereg(d);
3245
3246 make_tos2(d,s);
3247 ds=stackpos(s);
3248
3249 if (ds!=1) {
3250 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3251 abort();
3252 }
3253 emit_byte(0xd9);
3254 emit_byte(0xf5); /* take rem1 from dest by source */
3255 }
3256 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3257
3258
3259 LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3260 {
3261 make_tos(r);
3262 emit_byte(0xd9); /* ftst */
3263 emit_byte(0xe4);
3264 }
3265 LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3266
/* %eax register is clobbered if target processor doesn't support fucomi.
   The condition is parenthesized so it stays a single operand wherever
   the macro is expanded. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
3270
3271 static __inline__ void raw_fflags_into_flags(int r)
3272 {
3273 int p;
3274
3275 usereg(r);
3276 p=stackpos(r);
3277
3278 emit_byte(0xd9);
3279 emit_byte(0xee); /* Push 0 */
3280 emit_byte(0xd9);
3281 emit_byte(0xc9+p); /* swap top two around */
3282 if (have_cmov) {
3283 // gb-- fucomi is for P6 cores only, not K6-2 then...
3284 emit_byte(0xdb);
3285 emit_byte(0xe9+p); /* fucomi them */
3286 }
3287 else {
3288 emit_byte(0xdd);
3289 emit_byte(0xe1+p); /* fucom them */
3290 emit_byte(0x9b);
3291 emit_byte(0xdf);
3292 emit_byte(0xe0); /* fstsw ax */
3293 raw_sahf(0); /* sahf */
3294 }
3295 emit_byte(0xdd);
3296 emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3297 }