ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.8
Committed: 2002-10-12T16:27:13Z (21 years, 9 months ago) by gbeauche
Branch: MAIN
Changes since 1.7: +62 -0 lines
Log Message:
Add raw_emit_nop_filler() with more efficient no-op fillers stolen from
GNU binutils 2.12.90.0.15. Speed bump is marginal (less than 6%). Make it
default though, that's conditionalized by tune_nop_fillers constant.

File Contents

# Content
1 /*
2 * compiler/codegen_x86.cpp - IA-32 code generator
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2002
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2002 Christian Bauer
10 *
11 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 /* This should eventually end up in machdep/, but for now, x86 is the
29 only target, and it's easier this way... */
30
31 #include "flags_x86.h"
32
33 /*************************************************************************
34 * Some basic information about the target CPU *
35 *************************************************************************/
36
/* Numeric index of each IA-32 general-purpose register. The emitters
   below add these indices directly into opcode and ModRM/SIB bytes. */
37 #define EAX_INDEX 0
38 #define ECX_INDEX 1
39 #define EDX_INDEX 2
40 #define EBX_INDEX 3
41 #define ESP_INDEX 4
42 #define EBP_INDEX 5
43 #define ESI_INDEX 6
44 #define EDI_INDEX 7
45
46 /* The register in which subroutines return an integer return value
      (0 is EAX_INDEX) */
47 #define REG_RESULT 0
48
49 /* The registers subroutines take their first and second argument in */
50 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
51 /* Handle the _fastcall parameters of ECX and EDX */
52 #define REG_PAR1 1
53 #define REG_PAR2 2
54 #else
/* Default: first argument in register 0 (EAX_INDEX), second in 2 (EDX_INDEX) */
55 #define REG_PAR1 0
56 #define REG_PAR2 2
57 #endif
58
59 /* Three registers that are not used for any of the above
      (6 = ESI_INDEX, 5 = EBP_INDEX, 3 = EBX_INDEX) */
60 #define REG_NOPAR1 6
61 #define REG_NOPAR2 5
62 #define REG_NOPAR3 3
63
64 #define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
65 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): in this branch REG_PC_TMP has the same value as REG_PC_PRE
   (both 0), unlike the #else branch -- confirm that this is intended. */
66 #define REG_PC_TMP 0
67 #else
68 #define REG_PC_TMP 1 /* Another register that is not the above */
69 #endif
70
71 #define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount
72 (1 is ECX_INDEX). -1 if any reg will do */
73 #define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
74 #define MUL_NREG2 2 /* %edx will hold the high 32 bits */
75
/* Register index tables. Each list ends with a -1 entry, presumably a
   terminator for whoever walks the list -- TODO confirm against the users. */
/* Only entry is 4 (ESP_INDEX). */
76 uae_s8 always_used[]={4,-1};
/* Entries 0..3 are EAX, ECX, EDX and EBX; judging by the name these are the
   registers usable for byte-sized operands -- confirm against the users. */
77 uae_s8 can_byte[]={0,1,2,3,-1};
/* Every register index except 4 (ESP_INDEX). */
78 uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79
80 /* cpuopti mutates the instruction handlers so that they assume registers
81 are saved by the caller. Indexed by register number; only entry 4
      (ESP_INDEX) is set. */
82 uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
83
84 /* This *should* be the same as call_saved. But:
85 - We might not really know which registers are saved, and which aren't,
86 so we need to preserve some, but don't want to rely on everyone else
87 also saving those registers
88 - Special registers (such as the stack pointer) should not be "preserved"
89 by pushing, even though they are "saved" across function calls
90 */
/* Indexed by register number; every entry except 4 (ESP_INDEX) is set. */
91 uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
92
93 /* Whether classes of instructions do or don't clobber the native flags.
      A macro with an empty expansion does nothing where it is used; the
      others expand to a call to clobber_flags(). */
94 #define CLOBBER_MOV
95 #define CLOBBER_LEA
96 #define CLOBBER_CMOV
97 #define CLOBBER_POP
98 #define CLOBBER_PUSH
99 #define CLOBBER_SUB clobber_flags()
100 #define CLOBBER_SBB clobber_flags()
101 #define CLOBBER_CMP clobber_flags()
102 #define CLOBBER_ADD clobber_flags()
103 #define CLOBBER_ADC clobber_flags()
104 #define CLOBBER_AND clobber_flags()
105 #define CLOBBER_OR clobber_flags()
106 #define CLOBBER_XOR clobber_flags()
107
/* Rotates, shifts and TEST */
108 #define CLOBBER_ROL clobber_flags()
109 #define CLOBBER_ROR clobber_flags()
110 #define CLOBBER_SHLL clobber_flags()
111 #define CLOBBER_SHRL clobber_flags()
112 #define CLOBBER_SHRA clobber_flags()
113 #define CLOBBER_TEST clobber_flags()
/* Clear / sign-extend / zero-extend / swap helpers (names only; the users
   of these macros are not visible in this part of the file) */
114 #define CLOBBER_CL16
115 #define CLOBBER_CL8
116 #define CLOBBER_SE16
117 #define CLOBBER_SE8
118 #define CLOBBER_ZE16
119 #define CLOBBER_ZE8
/* The 16-bit swap is emitted below as a rotate (raw_bswap_16), hence it
   is marked as clobbering, while the 32-bit swap is not. */
120 #define CLOBBER_SW16 clobber_flags()
121 #define CLOBBER_SW32
122 #define CLOBBER_SETCC
123 #define CLOBBER_MUL clobber_flags()
124 #define CLOBBER_BT clobber_flags()
125 #define CLOBBER_BSF clobber_flags()
126
/* Encoding-size switches consulted by the emitters below. */
/* Use the short accumulator-only opcode form when the operand is EAX
   (see raw_sub_w_ri). */
127 const bool optimize_accum = true;
/* Use an 8-bit displacement when the offset fits in a signed byte
   (see raw_lea_l_brr, raw_mov_l_bRr). */
128 const bool optimize_imm8 = true;
/* Use the dedicated shift/rotate-by-one opcodes when the count is 1
   (see raw_rol_l_ri and friends). */
129 const bool optimize_shift_once = true;
130
131 /*************************************************************************
132 * Actual encoding of the instructions on the target CPU *
133 *************************************************************************/
134
135 static __inline__ int isaccum(int r)
136 {
137 return (r == EAX_INDEX);
138 }
139
140 static __inline__ int isbyte(uae_s32 x)
141 {
142 return (x>=-128 && x<=127);
143 }
144
145 static __inline__ int isword(uae_s32 x)
146 {
147 return (x>=-32768 && x<=32767);
148 }
149
150 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
151 {
152 emit_byte(0x50+r);
153 }
154 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
155
156 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
157 {
158 emit_byte(0x58+r);
159 }
160 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
161
162 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
163 {
164 emit_byte(0x0f);
165 emit_byte(0xba);
166 emit_byte(0xe0+r);
167 emit_byte(i);
168 }
169 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
170
171 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
172 {
173 emit_byte(0x0f);
174 emit_byte(0xa3);
175 emit_byte(0xc0+8*b+r);
176 }
177 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
178
179 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
180 {
181 emit_byte(0x0f);
182 emit_byte(0xba);
183 emit_byte(0xf8+r);
184 emit_byte(i);
185 }
186 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
187
188 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
189 {
190 emit_byte(0x0f);
191 emit_byte(0xbb);
192 emit_byte(0xc0+8*b+r);
193 }
194 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
195
196
197 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
198 {
199 emit_byte(0x0f);
200 emit_byte(0xba);
201 emit_byte(0xf0+r);
202 emit_byte(i);
203 }
204 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
205
206 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
207 {
208 emit_byte(0x0f);
209 emit_byte(0xb3);
210 emit_byte(0xc0+8*b+r);
211 }
212 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
213
214 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
215 {
216 emit_byte(0x0f);
217 emit_byte(0xba);
218 emit_byte(0xe8+r);
219 emit_byte(i);
220 }
221 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
222
223 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
224 {
225 emit_byte(0x0f);
226 emit_byte(0xab);
227 emit_byte(0xc0+8*b+r);
228 }
229 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
230
231 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
232 {
233 emit_byte(0x66);
234 if (isbyte(i)) {
235 emit_byte(0x83);
236 emit_byte(0xe8+d);
237 emit_byte(i);
238 }
239 else {
240 if (optimize_accum && isaccum(d))
241 emit_byte(0x2d);
242 else {
243 emit_byte(0x81);
244 emit_byte(0xe8+d);
245 }
246 emit_word(i);
247 }
248 }
249 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
250
251
252 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
253 {
254 emit_byte(0x8b);
255 emit_byte(0x05+8*d);
256 emit_long(s);
257 }
258 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
259
260 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
261 {
262 emit_byte(0xc7);
263 emit_byte(0x05);
264 emit_long(d);
265 emit_long(s);
266 }
267 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
268
269 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
270 {
271 emit_byte(0x66);
272 emit_byte(0xc7);
273 emit_byte(0x05);
274 emit_long(d);
275 emit_word(s);
276 }
277 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
278
279 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
280 {
281 emit_byte(0xc6);
282 emit_byte(0x05);
283 emit_long(d);
284 emit_byte(s);
285 }
286 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
287
288 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
289 {
290 if (optimize_shift_once && (i == 1)) {
291 emit_byte(0xd0);
292 emit_byte(0x05);
293 emit_long(d);
294 }
295 else {
296 emit_byte(0xc0);
297 emit_byte(0x05);
298 emit_long(d);
299 emit_byte(i);
300 }
301 }
302 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
303
304 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
305 {
306 if (optimize_shift_once && (i == 1)) {
307 emit_byte(0xd0);
308 emit_byte(0xc0+r);
309 }
310 else {
311 emit_byte(0xc0);
312 emit_byte(0xc0+r);
313 emit_byte(i);
314 }
315 }
316 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
317
318 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
319 {
320 emit_byte(0x66);
321 emit_byte(0xc1);
322 emit_byte(0xc0+r);
323 emit_byte(i);
324 }
325 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
326
327 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
328 {
329 if (optimize_shift_once && (i == 1)) {
330 emit_byte(0xd1);
331 emit_byte(0xc0+r);
332 }
333 else {
334 emit_byte(0xc1);
335 emit_byte(0xc0+r);
336 emit_byte(i);
337 }
338 }
339 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
340
341 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
342 {
343 emit_byte(0xd3);
344 emit_byte(0xc0+d);
345 }
346 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
347
348 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
349 {
350 emit_byte(0x66);
351 emit_byte(0xd3);
352 emit_byte(0xc0+d);
353 }
354 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
355
356 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
357 {
358 emit_byte(0xd2);
359 emit_byte(0xc0+d);
360 }
361 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
362
363 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
364 {
365 emit_byte(0xd3);
366 emit_byte(0xe0+d);
367 }
368 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
369
370 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
371 {
372 emit_byte(0x66);
373 emit_byte(0xd3);
374 emit_byte(0xe0+d);
375 }
376 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
377
378 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
379 {
380 emit_byte(0xd2);
381 emit_byte(0xe0+d);
382 }
383 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
384
385 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
386 {
387 if (optimize_shift_once && (i == 1)) {
388 emit_byte(0xd0);
389 emit_byte(0xc8+r);
390 }
391 else {
392 emit_byte(0xc0);
393 emit_byte(0xc8+r);
394 emit_byte(i);
395 }
396 }
397 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
398
399 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
400 {
401 emit_byte(0x66);
402 emit_byte(0xc1);
403 emit_byte(0xc8+r);
404 emit_byte(i);
405 }
406 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
407
408 // gb-- used for making an fpcr value in compemu_fpp.cpp
409 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
410 {
411 emit_byte(0x0b);
412 emit_byte(0x05+8*d);
413 emit_long(s);
414 }
415 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
416
417 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
418 {
419 if (optimize_shift_once && (i == 1)) {
420 emit_byte(0xd1);
421 emit_byte(0xc8+r);
422 }
423 else {
424 emit_byte(0xc1);
425 emit_byte(0xc8+r);
426 emit_byte(i);
427 }
428 }
429 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
430
431 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
432 {
433 emit_byte(0xd3);
434 emit_byte(0xc8+d);
435 }
436 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
437
438 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
439 {
440 emit_byte(0x66);
441 emit_byte(0xd3);
442 emit_byte(0xc8+d);
443 }
444 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
445
446 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
447 {
448 emit_byte(0xd2);
449 emit_byte(0xc8+d);
450 }
451 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
452
453 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
454 {
455 emit_byte(0xd3);
456 emit_byte(0xe8+d);
457 }
458 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
459
460 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
461 {
462 emit_byte(0x66);
463 emit_byte(0xd3);
464 emit_byte(0xe8+d);
465 }
466 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
467
468 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
469 {
470 emit_byte(0xd2);
471 emit_byte(0xe8+d);
472 }
473 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
474
475 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
476 {
477 emit_byte(0xd3);
478 emit_byte(0xf8+d);
479 }
480 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
481
482 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
483 {
484 emit_byte(0x66);
485 emit_byte(0xd3);
486 emit_byte(0xf8+d);
487 }
488 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
489
490 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
491 {
492 emit_byte(0xd2);
493 emit_byte(0xf8+d);
494 }
495 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
496
497 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
498 {
499 if (optimize_shift_once && (i == 1)) {
500 emit_byte(0xd1);
501 emit_byte(0xe0+r);
502 }
503 else {
504 emit_byte(0xc1);
505 emit_byte(0xe0+r);
506 emit_byte(i);
507 }
508 }
509 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
510
511 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
512 {
513 emit_byte(0x66);
514 emit_byte(0xc1);
515 emit_byte(0xe0+r);
516 emit_byte(i);
517 }
518 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
519
520 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
521 {
522 if (optimize_shift_once && (i == 1)) {
523 emit_byte(0xd0);
524 emit_byte(0xe0+r);
525 }
526 else {
527 emit_byte(0xc0);
528 emit_byte(0xe0+r);
529 emit_byte(i);
530 }
531 }
532 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
533
534 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
535 {
536 if (optimize_shift_once && (i == 1)) {
537 emit_byte(0xd1);
538 emit_byte(0xe8+r);
539 }
540 else {
541 emit_byte(0xc1);
542 emit_byte(0xe8+r);
543 emit_byte(i);
544 }
545 }
546 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
547
548 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
549 {
550 emit_byte(0x66);
551 emit_byte(0xc1);
552 emit_byte(0xe8+r);
553 emit_byte(i);
554 }
555 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
556
557 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
558 {
559 if (optimize_shift_once && (i == 1)) {
560 emit_byte(0xd0);
561 emit_byte(0xe8+r);
562 }
563 else {
564 emit_byte(0xc0);
565 emit_byte(0xe8+r);
566 emit_byte(i);
567 }
568 }
569 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
570
571 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
572 {
573 if (optimize_shift_once && (i == 1)) {
574 emit_byte(0xd1);
575 emit_byte(0xf8+r);
576 }
577 else {
578 emit_byte(0xc1);
579 emit_byte(0xf8+r);
580 emit_byte(i);
581 }
582 }
583 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
584
585 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
586 {
587 emit_byte(0x66);
588 emit_byte(0xc1);
589 emit_byte(0xf8+r);
590 emit_byte(i);
591 }
592 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
593
594 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
595 {
596 if (optimize_shift_once && (i == 1)) {
597 emit_byte(0xd0);
598 emit_byte(0xf8+r);
599 }
600 else {
601 emit_byte(0xc0);
602 emit_byte(0xf8+r);
603 emit_byte(i);
604 }
605 }
606 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
607
608 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
609 {
610 emit_byte(0x9e);
611 }
612 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
613
614 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
615 {
616 emit_byte(0x0f);
617 emit_byte(0xa2);
618 }
619 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
620
621 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
622 {
623 emit_byte(0x9f);
624 }
625 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
626
627 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
628 {
629 emit_byte(0x0f);
630 emit_byte(0x90+cc);
631 emit_byte(0xc0+d);
632 }
633 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
634
635 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
636 {
637 emit_byte(0x0f);
638 emit_byte(0x90+cc);
639 emit_byte(0x05);
640 emit_long(d);
641 }
642 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
643
644 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
645 {
646 if (have_cmov) {
647 emit_byte(0x0f);
648 emit_byte(0x40+cc);
649 emit_byte(0xc0+8*d+s);
650 }
651 else { /* replacement using branch and mov */
652 int uncc=(cc^1);
653 emit_byte(0x70+uncc);
654 emit_byte(2); /* skip next 2 bytes if not cc=true */
655 emit_byte(0x89);
656 emit_byte(0xc0+8*s+d);
657 }
658 }
659 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
660
661 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
662 {
663 emit_byte(0x0f);
664 emit_byte(0xbc);
665 emit_byte(0xc0+8*d+s);
666 }
667 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
668
669 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
670 {
671 emit_byte(0x0f);
672 emit_byte(0xbf);
673 emit_byte(0xc0+8*d+s);
674 }
675 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
676
677 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
678 {
679 emit_byte(0x0f);
680 emit_byte(0xbe);
681 emit_byte(0xc0+8*d+s);
682 }
683 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
684
685 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
686 {
687 emit_byte(0x0f);
688 emit_byte(0xb7);
689 emit_byte(0xc0+8*d+s);
690 }
691 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
692
693 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
694 {
695 emit_byte(0x0f);
696 emit_byte(0xb6);
697 emit_byte(0xc0+8*d+s);
698 }
699 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
700
701 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
702 {
703 emit_byte(0x0f);
704 emit_byte(0xaf);
705 emit_byte(0xc0+8*d+s);
706 }
707 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
708
709 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
710 {
711 if (d!=MUL_NREG1 || s!=MUL_NREG2)
712 abort();
713 emit_byte(0xf7);
714 emit_byte(0xea);
715 }
716 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
717
718 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
719 {
720 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
721 printf("Bad register in MUL: d=%d, s=%d\n",d,s);
722 abort();
723 }
724 emit_byte(0xf7);
725 emit_byte(0xe2);
726 }
727 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
728
729 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
730 {
731 abort(); /* %^$&%^$%#^ x86! */
732 emit_byte(0x0f);
733 emit_byte(0xaf);
734 emit_byte(0xc0+8*d+s);
735 }
736 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
737
738 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
739 {
740 emit_byte(0x88);
741 emit_byte(0xc0+8*s+d);
742 }
743 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
744
745 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
746 {
747 emit_byte(0x66);
748 emit_byte(0x89);
749 emit_byte(0xc0+8*s+d);
750 }
751 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
752
753 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
754 {
755 int isebp=(baser==5)?0x40:0;
756 int fi;
757
758 switch(factor) {
759 case 1: fi=0; break;
760 case 2: fi=1; break;
761 case 4: fi=2; break;
762 case 8: fi=3; break;
763 default: abort();
764 }
765
766
767 emit_byte(0x8b);
768 emit_byte(0x04+8*d+isebp);
769 emit_byte(baser+8*index+0x40*fi);
770 if (isebp)
771 emit_byte(0x00);
772 }
773 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
774
775 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
776 {
777 int fi;
778 int isebp;
779
780 switch(factor) {
781 case 1: fi=0; break;
782 case 2: fi=1; break;
783 case 4: fi=2; break;
784 case 8: fi=3; break;
785 default: abort();
786 }
787 isebp=(baser==5)?0x40:0;
788
789 emit_byte(0x66);
790 emit_byte(0x8b);
791 emit_byte(0x04+8*d+isebp);
792 emit_byte(baser+8*index+0x40*fi);
793 if (isebp)
794 emit_byte(0x00);
795 }
796 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
797
798 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
799 {
800 int fi;
801 int isebp;
802
803 switch(factor) {
804 case 1: fi=0; break;
805 case 2: fi=1; break;
806 case 4: fi=2; break;
807 case 8: fi=3; break;
808 default: abort();
809 }
810 isebp=(baser==5)?0x40:0;
811
812 emit_byte(0x8a);
813 emit_byte(0x04+8*d+isebp);
814 emit_byte(baser+8*index+0x40*fi);
815 if (isebp)
816 emit_byte(0x00);
817 }
818 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
819
820 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
821 {
822 int fi;
823 int isebp;
824
825 switch(factor) {
826 case 1: fi=0; break;
827 case 2: fi=1; break;
828 case 4: fi=2; break;
829 case 8: fi=3; break;
830 default: abort();
831 }
832
833
834 isebp=(baser==5)?0x40:0;
835
836 emit_byte(0x89);
837 emit_byte(0x04+8*s+isebp);
838 emit_byte(baser+8*index+0x40*fi);
839 if (isebp)
840 emit_byte(0x00);
841 }
842 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
843
844 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
845 {
846 int fi;
847 int isebp;
848
849 switch(factor) {
850 case 1: fi=0; break;
851 case 2: fi=1; break;
852 case 4: fi=2; break;
853 case 8: fi=3; break;
854 default: abort();
855 }
856 isebp=(baser==5)?0x40:0;
857
858 emit_byte(0x66);
859 emit_byte(0x89);
860 emit_byte(0x04+8*s+isebp);
861 emit_byte(baser+8*index+0x40*fi);
862 if (isebp)
863 emit_byte(0x00);
864 }
865 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
866
867 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
868 {
869 int fi;
870 int isebp;
871
872 switch(factor) {
873 case 1: fi=0; break;
874 case 2: fi=1; break;
875 case 4: fi=2; break;
876 case 8: fi=3; break;
877 default: abort();
878 }
879 isebp=(baser==5)?0x40:0;
880
881 emit_byte(0x88);
882 emit_byte(0x04+8*s+isebp);
883 emit_byte(baser+8*index+0x40*fi);
884 if (isebp)
885 emit_byte(0x00);
886 }
887 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
888
889 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
890 {
891 int fi;
892
893 switch(factor) {
894 case 1: fi=0; break;
895 case 2: fi=1; break;
896 case 4: fi=2; break;
897 case 8: fi=3; break;
898 default: abort();
899 }
900
901 emit_byte(0x89);
902 emit_byte(0x84+8*s);
903 emit_byte(baser+8*index+0x40*fi);
904 emit_long(base);
905 }
906 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
907
908 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
909 {
910 int fi;
911
912 switch(factor) {
913 case 1: fi=0; break;
914 case 2: fi=1; break;
915 case 4: fi=2; break;
916 case 8: fi=3; break;
917 default: abort();
918 }
919
920 emit_byte(0x66);
921 emit_byte(0x89);
922 emit_byte(0x84+8*s);
923 emit_byte(baser+8*index+0x40*fi);
924 emit_long(base);
925 }
926 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
927
928 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
929 {
930 int fi;
931
932 switch(factor) {
933 case 1: fi=0; break;
934 case 2: fi=1; break;
935 case 4: fi=2; break;
936 case 8: fi=3; break;
937 default: abort();
938 }
939
940 emit_byte(0x88);
941 emit_byte(0x84+8*s);
942 emit_byte(baser+8*index+0x40*fi);
943 emit_long(base);
944 }
945 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
946
947 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
948 {
949 int fi;
950
951 switch(factor) {
952 case 1: fi=0; break;
953 case 2: fi=1; break;
954 case 4: fi=2; break;
955 case 8: fi=3; break;
956 default: abort();
957 }
958
959 emit_byte(0x8b);
960 emit_byte(0x84+8*d);
961 emit_byte(baser+8*index+0x40*fi);
962 emit_long(base);
963 }
964 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
965
966 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
967 {
968 int fi;
969
970 switch(factor) {
971 case 1: fi=0; break;
972 case 2: fi=1; break;
973 case 4: fi=2; break;
974 case 8: fi=3; break;
975 default: abort();
976 }
977
978 emit_byte(0x66);
979 emit_byte(0x8b);
980 emit_byte(0x84+8*d);
981 emit_byte(baser+8*index+0x40*fi);
982 emit_long(base);
983 }
984 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
985
986 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
987 {
988 int fi;
989
990 switch(factor) {
991 case 1: fi=0; break;
992 case 2: fi=1; break;
993 case 4: fi=2; break;
994 case 8: fi=3; break;
995 default: abort();
996 }
997
998 emit_byte(0x8a);
999 emit_byte(0x84+8*d);
1000 emit_byte(baser+8*index+0x40*fi);
1001 emit_long(base);
1002 }
1003 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
1004
1005 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1006 {
1007 int fi;
1008 switch(factor) {
1009 case 1: fi=0; break;
1010 case 2: fi=1; break;
1011 case 4: fi=2; break;
1012 case 8: fi=3; break;
1013 default:
1014 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1015 abort();
1016 }
1017 emit_byte(0x8b);
1018 emit_byte(0x04+8*d);
1019 emit_byte(0x05+8*index+64*fi);
1020 emit_long(base);
1021 }
1022 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
1023
1024 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1025 {
1026 int fi;
1027 switch(factor) {
1028 case 1: fi=0; break;
1029 case 2: fi=1; break;
1030 case 4: fi=2; break;
1031 case 8: fi=3; break;
1032 default:
1033 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
1034 abort();
1035 }
1036 if (have_cmov) {
1037 emit_byte(0x0f);
1038 emit_byte(0x40+cond);
1039 emit_byte(0x04+8*d);
1040 emit_byte(0x05+8*index+64*fi);
1041 emit_long(base);
1042 }
1043 else { /* replacement using branch and mov */
1044 int uncc=(cond^1);
1045 emit_byte(0x70+uncc);
1046 emit_byte(7); /* skip next 7 bytes if not cc=true */
1047 emit_byte(0x8b);
1048 emit_byte(0x04+8*d);
1049 emit_byte(0x05+8*index+64*fi);
1050 emit_long(base);
1051 }
1052 }
1053 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
1054
1055 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1056 {
1057 if (have_cmov) {
1058 emit_byte(0x0f);
1059 emit_byte(0x40+cond);
1060 emit_byte(0x05+8*d);
1061 emit_long(mem);
1062 }
1063 else { /* replacement using branch and mov */
1064 int uncc=(cond^1);
1065 emit_byte(0x70+uncc);
1066 emit_byte(6); /* skip next 6 bytes if not cc=true */
1067 emit_byte(0x8b);
1068 emit_byte(0x05+8*d);
1069 emit_long(mem);
1070 }
1071 }
1072 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
1073
1074 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1075 {
1076 emit_byte(0x8b);
1077 emit_byte(0x40+8*d+s);
1078 emit_byte(offset);
1079 }
1080 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
1081
1082 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1083 {
1084 emit_byte(0x66);
1085 emit_byte(0x8b);
1086 emit_byte(0x40+8*d+s);
1087 emit_byte(offset);
1088 }
1089 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
1090
1091 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1092 {
1093 emit_byte(0x8a);
1094 emit_byte(0x40+8*d+s);
1095 emit_byte(offset);
1096 }
1097 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
1098
1099 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1100 {
1101 emit_byte(0x8b);
1102 emit_byte(0x80+8*d+s);
1103 emit_long(offset);
1104 }
1105 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
1106
1107 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1108 {
1109 emit_byte(0x66);
1110 emit_byte(0x8b);
1111 emit_byte(0x80+8*d+s);
1112 emit_long(offset);
1113 }
1114 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
1115
1116 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1117 {
1118 emit_byte(0x8a);
1119 emit_byte(0x80+8*d+s);
1120 emit_long(offset);
1121 }
1122 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
1123
1124 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1125 {
1126 emit_byte(0xc7);
1127 emit_byte(0x40+d);
1128 emit_byte(offset);
1129 emit_long(i);
1130 }
1131 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
1132
1133 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1134 {
1135 emit_byte(0x66);
1136 emit_byte(0xc7);
1137 emit_byte(0x40+d);
1138 emit_byte(offset);
1139 emit_word(i);
1140 }
1141 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
1142
1143 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1144 {
1145 emit_byte(0xc6);
1146 emit_byte(0x40+d);
1147 emit_byte(offset);
1148 emit_byte(i);
1149 }
1150 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
1151
1152 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1153 {
1154 emit_byte(0x89);
1155 emit_byte(0x40+8*s+d);
1156 emit_byte(offset);
1157 }
1158 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
1159
1160 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1161 {
1162 emit_byte(0x66);
1163 emit_byte(0x89);
1164 emit_byte(0x40+8*s+d);
1165 emit_byte(offset);
1166 }
1167 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
1168
1169 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1170 {
1171 emit_byte(0x88);
1172 emit_byte(0x40+8*s+d);
1173 emit_byte(offset);
1174 }
1175 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
1176
1177 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1178 {
1179 if (optimize_imm8 && isbyte(offset)) {
1180 emit_byte(0x8d);
1181 emit_byte(0x40+8*d+s);
1182 emit_byte(offset);
1183 }
1184 else {
1185 emit_byte(0x8d);
1186 emit_byte(0x80+8*d+s);
1187 emit_long(offset);
1188 }
1189 }
1190 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
1191
1192 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1193 {
1194 int fi;
1195
1196 switch(factor) {
1197 case 1: fi=0; break;
1198 case 2: fi=1; break;
1199 case 4: fi=2; break;
1200 case 8: fi=3; break;
1201 default: abort();
1202 }
1203
1204 if (optimize_imm8 && isbyte(offset)) {
1205 emit_byte(0x8d);
1206 emit_byte(0x44+8*d);
1207 emit_byte(0x40*fi+8*index+s);
1208 emit_byte(offset);
1209 }
1210 else {
1211 emit_byte(0x8d);
1212 emit_byte(0x84+8*d);
1213 emit_byte(0x40*fi+8*index+s);
1214 emit_long(offset);
1215 }
1216 }
1217 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
1218
1219 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1220 {
1221 int isebp=(s==5)?0x40:0;
1222 int fi;
1223
1224 switch(factor) {
1225 case 1: fi=0; break;
1226 case 2: fi=1; break;
1227 case 4: fi=2; break;
1228 case 8: fi=3; break;
1229 default: abort();
1230 }
1231
1232 emit_byte(0x8d);
1233 emit_byte(0x04+8*d+isebp);
1234 emit_byte(0x40*fi+8*index+s);
1235 if (isebp)
1236 emit_byte(0);
1237 }
1238 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
1239
1240 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1241 {
1242 if (optimize_imm8 && isbyte(offset)) {
1243 emit_byte(0x89);
1244 emit_byte(0x40+8*s+d);
1245 emit_byte(offset);
1246 }
1247 else {
1248 emit_byte(0x89);
1249 emit_byte(0x80+8*s+d);
1250 emit_long(offset);
1251 }
1252 }
1253 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
1254
1255 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1256 {
1257 emit_byte(0x66);
1258 emit_byte(0x89);
1259 emit_byte(0x80+8*s+d);
1260 emit_long(offset);
1261 }
1262 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
1263
1264 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1265 {
1266 if (optimize_imm8 && isbyte(offset)) {
1267 emit_byte(0x88);
1268 emit_byte(0x40+8*s+d);
1269 emit_byte(offset);
1270 }
1271 else {
1272 emit_byte(0x88);
1273 emit_byte(0x80+8*s+d);
1274 emit_long(offset);
1275 }
1276 }
1277 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
1278
1279 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1280 {
1281 emit_byte(0x0f);
1282 emit_byte(0xc8+r);
1283 }
1284 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
1285
1286 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1287 {
1288 emit_byte(0x66);
1289 emit_byte(0xc1);
1290 emit_byte(0xc0+r);
1291 emit_byte(0x08);
1292 }
1293 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
1294
1295 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1296 {
1297 emit_byte(0x89);
1298 emit_byte(0xc0+8*s+d);
1299 }
1300 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
1301
1302 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1303 {
1304 emit_byte(0x89);
1305 emit_byte(0x05+8*s);
1306 emit_long(d);
1307 }
1308 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
1309
1310 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1311 {
1312 emit_byte(0x66);
1313 emit_byte(0x89);
1314 emit_byte(0x05+8*s);
1315 emit_long(d);
1316 }
1317 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
1318
1319 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1320 {
1321 emit_byte(0x66);
1322 emit_byte(0x8b);
1323 emit_byte(0x05+8*d);
1324 emit_long(s);
1325 }
1326 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
1327
1328 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1329 {
1330 emit_byte(0x88);
1331 emit_byte(0x05+8*s);
1332 emit_long(d);
1333 }
1334 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
1335
1336 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1337 {
1338 emit_byte(0x8a);
1339 emit_byte(0x05+8*d);
1340 emit_long(s);
1341 }
1342 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
1343
1344 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1345 {
1346 emit_byte(0xb8+d);
1347 emit_long(s);
1348 }
1349 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
1350
1351 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1352 {
1353 emit_byte(0x66);
1354 emit_byte(0xb8+d);
1355 emit_word(s);
1356 }
1357 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
1358
1359 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1360 {
1361 emit_byte(0xb0+d);
1362 emit_byte(s);
1363 }
1364 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
1365
1366 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1367 {
1368 emit_byte(0x81);
1369 emit_byte(0x15);
1370 emit_long(d);
1371 emit_long(s);
1372 }
1373 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
1374
1375 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1376 {
1377 if (optimize_imm8 && isbyte(s)) {
1378 emit_byte(0x83);
1379 emit_byte(0x05);
1380 emit_long(d);
1381 emit_byte(s);
1382 }
1383 else {
1384 emit_byte(0x81);
1385 emit_byte(0x05);
1386 emit_long(d);
1387 emit_long(s);
1388 }
1389 }
1390 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
1391
1392 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1393 {
1394 emit_byte(0x66);
1395 emit_byte(0x81);
1396 emit_byte(0x05);
1397 emit_long(d);
1398 emit_word(s);
1399 }
1400 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
1401
1402 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1403 {
1404 emit_byte(0x80);
1405 emit_byte(0x05);
1406 emit_long(d);
1407 emit_byte(s);
1408 }
1409 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1410
1411 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1412 {
1413 if (optimize_accum && isaccum(d))
1414 emit_byte(0xa9);
1415 else {
1416 emit_byte(0xf7);
1417 emit_byte(0xc0+d);
1418 }
1419 emit_long(i);
1420 }
1421 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1422
1423 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1424 {
1425 emit_byte(0x85);
1426 emit_byte(0xc0+8*s+d);
1427 }
1428 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
1429
1430 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1431 {
1432 emit_byte(0x66);
1433 emit_byte(0x85);
1434 emit_byte(0xc0+8*s+d);
1435 }
1436 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
1437
1438 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1439 {
1440 emit_byte(0x84);
1441 emit_byte(0xc0+8*s+d);
1442 }
1443 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1444
1445 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1446 {
1447 if (optimize_imm8 && isbyte(i)) {
1448 emit_byte(0x83);
1449 emit_byte(0xe0+d);
1450 emit_byte(i);
1451 }
1452 else {
1453 if (optimize_accum && isaccum(d))
1454 emit_byte(0x25);
1455 else {
1456 emit_byte(0x81);
1457 emit_byte(0xe0+d);
1458 }
1459 emit_long(i);
1460 }
1461 }
1462 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1463
1464 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1465 {
1466 emit_byte(0x66);
1467 if (optimize_imm8 && isbyte(i)) {
1468 emit_byte(0x83);
1469 emit_byte(0xe0+d);
1470 emit_byte(i);
1471 }
1472 else {
1473 if (optimize_accum && isaccum(d))
1474 emit_byte(0x25);
1475 else {
1476 emit_byte(0x81);
1477 emit_byte(0xe0+d);
1478 }
1479 emit_word(i);
1480 }
1481 }
1482 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1483
1484 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1485 {
1486 emit_byte(0x21);
1487 emit_byte(0xc0+8*s+d);
1488 }
1489 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1490
1491 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1492 {
1493 emit_byte(0x66);
1494 emit_byte(0x21);
1495 emit_byte(0xc0+8*s+d);
1496 }
1497 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1498
1499 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1500 {
1501 emit_byte(0x20);
1502 emit_byte(0xc0+8*s+d);
1503 }
1504 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1505
1506 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1507 {
1508 if (optimize_imm8 && isbyte(i)) {
1509 emit_byte(0x83);
1510 emit_byte(0xc8+d);
1511 emit_byte(i);
1512 }
1513 else {
1514 if (optimize_accum && isaccum(d))
1515 emit_byte(0x0d);
1516 else {
1517 emit_byte(0x81);
1518 emit_byte(0xc8+d);
1519 }
1520 emit_long(i);
1521 }
1522 }
1523 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1524
1525 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1526 {
1527 emit_byte(0x09);
1528 emit_byte(0xc0+8*s+d);
1529 }
1530 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1531
1532 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1533 {
1534 emit_byte(0x66);
1535 emit_byte(0x09);
1536 emit_byte(0xc0+8*s+d);
1537 }
1538 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1539
1540 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1541 {
1542 emit_byte(0x08);
1543 emit_byte(0xc0+8*s+d);
1544 }
1545 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1546
1547 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1548 {
1549 emit_byte(0x11);
1550 emit_byte(0xc0+8*s+d);
1551 }
1552 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1553
1554 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1555 {
1556 emit_byte(0x66);
1557 emit_byte(0x11);
1558 emit_byte(0xc0+8*s+d);
1559 }
1560 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1561
1562 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1563 {
1564 emit_byte(0x10);
1565 emit_byte(0xc0+8*s+d);
1566 }
1567 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1568
1569 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1570 {
1571 emit_byte(0x01);
1572 emit_byte(0xc0+8*s+d);
1573 }
1574 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1575
1576 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1577 {
1578 emit_byte(0x66);
1579 emit_byte(0x01);
1580 emit_byte(0xc0+8*s+d);
1581 }
1582 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1583
1584 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1585 {
1586 emit_byte(0x00);
1587 emit_byte(0xc0+8*s+d);
1588 }
1589 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1590
1591 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1592 {
1593 if (isbyte(i)) {
1594 emit_byte(0x83);
1595 emit_byte(0xe8+d);
1596 emit_byte(i);
1597 }
1598 else {
1599 if (optimize_accum && isaccum(d))
1600 emit_byte(0x2d);
1601 else {
1602 emit_byte(0x81);
1603 emit_byte(0xe8+d);
1604 }
1605 emit_long(i);
1606 }
1607 }
1608 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1609
1610 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1611 {
1612 if (optimize_accum && isaccum(d))
1613 emit_byte(0x2c);
1614 else {
1615 emit_byte(0x80);
1616 emit_byte(0xe8+d);
1617 }
1618 emit_byte(i);
1619 }
1620 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1621
1622 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1623 {
1624 if (isbyte(i)) {
1625 emit_byte(0x83);
1626 emit_byte(0xc0+d);
1627 emit_byte(i);
1628 }
1629 else {
1630 if (optimize_accum && isaccum(d))
1631 emit_byte(0x05);
1632 else {
1633 emit_byte(0x81);
1634 emit_byte(0xc0+d);
1635 }
1636 emit_long(i);
1637 }
1638 }
1639 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1640
1641 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1642 {
1643 emit_byte(0x66);
1644 if (isbyte(i)) {
1645 emit_byte(0x83);
1646 emit_byte(0xc0+d);
1647 emit_byte(i);
1648 }
1649 else {
1650 if (optimize_accum && isaccum(d))
1651 emit_byte(0x05);
1652 else {
1653 emit_byte(0x81);
1654 emit_byte(0xc0+d);
1655 }
1656 emit_word(i);
1657 }
1658 }
1659 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1660
1661 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1662 {
1663 if (optimize_accum && isaccum(d))
1664 emit_byte(0x04);
1665 else {
1666 emit_byte(0x80);
1667 emit_byte(0xc0+d);
1668 }
1669 emit_byte(i);
1670 }
1671 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1672
1673 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1674 {
1675 emit_byte(0x19);
1676 emit_byte(0xc0+8*s+d);
1677 }
1678 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1679
1680 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1681 {
1682 emit_byte(0x66);
1683 emit_byte(0x19);
1684 emit_byte(0xc0+8*s+d);
1685 }
1686 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1687
1688 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1689 {
1690 emit_byte(0x18);
1691 emit_byte(0xc0+8*s+d);
1692 }
1693 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1694
1695 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1696 {
1697 emit_byte(0x29);
1698 emit_byte(0xc0+8*s+d);
1699 }
1700 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1701
1702 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1703 {
1704 emit_byte(0x66);
1705 emit_byte(0x29);
1706 emit_byte(0xc0+8*s+d);
1707 }
1708 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1709
1710 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1711 {
1712 emit_byte(0x28);
1713 emit_byte(0xc0+8*s+d);
1714 }
1715 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1716
1717 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1718 {
1719 emit_byte(0x39);
1720 emit_byte(0xc0+8*s+d);
1721 }
1722 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1723
1724 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1725 {
1726 if (optimize_imm8 && isbyte(i)) {
1727 emit_byte(0x83);
1728 emit_byte(0xf8+r);
1729 emit_byte(i);
1730 }
1731 else {
1732 if (optimize_accum && isaccum(r))
1733 emit_byte(0x3d);
1734 else {
1735 emit_byte(0x81);
1736 emit_byte(0xf8+r);
1737 }
1738 emit_long(i);
1739 }
1740 }
1741 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1742
1743 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1744 {
1745 emit_byte(0x66);
1746 emit_byte(0x39);
1747 emit_byte(0xc0+8*s+d);
1748 }
1749 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1750
1751 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1752 {
1753 emit_byte(0x80);
1754 emit_byte(0x3d);
1755 emit_long(d);
1756 emit_byte(s);
1757 }
1758 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1759
1760 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1761 {
1762 if (optimize_accum && isaccum(d))
1763 emit_byte(0x3c);
1764 else {
1765 emit_byte(0x80);
1766 emit_byte(0xf8+d);
1767 }
1768 emit_byte(i);
1769 }
1770 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1771
1772 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1773 {
1774 emit_byte(0x38);
1775 emit_byte(0xc0+8*s+d);
1776 }
1777 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1778
1779 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1780 {
1781 int fi;
1782
1783 switch(factor) {
1784 case 1: fi=0; break;
1785 case 2: fi=1; break;
1786 case 4: fi=2; break;
1787 case 8: fi=3; break;
1788 default: abort();
1789 }
1790 emit_byte(0x39);
1791 emit_byte(0x04+8*d);
1792 emit_byte(5+8*index+0x40*fi);
1793 emit_long(offset);
1794 }
1795 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1796
1797 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1798 {
1799 emit_byte(0x31);
1800 emit_byte(0xc0+8*s+d);
1801 }
1802 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1803
1804 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1805 {
1806 emit_byte(0x66);
1807 emit_byte(0x31);
1808 emit_byte(0xc0+8*s+d);
1809 }
1810 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1811
1812 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1813 {
1814 emit_byte(0x30);
1815 emit_byte(0xc0+8*s+d);
1816 }
1817 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1818
1819 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1820 {
1821 if (optimize_imm8 && isbyte(s)) {
1822 emit_byte(0x83);
1823 emit_byte(0x2d);
1824 emit_long(d);
1825 emit_byte(s);
1826 }
1827 else {
1828 emit_byte(0x81);
1829 emit_byte(0x2d);
1830 emit_long(d);
1831 emit_long(s);
1832 }
1833 }
1834 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1835
1836 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1837 {
1838 if (optimize_imm8 && isbyte(s)) {
1839 emit_byte(0x83);
1840 emit_byte(0x3d);
1841 emit_long(d);
1842 emit_byte(s);
1843 }
1844 else {
1845 emit_byte(0x81);
1846 emit_byte(0x3d);
1847 emit_long(d);
1848 emit_long(s);
1849 }
1850 }
1851 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1852
1853 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1854 {
1855 emit_byte(0x87);
1856 emit_byte(0xc0+8*r1+r2);
1857 }
1858 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1859
1860 /*************************************************************************
1861 * FIXME: string-related instructions *
1862 *************************************************************************/
1863
1864 LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1865 {
1866 emit_byte(0xfc);
1867 }
1868 LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1869
1870 LOWFUNC(WRITE,NONE,0,raw_std,(void))
1871 {
1872 emit_byte(0xfd);
1873 }
1874 LENDFUNC(WRITE,NONE,0,raw_std,(void))
1875
1876 LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1877 {
1878 emit_byte(0xa4);
1879 }
1880 LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1881
1882 LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1883 {
1884 emit_byte(0xa5);
1885 }
1886 LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1887
1888 LOWFUNC(NONE,RMW,0,raw_rep,(void))
1889 {
1890 emit_byte(0xf3);
1891 }
1892 LENDFUNC(NONE,RMW,0,raw_rep,(void))
1893
1894 LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1895 {
1896 raw_rep();
1897 raw_movs_b();
1898 }
1899 LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1900
1901 LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1902 {
1903 raw_rep();
1904 raw_movs_l();
1905 }
1906 LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1907
1908 /*************************************************************************
1909 * FIXME: mem access modes probably wrong *
1910 *************************************************************************/
1911
1912 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1913 {
1914 emit_byte(0x9c);
1915 }
1916 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1917
1918 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1919 {
1920 emit_byte(0x9d);
1921 }
1922 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1923
1924 /*************************************************************************
1925 * Unoptimizable stuff --- jump *
1926 *************************************************************************/
1927
1928 static __inline__ void raw_call_r(R4 r)
1929 {
1930 emit_byte(0xff);
1931 emit_byte(0xd0+r);
1932 }
1933
1934 static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1935 {
1936 int mu;
1937 switch(m) {
1938 case 1: mu=0; break;
1939 case 2: mu=1; break;
1940 case 4: mu=2; break;
1941 case 8: mu=3; break;
1942 default: abort();
1943 }
1944 emit_byte(0xff);
1945 emit_byte(0x14);
1946 emit_byte(0x05+8*r+0x40*mu);
1947 emit_long(base);
1948 }
1949
1950 static __inline__ void raw_jmp_r(R4 r)
1951 {
1952 emit_byte(0xff);
1953 emit_byte(0xe0+r);
1954 }
1955
1956 static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1957 {
1958 int mu;
1959 switch(m) {
1960 case 1: mu=0; break;
1961 case 2: mu=1; break;
1962 case 4: mu=2; break;
1963 case 8: mu=3; break;
1964 default: abort();
1965 }
1966 emit_byte(0xff);
1967 emit_byte(0x24);
1968 emit_byte(0x05+8*r+0x40*mu);
1969 emit_long(base);
1970 }
1971
1972 static __inline__ void raw_jmp_m(uae_u32 base)
1973 {
1974 emit_byte(0xff);
1975 emit_byte(0x25);
1976 emit_long(base);
1977 }
1978
1979
1980 static __inline__ void raw_call(uae_u32 t)
1981 {
1982 emit_byte(0xe8);
1983 emit_long(t-(uae_u32)target-4);
1984 }
1985
1986 static __inline__ void raw_jmp(uae_u32 t)
1987 {
1988 emit_byte(0xe9);
1989 emit_long(t-(uae_u32)target-4);
1990 }
1991
1992 static __inline__ void raw_jl(uae_u32 t)
1993 {
1994 emit_byte(0x0f);
1995 emit_byte(0x8c);
1996 emit_long(t-(uae_u32)target-4);
1997 }
1998
1999 static __inline__ void raw_jz(uae_u32 t)
2000 {
2001 emit_byte(0x0f);
2002 emit_byte(0x84);
2003 emit_long(t-(uae_u32)target-4);
2004 }
2005
2006 static __inline__ void raw_jnz(uae_u32 t)
2007 {
2008 emit_byte(0x0f);
2009 emit_byte(0x85);
2010 emit_long(t-(uae_u32)target-4);
2011 }
2012
2013 static __inline__ void raw_jnz_l_oponly(void)
2014 {
2015 emit_byte(0x0f);
2016 emit_byte(0x85);
2017 }
2018
2019 static __inline__ void raw_jcc_l_oponly(int cc)
2020 {
2021 emit_byte(0x0f);
2022 emit_byte(0x80+cc);
2023 }
2024
2025 static __inline__ void raw_jnz_b_oponly(void)
2026 {
2027 emit_byte(0x75);
2028 }
2029
2030 static __inline__ void raw_jz_b_oponly(void)
2031 {
2032 emit_byte(0x74);
2033 }
2034
2035 static __inline__ void raw_jcc_b_oponly(int cc)
2036 {
2037 emit_byte(0x70+cc);
2038 }
2039
2040 static __inline__ void raw_jmp_l_oponly(void)
2041 {
2042 emit_byte(0xe9);
2043 }
2044
2045 static __inline__ void raw_jmp_b_oponly(void)
2046 {
2047 emit_byte(0xeb);
2048 }
2049
2050 static __inline__ void raw_ret(void)
2051 {
2052 emit_byte(0xc3);
2053 }
2054
2055 static __inline__ void raw_nop(void)
2056 {
2057 emit_byte(0x90);
2058 }
2059
2060 static __inline__ void raw_emit_nop_filler(int nbytes)
2061 {
2062 /* Source: GNU Binutils 2.12.90.0.15 */
2063 /* Various efficient no-op patterns for aligning code labels.
2064 Note: Don't try to assemble the instructions in the comments.
2065 0L and 0w are not legal. */
2066 static const uae_u8 f32_1[] =
2067 {0x90}; /* nop */
2068 static const uae_u8 f32_2[] =
2069 {0x89,0xf6}; /* movl %esi,%esi */
2070 static const uae_u8 f32_3[] =
2071 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
2072 static const uae_u8 f32_4[] =
2073 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2074 static const uae_u8 f32_5[] =
2075 {0x90, /* nop */
2076 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
2077 static const uae_u8 f32_6[] =
2078 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
2079 static const uae_u8 f32_7[] =
2080 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2081 static const uae_u8 f32_8[] =
2082 {0x90, /* nop */
2083 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
2084 static const uae_u8 f32_9[] =
2085 {0x89,0xf6, /* movl %esi,%esi */
2086 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2087 static const uae_u8 f32_10[] =
2088 {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
2089 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2090 static const uae_u8 f32_11[] =
2091 {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
2092 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2093 static const uae_u8 f32_12[] =
2094 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2095 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
2096 static const uae_u8 f32_13[] =
2097 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
2098 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2099 static const uae_u8 f32_14[] =
2100 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
2101 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
2102 static const uae_u8 f32_15[] =
2103 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2104 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2105 static const uae_u8 f32_16[] =
2106 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
2107 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
2108 static const uae_u8 *const f32_patt[] = {
2109 f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
2110 f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
2111 };
2112
2113 int nloops = nbytes / 16;
2114 while (nloops-- > 0)
2115 emit_block(f32_16, sizeof(f32_16));
2116
2117 nbytes %= 16;
2118 if (nbytes)
2119 emit_block(f32_patt[nbytes - 1], nbytes);
2120 }
2121
2122
2123 /*************************************************************************
2124 * Flag handling, to and fro UAE flag register *
2125 *************************************************************************/
2126
2127 #ifdef SAHF_SETO_PROFITABLE
2128
2129 #define FLAG_NREG1 0 /* Set to -1 if any register will do */
2130
2131 static __inline__ void raw_flags_to_reg(int r)
2132 {
2133 raw_lahf(0); /* Most flags in AH */
2134 //raw_setcc(r,0); /* V flag in AL */
2135 raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
2136
2137 #if 1 /* Let's avoid those nasty partial register stalls */
2138 //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
2139 raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
2140 //live.state[FLAGTMP].status=CLEAN;
2141 live.state[FLAGTMP].status=INMEM;
2142 live.state[FLAGTMP].realreg=-1;
2143 /* We just "evicted" FLAGTMP. */
2144 if (live.nat[r].nholds!=1) {
2145 /* Huh? */
2146 abort();
2147 }
2148 live.nat[r].nholds=0;
2149 #endif
2150 }
2151
2152 #define FLAG_NREG2 0 /* Set to -1 if any register will do */
2153 static __inline__ void raw_reg_to_flags(int r)
2154 {
2155 raw_cmp_b_ri(r,-127); /* set V */
2156 raw_sahf(0);
2157 }
2158
2159 #else
2160
2161 #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
2162 static __inline__ void raw_flags_to_reg(int r)
2163 {
2164 raw_pushfl();
2165 raw_pop_l_r(r);
2166 raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r);
2167 // live.state[FLAGTMP].status=CLEAN;
2168 live.state[FLAGTMP].status=INMEM;
2169 live.state[FLAGTMP].realreg=-1;
2170 /* We just "evicted" FLAGTMP. */
2171 if (live.nat[r].nholds!=1) {
2172 /* Huh? */
2173 abort();
2174 }
2175 live.nat[r].nholds=0;
2176 }
2177
2178 #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
2179 static __inline__ void raw_reg_to_flags(int r)
2180 {
2181 raw_push_l_r(r);
2182 raw_popfl();
2183 }
2184
2185 #endif
2186
2187 /* Apparently, there are enough instructions between flag store and
2188 flag reload to avoid the partial memory stall */
2189 static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
2190 {
2191 #if 1
2192 raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2193 #else
2194 raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2195 raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
2196 #endif
2197 }
2198
2199 /* FLAGX is byte sized, and we *do* write it at that size */
2200 static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
2201 {
2202 if (live.nat[target].canbyte)
2203 raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
2204 else if (live.nat[target].canword)
2205 raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
2206 else
2207 raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
2208 }
2209
2210
2211 static __inline__ void raw_inc_sp(int off)
2212 {
2213 raw_add_l_ri(ESP_INDEX,off);
2214 }
2215
2216 /*************************************************************************
2217 * Handling mistaken direct memory access *
2218 *************************************************************************/
2219
2220 // gb-- I don't need that part for JIT Basilisk II
2221 #if defined(NATMEM_OFFSET) && 0
2222 #include <asm/sigcontext.h>
2223 #include <signal.h>
2224
2225 #define SIG_READ 1
2226 #define SIG_WRITE 2
2227
2228 static int in_handler=0;
2229 static uae_u8 veccode[256];
2230
2231 static void vec(int x, struct sigcontext sc)
2232 {
2233 uae_u8* i=(uae_u8*)sc.eip;
2234 uae_u32 addr=sc.cr2;
2235 int r=-1;
2236 int size=4;
2237 int dir=-1;
2238 int len=0;
2239 int j;
2240
2241 write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
2242 if (!canbang)
2243 write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
2244 if (in_handler)
2245 write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
2246
2247 if (canbang && i>=compiled_code && i<=current_compile_p) {
2248 if (*i==0x66) {
2249 i++;
2250 size=2;
2251 len++;
2252 }
2253
2254 switch(i[0]) {
2255 case 0x8a:
2256 if ((i[1]&0xc0)==0x80) {
2257 r=(i[1]>>3)&7;
2258 dir=SIG_READ;
2259 size=1;
2260 len+=6;
2261 break;
2262 }
2263 break;
2264 case 0x88:
2265 if ((i[1]&0xc0)==0x80) {
2266 r=(i[1]>>3)&7;
2267 dir=SIG_WRITE;
2268 size=1;
2269 len+=6;
2270 break;
2271 }
2272 break;
2273 case 0x8b:
2274 if ((i[1]&0xc0)==0x80) {
2275 r=(i[1]>>3)&7;
2276 dir=SIG_READ;
2277 len+=6;
2278 break;
2279 }
2280 if ((i[1]&0xc0)==0x40) {
2281 r=(i[1]>>3)&7;
2282 dir=SIG_READ;
2283 len+=3;
2284 break;
2285 }
2286 break;
2287 case 0x89:
2288 if ((i[1]&0xc0)==0x80) {
2289 r=(i[1]>>3)&7;
2290 dir=SIG_WRITE;
2291 len+=6;
2292 break;
2293 }
2294 if ((i[1]&0xc0)==0x40) {
2295 r=(i[1]>>3)&7;
2296 dir=SIG_WRITE;
2297 len+=3;
2298 break;
2299 }
2300 break;
2301 }
2302 }
2303
2304 if (r!=-1) {
2305 void* pr=NULL;
2306 write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
2307
2308 switch(r) {
2309 case 0: pr=&(sc.eax); break;
2310 case 1: pr=&(sc.ecx); break;
2311 case 2: pr=&(sc.edx); break;
2312 case 3: pr=&(sc.ebx); break;
2313 case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
2314 case 5: pr=(size>1)?
2315 (void*)(&(sc.ebp)):
2316 (void*)(((uae_u8*)&(sc.ecx))+1); break;
2317 case 6: pr=(size>1)?
2318 (void*)(&(sc.esi)):
2319 (void*)(((uae_u8*)&(sc.edx))+1); break;
2320 case 7: pr=(size>1)?
2321 (void*)(&(sc.edi)):
2322 (void*)(((uae_u8*)&(sc.ebx))+1); break;
2323 default: abort();
2324 }
2325 if (pr) {
2326 blockinfo* bi;
2327
2328 if (currprefs.comp_oldsegv) {
2329 addr-=NATMEM_OFFSET;
2330
2331 if ((addr>=0x10000000 && addr<0x40000000) ||
2332 (addr>=0x50000000)) {
2333 write_log("Suspicious address in %x SEGV handler.\n",addr);
2334 }
2335 if (dir==SIG_READ) {
2336 switch(size) {
2337 case 1: *((uae_u8*)pr)=get_byte(addr); break;
2338 case 2: *((uae_u16*)pr)=get_word(addr); break;
2339 case 4: *((uae_u32*)pr)=get_long(addr); break;
2340 default: abort();
2341 }
2342 }
2343 else { /* write */
2344 switch(size) {
2345 case 1: put_byte(addr,*((uae_u8*)pr)); break;
2346 case 2: put_word(addr,*((uae_u16*)pr)); break;
2347 case 4: put_long(addr,*((uae_u32*)pr)); break;
2348 default: abort();
2349 }
2350 }
2351 write_log("Handled one access!\n");
2352 fflush(stdout);
2353 segvcount++;
2354 sc.eip+=len;
2355 }
2356 else {
2357 void* tmp=target;
2358 int i;
2359 uae_u8 vecbuf[5];
2360
2361 addr-=NATMEM_OFFSET;
2362
2363 if ((addr>=0x10000000 && addr<0x40000000) ||
2364 (addr>=0x50000000)) {
2365 write_log("Suspicious address in %x SEGV handler.\n",addr);
2366 }
2367
2368 target=(uae_u8*)sc.eip;
2369 for (i=0;i<5;i++)
2370 vecbuf[i]=target[i];
2371 emit_byte(0xe9);
2372 emit_long((uae_u32)veccode-(uae_u32)target-4);
2373 write_log("Create jump to %p\n",veccode);
2374
2375 write_log("Handled one access!\n");
2376 fflush(stdout);
2377 segvcount++;
2378
2379 target=veccode;
2380
2381 if (dir==SIG_READ) {
2382 switch(size) {
2383 case 1: raw_mov_b_ri(r,get_byte(addr)); break;
2384 case 2: raw_mov_w_ri(r,get_byte(addr)); break;
2385 case 4: raw_mov_l_ri(r,get_byte(addr)); break;
2386 default: abort();
2387 }
2388 }
2389 else { /* write */
2390 switch(size) {
2391 case 1: put_byte(addr,*((uae_u8*)pr)); break;
2392 case 2: put_word(addr,*((uae_u16*)pr)); break;
2393 case 4: put_long(addr,*((uae_u32*)pr)); break;
2394 default: abort();
2395 }
2396 }
2397 for (i=0;i<5;i++)
2398 raw_mov_b_mi(sc.eip+i,vecbuf[i]);
2399 raw_mov_l_mi((uae_u32)&in_handler,0);
2400 emit_byte(0xe9);
2401 emit_long(sc.eip+len-(uae_u32)target-4);
2402 in_handler=1;
2403 target=tmp;
2404 }
2405 bi=active;
2406 while (bi) {
2407 if (bi->handler &&
2408 (uae_u8*)bi->direct_handler<=i &&
2409 (uae_u8*)bi->nexthandler>i) {
2410 write_log("deleted trigger (%p<%p<%p) %p\n",
2411 bi->handler,
2412 i,
2413 bi->nexthandler,
2414 bi->pc_p);
2415 invalidate_block(bi);
2416 raise_in_cl_list(bi);
2417 set_special(0);
2418 return;
2419 }
2420 bi=bi->next;
2421 }
2422 /* Not found in the active list. Might be a rom routine that
2423 is in the dormant list */
2424 bi=dormant;
2425 while (bi) {
2426 if (bi->handler &&
2427 (uae_u8*)bi->direct_handler<=i &&
2428 (uae_u8*)bi->nexthandler>i) {
2429 write_log("deleted trigger (%p<%p<%p) %p\n",
2430 bi->handler,
2431 i,
2432 bi->nexthandler,
2433 bi->pc_p);
2434 invalidate_block(bi);
2435 raise_in_cl_list(bi);
2436 set_special(0);
2437 return;
2438 }
2439 bi=bi->next;
2440 }
2441 write_log("Huh? Could not find trigger!\n");
2442 return;
2443 }
2444 }
2445 write_log("Can't handle access!\n");
2446 for (j=0;j<10;j++) {
2447 write_log("instruction byte %2d is %02x\n",j,i[j]);
2448 }
2449 write_log("Please send the above info (starting at \"fault address\") to\n"
2450 "bmeyer@csse.monash.edu.au\n"
2451 "This shouldn't happen ;-)\n");
2452 fflush(stdout);
2453 signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
2454 }
2455 #endif
2456
2457
2458 /*************************************************************************
2459 * Checking for CPU features *
2460 *************************************************************************/
2461
2462 struct cpuinfo_x86 {
2463 uae_u8 x86; // CPU family
2464 uae_u8 x86_vendor; // CPU vendor
2465 uae_u8 x86_processor; // CPU canonical processor type
2466 uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
2467 uae_u32 x86_hwcap;
2468 uae_u8 x86_model;
2469 uae_u8 x86_mask;
2470 int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
2471 char x86_vendor_id[16];
2472 };
2473 struct cpuinfo_x86 cpuinfo;
2474
/* Values stored in cpuinfo_x86::x86_vendor */
enum {
	X86_VENDOR_INTEL	= 0,
	X86_VENDOR_CYRIX	= 1,
	X86_VENDOR_AMD		= 2,
	X86_VENDOR_UMC		= 3,
	X86_VENDOR_NEXGEN	= 4,
	X86_VENDOR_CENTAUR	= 5,
	X86_VENDOR_RISE		= 6,
	X86_VENDOR_TRANSMETA	= 7,
	X86_VENDOR_NSC		= 8,
	X86_VENDOR_UNKNOWN	= 0xff	/* vendor string not recognised */
};
2487
/* Canonical processor types; the values index the per-processor tables,
   X86_PROCESSOR_max is the number of entries. */
enum {
	X86_PROCESSOR_I386	= 0,	/* 80386 */
	X86_PROCESSOR_I486	= 1,	/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM	= 2,
	X86_PROCESSOR_PENTIUMPRO = 3,
	X86_PROCESSOR_K6	= 4,
	X86_PROCESSOR_ATHLON	= 5,
	X86_PROCESSOR_PENTIUM4	= 6,
	X86_PROCESSOR_max	= 7
};
2498
2499 static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2500 "80386",
2501 "80486",
2502 "Pentium",
2503 "PentiumPro",
2504 "K6",
2505 "Athlon",
2506 "Pentium4"
2507 };
2508
/* Per-processor code alignment parameters, indexed by X86_PROCESSOR_*.
   raw_init_cpu() reads align_loop and align_jump when tune_alignment is set. */
static struct ptt {
    const int align_loop;
    const int align_loop_max_skip;
    const int align_jump;
    const int align_jump_max_skip;
    const int align_func;
}
x86_alignments[X86_PROCESSOR_max] = {
    { 4, 3, 4, 3, 4 },      /* X86_PROCESSOR_I386 */
    { 16, 15, 16, 15, 16 }, /* X86_PROCESSOR_I486 */
    { 16, 7, 16, 7, 16 },   /* X86_PROCESSOR_PENTIUM */
    { 16, 15, 16, 7, 16 },  /* X86_PROCESSOR_PENTIUMPRO */
    { 32, 7, 32, 7, 32 },   /* X86_PROCESSOR_K6 */
    { 16, 7, 16, 7, 16 },   /* X86_PROCESSOR_ATHLON */
    { 0, 0, 0, 0, 0 }       /* X86_PROCESSOR_PENTIUM4 */
};
2525
2526 static void
2527 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2528 {
2529 char *v = c->x86_vendor_id;
2530
2531 if (!strcmp(v, "GenuineIntel"))
2532 c->x86_vendor = X86_VENDOR_INTEL;
2533 else if (!strcmp(v, "AuthenticAMD"))
2534 c->x86_vendor = X86_VENDOR_AMD;
2535 else if (!strcmp(v, "CyrixInstead"))
2536 c->x86_vendor = X86_VENDOR_CYRIX;
2537 else if (!strcmp(v, "Geode by NSC"))
2538 c->x86_vendor = X86_VENDOR_NSC;
2539 else if (!strcmp(v, "UMC UMC UMC "))
2540 c->x86_vendor = X86_VENDOR_UMC;
2541 else if (!strcmp(v, "CentaurHauls"))
2542 c->x86_vendor = X86_VENDOR_CENTAUR;
2543 else if (!strcmp(v, "NexGenDriven"))
2544 c->x86_vendor = X86_VENDOR_NEXGEN;
2545 else if (!strcmp(v, "RiseRiseRise"))
2546 c->x86_vendor = X86_VENDOR_RISE;
2547 else if (!strcmp(v, "GenuineTMx86") ||
2548 !strcmp(v, "TransmetaCPU"))
2549 c->x86_vendor = X86_VENDOR_TRANSMETA;
2550 else
2551 c->x86_vendor = X86_VENDOR_UNKNOWN;
2552 }
2553
/*
 * Execute the CPUID instruction for function number `op` and store the
 * resulting registers through eax/ebx/ecx/edx.  Any of the four output
 * pointers may be NULL, in which case that register is not stored.
 *
 * The routine assembles a small stub into a static buffer using the raw_*
 * emitters and then calls it; the code generator's current target is
 * saved beforehand and restored once the stub has been emitted.
 */
static void
cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
{
    static uae_u8 cpuid_space[256];
    uae_u8* tmp=get_target(); /* remember where the code generator was emitting */

    set_target(cpuid_space);
    /* Save the four registers the stub is about to overwrite */
    raw_push_l_r(0); /* eax */
    raw_push_l_r(1); /* ecx */
    raw_push_l_r(2); /* edx */
    raw_push_l_r(3); /* ebx */
    raw_mov_l_rm(0,(uae_u32)&op); /* eax = op, read from the caller's copy */
    raw_cpuid(0);
    /* Write back only the registers the caller asked for */
    if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
    if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
    if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
    if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
    /* Restore the saved registers in reverse push order */
    raw_pop_l_r(3);
    raw_pop_l_r(2);
    raw_pop_l_r(1);
    raw_pop_l_r(0);
    raw_ret();
    set_target(tmp);

    /* Run the stub that was just assembled */
    ((cpuop_func*)cpuid_space)(0);
}
2580
/*
 * Probe the host CPU with CPUID and record the results in the global
 * `cpuinfo`: vendor, family/model/mask, BrandID and feature flags.  From
 * those it derives the canonical processor type, sets have_cmov and
 * have_rat_stall and, when tune_alignment is set, selects the loop and
 * jump alignments.  Aborts if the processor type cannot be identified.
 */
static void
raw_init_cpu(void)
{
    struct cpuinfo_x86 *c = &cpuinfo;

    /* Defaults */
    c->x86_vendor = X86_VENDOR_UNKNOWN;
    c->cpuid_level = -1; /* CPUID not detected */
    c->x86_model = c->x86_mask = 0; /* So far unknown... */
    c->x86_vendor_id[0] = '\0'; /* Unset */
    c->x86_hwcap = 0;

    /* Get vendor name */
    c->x86_vendor_id[12] = '\0';
    /* EBX, EDX, ECX hold characters 0-3, 4-7 and 8-11 of the vendor string,
       hence the [0], [8], [4] destinations for the ebx, ecx, edx arguments */
    cpuid(0x00000000,
          (uae_u32 *)&c->cpuid_level,
          (uae_u32 *)&c->x86_vendor_id[0],
          (uae_u32 *)&c->x86_vendor_id[8],
          (uae_u32 *)&c->x86_vendor_id[4]);
    x86_get_cpu_vendor(c);

    /* Intel-defined flags: level 0x00000001 */
    c->x86_brand_id = 0;
    if ( c->cpuid_level >= 0x00000001 ) {
        uae_u32 tfms, brand_id;
        cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
        c->x86 = (tfms >> 8) & 15;       /* family: bits 8-11 */
        c->x86_model = (tfms >> 4) & 15; /* model: bits 4-7 */
        c->x86_brand_id = brand_id & 0xff;
        if ( (c->x86_vendor == X86_VENDOR_AMD) &&
             (c->x86 == 0xf)) {
            /* AMD Extended Family and Model Values */
            c->x86 += (tfms >> 20) & 0xff;
            c->x86_model += (tfms >> 12) & 0xf0;
        }
        c->x86_mask = tfms & 15;         /* bits 0-3 */
    } else {
        /* Have CPUID level 0 only - unheard of */
        c->x86 = 4;
    }

    /* Canonicalize processor ID */
    c->x86_processor = X86_PROCESSOR_max; /* sentinel: not identified yet */
    switch (c->x86) {
    case 3:
        c->x86_processor = X86_PROCESSOR_I386;
        break;
    case 4:
        c->x86_processor = X86_PROCESSOR_I486;
        break;
    case 5:
        if (c->x86_vendor == X86_VENDOR_AMD)
            c->x86_processor = X86_PROCESSOR_K6;
        else
            c->x86_processor = X86_PROCESSOR_PENTIUM;
        break;
    case 6:
        if (c->x86_vendor == X86_VENDOR_AMD)
            c->x86_processor = X86_PROCESSOR_ATHLON;
        else
            c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
        break;
    case 15:
        if (c->x86_vendor == X86_VENDOR_INTEL) {
            /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
            if (c->x86_brand_id >= 8)
                c->x86_processor = X86_PROCESSOR_PENTIUM4;
        }
        break;
    }
    /* Any family/vendor combination not handled above is fatal */
    if (c->x86_processor == X86_PROCESSOR_max) {
        fprintf(stderr, "Error: unknown processor type\n");
        fprintf(stderr, " Family : %d\n", c->x86);
        fprintf(stderr, " Model : %d\n", c->x86_model);
        fprintf(stderr, " Mask : %d\n", c->x86_mask);
        if (c->x86_brand_id)
            fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
        abort();
    }

    /* Have CMOV support? */
    have_cmov = (c->x86_hwcap & (1 << 15)) && true;

    /* Can the host CPU suffer from partial register stalls? */
    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
#if 1
    /* It appears that partial register writes are a bad idea even on
       AMD K7 cores, even though they are not supposed to have the
       dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
    if (c->x86_processor == X86_PROCESSOR_ATHLON)
        have_rat_stall = true;
#endif

    /* Alignments */
    if (tune_alignment) {
        align_loops = x86_alignments[c->x86_processor].align_loop;
        align_jumps = x86_alignments[c->x86_processor].align_jump;
    }

    write_log("Max CPUID level=%d Processor is %s [%s]\n",
              c->cpuid_level, c->x86_vendor_id,
              x86_processor_string_table[c->x86_processor]);
}
2684
2685
2686 /*************************************************************************
2687 * FPU stuff *
2688 *************************************************************************/
2689
2690
2691 static __inline__ void raw_fp_init(void)
2692 {
2693 int i;
2694
2695 for (i=0;i<N_FREGS;i++)
2696 live.spos[i]=-2;
2697 live.tos=-1; /* Stack is empty */
2698 }
2699
2700 static __inline__ void raw_fp_cleanup_drop(void)
2701 {
2702 #if 0
2703 /* using FINIT instead of popping all the entries.
2704 Seems to have side effects --- there is display corruption in
2705 Quake when this is used */
2706 if (live.tos>1) {
2707 emit_byte(0x9b);
2708 emit_byte(0xdb);
2709 emit_byte(0xe3);
2710 live.tos=-1;
2711 }
2712 #endif
2713 while (live.tos>=1) {
2714 emit_byte(0xde);
2715 emit_byte(0xd9);
2716 live.tos-=2;
2717 }
2718 while (live.tos>=0) {
2719 emit_byte(0xdd);
2720 emit_byte(0xd8);
2721 live.tos--;
2722 }
2723 raw_fp_init();
2724 }
2725
2726 static __inline__ void make_tos(int r)
2727 {
2728 int p,q;
2729
2730 if (live.spos[r]<0) { /* Register not yet on stack */
2731 emit_byte(0xd9);
2732 emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
2733 live.tos++;
2734 live.spos[r]=live.tos;
2735 live.onstack[live.tos]=r;
2736 return;
2737 }
2738 /* Register is on stack */
2739 if (live.tos==live.spos[r])
2740 return;
2741 p=live.spos[r];
2742 q=live.onstack[live.tos];
2743
2744 emit_byte(0xd9);
2745 emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
2746 live.onstack[live.tos]=r;
2747 live.spos[r]=live.tos;
2748 live.onstack[p]=q;
2749 live.spos[q]=p;
2750 }
2751
2752 static __inline__ void make_tos2(int r, int r2)
2753 {
2754 int q;
2755
2756 make_tos(r2); /* Put the reg that's supposed to end up in position2
2757 on top */
2758
2759 if (live.spos[r]<0) { /* Register not yet on stack */
2760 make_tos(r); /* This will extend the stack */
2761 return;
2762 }
2763 /* Register is on stack */
2764 emit_byte(0xd9);
2765 emit_byte(0xc9); /* Move r2 into position 2 */
2766
2767 q=live.onstack[live.tos-1];
2768 live.onstack[live.tos]=q;
2769 live.spos[q]=live.tos;
2770 live.onstack[live.tos-1]=r2;
2771 live.spos[r2]=live.tos-1;
2772
2773 make_tos(r); /* And r into 1 */
2774 }
2775
2776 static __inline__ int stackpos(int r)
2777 {
2778 if (live.spos[r]<0)
2779 abort();
2780 if (live.tos<live.spos[r]) {
2781 printf("Looking for spos for fnreg %d\n",r);
2782 abort();
2783 }
2784 return live.tos-live.spos[r];
2785 }
2786
2787 static __inline__ void usereg(int r)
2788 {
2789 if (live.spos[r]<0)
2790 make_tos(r);
2791 }
2792
2793 /* This is called with one FP value in a reg *above* tos, which it will
2794 pop off the stack if necessary */
2795 static __inline__ void tos_make(int r)
2796 {
2797 if (live.spos[r]<0) {
2798 live.tos++;
2799 live.spos[r]=live.tos;
2800 live.onstack[live.tos]=r;
2801 return;
2802 }
2803 emit_byte(0xdd);
2804 emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
2805 and pop it*/
2806 }
2807
2808
2809 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2810 {
2811 make_tos(r);
2812 emit_byte(0xdd);
2813 emit_byte(0x15);
2814 emit_long(m);
2815 }
2816 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2817
2818 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
2819 {
2820 make_tos(r);
2821 emit_byte(0xdd);
2822 emit_byte(0x1d);
2823 emit_long(m);
2824 live.onstack[live.tos]=-1;
2825 live.tos--;
2826 live.spos[r]=-2;
2827 }
2828 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
2829
2830 LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2831 {
2832 emit_byte(0xdd);
2833 emit_byte(0x05);
2834 emit_long(m);
2835 tos_make(r);
2836 }
2837 LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
2838
2839 LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2840 {
2841 emit_byte(0xdb);
2842 emit_byte(0x05);
2843 emit_long(m);
2844 tos_make(r);
2845 }
2846 LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
2847
2848 LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2849 {
2850 make_tos(r);
2851 emit_byte(0xdb);
2852 emit_byte(0x15);
2853 emit_long(m);
2854 }
2855 LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
2856
2857 LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2858 {
2859 emit_byte(0xd9);
2860 emit_byte(0x05);
2861 emit_long(m);
2862 tos_make(r);
2863 }
2864 LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
2865
2866 LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2867 {
2868 make_tos(r);
2869 emit_byte(0xd9);
2870 emit_byte(0x15);
2871 emit_long(m);
2872 }
2873 LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
2874
2875 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2876 {
2877 int rs;
2878
2879 /* Stupid x87 can't write a long double to mem without popping the
2880 stack! */
2881 usereg(r);
2882 rs=stackpos(r);
2883 emit_byte(0xd9); /* Get a copy to the top of stack */
2884 emit_byte(0xc0+rs);
2885
2886 emit_byte(0xdb); /* store and pop it */
2887 emit_byte(0x3d);
2888 emit_long(m);
2889 }
2890 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2891
2892 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
2893 {
2894 int rs;
2895
2896 make_tos(r);
2897 emit_byte(0xdb); /* store and pop it */
2898 emit_byte(0x3d);
2899 emit_long(m);
2900 live.onstack[live.tos]=-1;
2901 live.tos--;
2902 live.spos[r]=-2;
2903 }
2904 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
2905
2906 LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2907 {
2908 emit_byte(0xdb);
2909 emit_byte(0x2d);
2910 emit_long(m);
2911 tos_make(r);
2912 }
2913 LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
2914
2915 LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2916 {
2917 emit_byte(0xd9);
2918 emit_byte(0xeb);
2919 tos_make(r);
2920 }
2921 LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
2922
2923 LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2924 {
2925 emit_byte(0xd9);
2926 emit_byte(0xec);
2927 tos_make(r);
2928 }
2929 LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
2930
2931 LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2932 {
2933 emit_byte(0xd9);
2934 emit_byte(0xea);
2935 tos_make(r);
2936 }
2937 LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
2938
2939 LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2940 {
2941 emit_byte(0xd9);
2942 emit_byte(0xed);
2943 tos_make(r);
2944 }
2945 LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
2946
2947 LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2948 {
2949 emit_byte(0xd9);
2950 emit_byte(0xe8);
2951 tos_make(r);
2952 }
2953 LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
2954
2955 LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2956 {
2957 emit_byte(0xd9);
2958 emit_byte(0xee);
2959 tos_make(r);
2960 }
2961 LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
2962
2963 LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2964 {
2965 int ds;
2966
2967 usereg(s);
2968 ds=stackpos(s);
2969 if (ds==0 && live.spos[d]>=0) {
2970 /* source is on top of stack, and we already have the dest */
2971 int dd=stackpos(d);
2972 emit_byte(0xdd);
2973 emit_byte(0xd0+dd);
2974 }
2975 else {
2976 emit_byte(0xd9);
2977 emit_byte(0xc0+ds); /* duplicate source on tos */
2978 tos_make(d); /* store to destination, pop if necessary */
2979 }
2980 }
2981 LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
2982
2983 LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2984 {
2985 emit_byte(0xd9);
2986 emit_byte(0xa8+index);
2987 emit_long(base);
2988 }
2989 LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
2990
2991
2992 LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
2993 {
2994 int ds;
2995
2996 if (d!=s) {
2997 usereg(s);
2998 ds=stackpos(s);
2999 emit_byte(0xd9);
3000 emit_byte(0xc0+ds); /* duplicate source */
3001 emit_byte(0xd9);
3002 emit_byte(0xfa); /* take square root */
3003 tos_make(d); /* store to destination */
3004 }
3005 else {
3006 make_tos(d);
3007 emit_byte(0xd9);
3008 emit_byte(0xfa); /* take square root */
3009 }
3010 }
3011 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
3012
3013 LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3014 {
3015 int ds;
3016
3017 if (d!=s) {
3018 usereg(s);
3019 ds=stackpos(s);
3020 emit_byte(0xd9);
3021 emit_byte(0xc0+ds); /* duplicate source */
3022 emit_byte(0xd9);
3023 emit_byte(0xe1); /* take fabs */
3024 tos_make(d); /* store to destination */
3025 }
3026 else {
3027 make_tos(d);
3028 emit_byte(0xd9);
3029 emit_byte(0xe1); /* take fabs */
3030 }
3031 }
3032 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
3033
3034 LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3035 {
3036 int ds;
3037
3038 if (d!=s) {
3039 usereg(s);
3040 ds=stackpos(s);
3041 emit_byte(0xd9);
3042 emit_byte(0xc0+ds); /* duplicate source */
3043 emit_byte(0xd9);
3044 emit_byte(0xfc); /* take frndint */
3045 tos_make(d); /* store to destination */
3046 }
3047 else {
3048 make_tos(d);
3049 emit_byte(0xd9);
3050 emit_byte(0xfc); /* take frndint */
3051 }
3052 }
3053 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
3054
3055 LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3056 {
3057 int ds;
3058
3059 if (d!=s) {
3060 usereg(s);
3061 ds=stackpos(s);
3062 emit_byte(0xd9);
3063 emit_byte(0xc0+ds); /* duplicate source */
3064 emit_byte(0xd9);
3065 emit_byte(0xff); /* take cos */
3066 tos_make(d); /* store to destination */
3067 }
3068 else {
3069 make_tos(d);
3070 emit_byte(0xd9);
3071 emit_byte(0xff); /* take cos */
3072 }
3073 }
3074 LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
3075
3076 LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3077 {
3078 int ds;
3079
3080 if (d!=s) {
3081 usereg(s);
3082 ds=stackpos(s);
3083 emit_byte(0xd9);
3084 emit_byte(0xc0+ds); /* duplicate source */
3085 emit_byte(0xd9);
3086 emit_byte(0xfe); /* take sin */
3087 tos_make(d); /* store to destination */
3088 }
3089 else {
3090 make_tos(d);
3091 emit_byte(0xd9);
3092 emit_byte(0xfe); /* take sin */
3093 }
3094 }
3095 LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
3096
3097 double one=1;
3098 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3099 {
3100 int ds;
3101
3102 usereg(s);
3103 ds=stackpos(s);
3104 emit_byte(0xd9);
3105 emit_byte(0xc0+ds); /* duplicate source */
3106
3107 emit_byte(0xd9);
3108 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3109 emit_byte(0xd9);
3110 emit_byte(0xfc); /* rndint */
3111 emit_byte(0xd9);
3112 emit_byte(0xc9); /* swap top two elements */
3113 emit_byte(0xd8);
3114 emit_byte(0xe1); /* subtract rounded from original */
3115 emit_byte(0xd9);
3116 emit_byte(0xf0); /* f2xm1 */
3117 emit_byte(0xdc);
3118 emit_byte(0x05);
3119 emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3120 emit_byte(0xd9);
3121 emit_byte(0xfd); /* and scale it */
3122 emit_byte(0xdd);
3123 emit_byte(0xd9); /* take he rounded value off */
3124 tos_make(d); /* store to destination */
3125 }
3126 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
3127
3128 LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3129 {
3130 int ds;
3131
3132 usereg(s);
3133 ds=stackpos(s);
3134 emit_byte(0xd9);
3135 emit_byte(0xc0+ds); /* duplicate source */
3136 emit_byte(0xd9);
3137 emit_byte(0xea); /* fldl2e */
3138 emit_byte(0xde);
3139 emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
3140
3141 emit_byte(0xd9);
3142 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
3143 emit_byte(0xd9);
3144 emit_byte(0xfc); /* rndint */
3145 emit_byte(0xd9);
3146 emit_byte(0xc9); /* swap top two elements */
3147 emit_byte(0xd8);
3148 emit_byte(0xe1); /* subtract rounded from original */
3149 emit_byte(0xd9);
3150 emit_byte(0xf0); /* f2xm1 */
3151 emit_byte(0xdc);
3152 emit_byte(0x05);
3153 emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
3154 emit_byte(0xd9);
3155 emit_byte(0xfd); /* and scale it */
3156 emit_byte(0xdd);
3157 emit_byte(0xd9); /* take he rounded value off */
3158 tos_make(d); /* store to destination */
3159 }
3160 LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
3161
3162 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3163 {
3164 int ds;
3165
3166 usereg(s);
3167 ds=stackpos(s);
3168 emit_byte(0xd9);
3169 emit_byte(0xc0+ds); /* duplicate source */
3170 emit_byte(0xd9);
3171 emit_byte(0xe8); /* push '1' */
3172 emit_byte(0xd9);
3173 emit_byte(0xc9); /* swap top two */
3174 emit_byte(0xd9);
3175 emit_byte(0xf1); /* take 1*log2(x) */
3176 tos_make(d); /* store to destination */
3177 }
3178 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
3179
3180
3181 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3182 {
3183 int ds;
3184
3185 if (d!=s) {
3186 usereg(s);
3187 ds=stackpos(s);
3188 emit_byte(0xd9);
3189 emit_byte(0xc0+ds); /* duplicate source */
3190 emit_byte(0xd9);
3191 emit_byte(0xe0); /* take fchs */
3192 tos_make(d); /* store to destination */
3193 }
3194 else {
3195 make_tos(d);
3196 emit_byte(0xd9);
3197 emit_byte(0xe0); /* take fchs */
3198 }
3199 }
3200 LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
3201
3202 LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3203 {
3204 int ds;
3205
3206 usereg(s);
3207 usereg(d);
3208
3209 if (live.spos[s]==live.tos) {
3210 /* Source is on top of stack */
3211 ds=stackpos(d);
3212 emit_byte(0xdc);
3213 emit_byte(0xc0+ds); /* add source to dest*/
3214 }
3215 else {
3216 make_tos(d);
3217 ds=stackpos(s);
3218
3219 emit_byte(0xd8);
3220 emit_byte(0xc0+ds); /* add source to dest*/
3221 }
3222 }
3223 LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
3224
3225 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3226 {
3227 int ds;
3228
3229 usereg(s);
3230 usereg(d);
3231
3232 if (live.spos[s]==live.tos) {
3233 /* Source is on top of stack */
3234 ds=stackpos(d);
3235 emit_byte(0xdc);
3236 emit_byte(0xe8+ds); /* sub source from dest*/
3237 }
3238 else {
3239 make_tos(d);
3240 ds=stackpos(s);
3241
3242 emit_byte(0xd8);
3243 emit_byte(0xe0+ds); /* sub src from dest */
3244 }
3245 }
3246 LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
3247
3248 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3249 {
3250 int ds;
3251
3252 usereg(s);
3253 usereg(d);
3254
3255 make_tos(d);
3256 ds=stackpos(s);
3257
3258 emit_byte(0xdd);
3259 emit_byte(0xe0+ds); /* cmp dest with source*/
3260 }
3261 LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
3262
3263 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3264 {
3265 int ds;
3266
3267 usereg(s);
3268 usereg(d);
3269
3270 if (live.spos[s]==live.tos) {
3271 /* Source is on top of stack */
3272 ds=stackpos(d);
3273 emit_byte(0xdc);
3274 emit_byte(0xc8+ds); /* mul dest by source*/
3275 }
3276 else {
3277 make_tos(d);
3278 ds=stackpos(s);
3279
3280 emit_byte(0xd8);
3281 emit_byte(0xc8+ds); /* mul dest by source*/
3282 }
3283 }
3284 LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
3285
3286 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3287 {
3288 int ds;
3289
3290 usereg(s);
3291 usereg(d);
3292
3293 if (live.spos[s]==live.tos) {
3294 /* Source is on top of stack */
3295 ds=stackpos(d);
3296 emit_byte(0xdc);
3297 emit_byte(0xf8+ds); /* div dest by source */
3298 }
3299 else {
3300 make_tos(d);
3301 ds=stackpos(s);
3302
3303 emit_byte(0xd8);
3304 emit_byte(0xf0+ds); /* div dest by source*/
3305 }
3306 }
3307 LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
3308
3309 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3310 {
3311 int ds;
3312
3313 usereg(s);
3314 usereg(d);
3315
3316 make_tos2(d,s);
3317 ds=stackpos(s);
3318
3319 if (ds!=1) {
3320 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
3321 abort();
3322 }
3323 emit_byte(0xd9);
3324 emit_byte(0xf8); /* take rem from dest by source */
3325 }
3326 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
3327
3328 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3329 {
3330 int ds;
3331
3332 usereg(s);
3333 usereg(d);
3334
3335 make_tos2(d,s);
3336 ds=stackpos(s);
3337
3338 if (ds!=1) {
3339 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
3340 abort();
3341 }
3342 emit_byte(0xd9);
3343 emit_byte(0xf5); /* take rem1 from dest by source */
3344 }
3345 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
3346
3347
3348 LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3349 {
3350 make_tos(r);
3351 emit_byte(0xd9); /* ftst */
3352 emit_byte(0xe4);
3353 }
3354 LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
3355
/* %eax register is clobbered if target processor doesn't support fucomi:
   raw_fflags_into_flags() then goes through "fstsw ax" and sahf.  fucomi
   availability is keyed off have_cmov.  The expansion is parenthesized so
   the macro stays a single operand in any surrounding expression. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
3359
3360 static __inline__ void raw_fflags_into_flags(int r)
3361 {
3362 int p;
3363
3364 usereg(r);
3365 p=stackpos(r);
3366
3367 emit_byte(0xd9);
3368 emit_byte(0xee); /* Push 0 */
3369 emit_byte(0xd9);
3370 emit_byte(0xc9+p); /* swap top two around */
3371 if (have_cmov) {
3372 // gb-- fucomi is for P6 cores only, not K6-2 then...
3373 emit_byte(0xdb);
3374 emit_byte(0xe9+p); /* fucomi them */
3375 }
3376 else {
3377 emit_byte(0xdd);
3378 emit_byte(0xe1+p); /* fucom them */
3379 emit_byte(0x9b);
3380 emit_byte(0xdf);
3381 emit_byte(0xe0); /* fstsw ax */
3382 raw_sahf(0); /* sahf */
3383 }
3384 emit_byte(0xdd);
3385 emit_byte(0xd9+p); /* store value back, and get rid of 0 */
3386 }