root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.41
Committed: 2008-02-16T22:15:00Z by gbeauche
Branch: MAIN
CVS Tags: HEAD
Changes since 1.40: +4 -4 lines
Log Message:
Cope with assembler updates.

File Contents

1 /*
2 * compiler/codegen_x86.cpp - IA-32 code generator
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2008 Christian Bauer
10 *
11 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 /* This should eventually end up in machdep/, but for now, x86 is the
29 only target, and it's easier this way... */
30
31 #include "flags_x86.h"
32
33 /*************************************************************************
34 * Some basic information about the target CPU *
35 *************************************************************************/
36
37 #define EAX_INDEX 0
38 #define ECX_INDEX 1
39 #define EDX_INDEX 2
40 #define EBX_INDEX 3
41 #define ESP_INDEX 4
42 #define EBP_INDEX 5
43 #define ESI_INDEX 6
44 #define EDI_INDEX 7
45 #if defined(__x86_64__)
46 #define R8_INDEX 8
47 #define R9_INDEX 9
48 #define R10_INDEX 10
49 #define R11_INDEX 11
50 #define R12_INDEX 12
51 #define R13_INDEX 13
52 #define R14_INDEX 14
53 #define R15_INDEX 15
54 #endif
55 /* XXX this has to match X86_Reg8H_Base + 4 */
56 #define AH_INDEX (0x10+4+EAX_INDEX)
57 #define CH_INDEX (0x10+4+ECX_INDEX)
58 #define DH_INDEX (0x10+4+EDX_INDEX)
59 #define BH_INDEX (0x10+4+EBX_INDEX)
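/* In x86 instruction encodings the high-byte registers AH/CH/DH/BH reuse
   8-bit register numbers 4-7, hence the "+4"; the extra 0x10 presumably keeps
   these indexes in the separate X86_Reg8H_Base range of codegen_x86.h. */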
60
61 /* The register in which subroutines return an integer result */
62 #define REG_RESULT EAX_INDEX
63
64 /* The registers subroutines take their first and second argument in */
65 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66 /* Handle the __fastcall parameters passed in ECX and EDX */
67 #define REG_PAR1 ECX_INDEX
68 #define REG_PAR2 EDX_INDEX
69 #elif defined(__x86_64__)
70 #define REG_PAR1 EDI_INDEX
71 #define REG_PAR2 ESI_INDEX
72 #else
73 #define REG_PAR1 EAX_INDEX
74 #define REG_PAR2 EDX_INDEX
75 #endif
76
77 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
78 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 #define REG_PC_TMP EAX_INDEX
80 #else
81 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82 #endif
83
84 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
85 -1 if any reg will do */
86 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87 #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
88
89 #define STACK_ALIGN 16
90 #define STACK_OFFSET sizeof(void *)
91
92 uae_s8 always_used[]={4,-1};
93 #if defined(__x86_64__)
94 uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95 uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96 #else
97 uae_s8 can_byte[]={0,1,2,3,-1};
98 uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 #endif
100
101 #if USE_OPTIMIZED_CALLS
102 /* Make sure interpretive core does not use cpuopti */
103 uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 #error FIXME: code not ready
105 #else
106 /* cpuopti mutates instruction handlers to assume registers are saved
107 by the caller */
108 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 #endif
110
111 /* This *should* be the same as call_saved. But:
112 - We might not really know which registers are saved, and which aren't,
113 so we need to preserve some, but don't want to rely on everyone else
114 also saving those registers
115 - Special registers (such as the stack pointer) should not be "preserved"
116 by pushing, even though they are "saved" across function calls
117 */
118 #if defined(__x86_64__)
119 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 /* preserve r11 because it's generally used to hold pointers to functions */
121 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 #else
123 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 #endif
126
127 /* Whether classes of instructions do or don't clobber the native flags */
128 #define CLOBBER_MOV
129 #define CLOBBER_LEA
130 #define CLOBBER_CMOV
131 #define CLOBBER_POP
132 #define CLOBBER_PUSH
133 #define CLOBBER_SUB clobber_flags()
134 #define CLOBBER_SBB clobber_flags()
135 #define CLOBBER_CMP clobber_flags()
136 #define CLOBBER_ADD clobber_flags()
137 #define CLOBBER_ADC clobber_flags()
138 #define CLOBBER_AND clobber_flags()
139 #define CLOBBER_OR clobber_flags()
140 #define CLOBBER_XOR clobber_flags()
141
142 #define CLOBBER_ROL clobber_flags()
143 #define CLOBBER_ROR clobber_flags()
144 #define CLOBBER_SHLL clobber_flags()
145 #define CLOBBER_SHRL clobber_flags()
146 #define CLOBBER_SHRA clobber_flags()
147 #define CLOBBER_TEST clobber_flags()
148 #define CLOBBER_CL16
149 #define CLOBBER_CL8
150 #define CLOBBER_SE32
151 #define CLOBBER_SE16
152 #define CLOBBER_SE8
153 #define CLOBBER_ZE32
154 #define CLOBBER_ZE16
155 #define CLOBBER_ZE8
156 #define CLOBBER_SW16 clobber_flags()
157 #define CLOBBER_SW32
158 #define CLOBBER_SETCC
159 #define CLOBBER_MUL clobber_flags()
160 #define CLOBBER_BT clobber_flags()
161 #define CLOBBER_BSF clobber_flags()
162
163 /* The older code generator is now deprecated. */
164 #define USE_NEW_RTASM 1
165
166 #if USE_NEW_RTASM
167
168 #if defined(__x86_64__)
169 #define X86_TARGET_64BIT 1
170 /* The address override prefix causes a 5-cycle penalty on Intel Core
171 processors. Another solution would be to decompose the load into an LEA,
172 MOV (to zero-extend), MOV (from memory): is it better? */
173 #define ADDR32 x86_emit_byte(0x67),
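/* A rough sketch of that alternative, in terms of the emitters used below
   (REG_TMP is a hypothetical scratch register, not something defined here):
     LEALmr(0, baser, index, factor, REG_TMP);  // 32-bit LEA of base+index*factor
     MOVLrr(REG_TMP, REG_TMP);                  // the explicit zero-extending MOV mentioned above
     MOVLmr(0, REG_TMP, X86_NOREG, 1, d);       // load through the full register, no 0x67 prefix
   Whether three instructions beat one prefixed load was left an open question. */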
174 #else
175 #define ADDR32 /**/
176 #endif
177 #define X86_FLAT_REGISTERS 0
178 #define X86_OPTIMIZE_ALU 1
179 #define X86_OPTIMIZE_ROTSHI 1
180 #include "codegen_x86.h"
181
182 #define x86_emit_byte(B) emit_byte(B)
183 #define x86_emit_word(W) emit_word(W)
184 #define x86_emit_long(L) emit_long(L)
185 #define x86_emit_quad(Q) emit_quad(Q)
186 #define x86_get_target() get_target()
187 #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
188
189 static void jit_fail(const char *msg, const char *file, int line, const char *function)
190 {
191 fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
192 function, file, line, msg);
193 abort();
194 }
195
196 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
197 {
198 #if defined(__x86_64__)
199 PUSHQr(r);
200 #else
201 PUSHLr(r);
202 #endif
203 }
204 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
205
206 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
207 {
208 #if defined(__x86_64__)
209 POPQr(r);
210 #else
211 POPLr(r);
212 #endif
213 }
214 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
215
216 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
217 {
218 #if defined(__x86_64__)
219 POPQm(d, X86_NOREG, X86_NOREG, 1);
220 #else
221 POPLm(d, X86_NOREG, X86_NOREG, 1);
222 #endif
223 }
224 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
225
226 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
227 {
228 BTLir(i, r);
229 }
230 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
231
232 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
233 {
234 BTLrr(b, r);
235 }
236 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
237
238 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
239 {
240 BTCLir(i, r);
241 }
242 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
243
244 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
245 {
246 BTCLrr(b, r);
247 }
248 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
249
250 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
251 {
252 BTRLir(i, r);
253 }
254 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
255
256 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
257 {
258 BTRLrr(b, r);
259 }
260 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
261
262 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
263 {
264 BTSLir(i, r);
265 }
266 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
267
268 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
269 {
270 BTSLrr(b, r);
271 }
272 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
273
274 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
275 {
276 SUBWir(i, d);
277 }
278 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
279
280 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
281 {
282 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
283 }
284 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
285
286 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
287 {
288 MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
289 }
290 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
291
292 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
293 {
294 MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
295 }
296 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
297
298 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
299 {
300 MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
301 }
302 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
303
304 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
305 {
306 ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
307 }
308 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
309
310 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
311 {
312 ROLBir(i, r);
313 }
314 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
315
316 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
317 {
318 ROLWir(i, r);
319 }
320 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
321
322 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
323 {
324 ROLLir(i, r);
325 }
326 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
327
328 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
329 {
330 ROLLrr(r, d);
331 }
332 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
333
334 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
335 {
336 ROLWrr(r, d);
337 }
338 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
339
340 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
341 {
342 ROLBrr(r, d);
343 }
344 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
345
346 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
347 {
348 SHLLrr(r, d);
349 }
350 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
351
352 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
353 {
354 SHLWrr(r, d);
355 }
356 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
357
358 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
359 {
360 SHLBrr(r, d);
361 }
362 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
363
364 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
365 {
366 RORBir(i, r);
367 }
368 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
369
370 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371 {
372 RORWir(i, r);
373 }
374 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
375
376 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
377 {
378 ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
379 }
380 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
381
382 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
383 {
384 RORLir(i, r);
385 }
386 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
387
388 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
389 {
390 RORLrr(r, d);
391 }
392 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
393
394 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
395 {
396 RORWrr(r, d);
397 }
398 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
399
400 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
401 {
402 RORBrr(r, d);
403 }
404 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
405
406 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
407 {
408 SHRLrr(r, d);
409 }
410 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
411
412 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
413 {
414 SHRWrr(r, d);
415 }
416 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
417
418 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
419 {
420 SHRBrr(r, d);
421 }
422 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
423
424 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
425 {
426 SARLrr(r, d);
427 }
428 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
429
430 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
431 {
432 SARWrr(r, d);
433 }
434 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
435
436 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
437 {
438 SARBrr(r, d);
439 }
440 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
441
442 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
443 {
444 SHLLir(i, r);
445 }
446 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
447
448 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
449 {
450 SHLWir(i, r);
451 }
452 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
453
454 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
455 {
456 SHLBir(i, r);
457 }
458 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
459
460 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
461 {
462 SHRLir(i, r);
463 }
464 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
465
466 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
467 {
468 SHRWir(i, r);
469 }
470 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
471
472 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
473 {
474 SHRBir(i, r);
475 }
476 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
477
478 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
479 {
480 SARLir(i, r);
481 }
482 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
483
484 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
485 {
486 SARWir(i, r);
487 }
488 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
489
490 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
491 {
492 SARBir(i, r);
493 }
494 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
495
496 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
497 {
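/* SAHF copies AH into the low byte of the flags (SF, ZF, AF, PF, CF); the
   dummy_ah parameter only makes the implicit AH operand visible to the
   surrounding register-tracking macros. */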
498 SAHF();
499 }
500 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
501
502 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
503 {
504 CPUID();
505 }
506 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
507
508 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
509 {
510 LAHF();
511 }
512 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
513
514 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
515 {
516 SETCCir(cc, d);
517 }
518 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
519
520 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
521 {
522 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
523 }
524 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
525
526 LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
527 {
528 /* replacement using branch and mov */
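/* There is no 8-bit CMOV encoding, so this variant always branches over the
   MOV: target_p points at the Jcc rel8 displacement byte, which is
   backpatched below once the MOV has been emitted. */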
529 int8 *target_p = (int8 *)x86_get_target() + 1;
530 JCCSii(cc^1, 0);
531 MOVBrr(s, d);
532 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
533 }
534 LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
535
536 LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
537 {
538 if (have_cmov)
539 CMOVWrr(cc, s, d);
540 else { /* replacement using branch and mov */
541 int8 *target_p = (int8 *)x86_get_target() + 1;
542 JCCSii(cc^1, 0);
543 MOVWrr(s, d);
544 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
545 }
546 }
547 LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
548
549 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
550 {
551 if (have_cmov)
552 CMOVLrr(cc, s, d);
553 else { /* replacement using branch and mov */
554 int8 *target_p = (int8 *)x86_get_target() + 1;
555 JCCSii(cc^1, 0);
556 MOVLrr(s, d);
557 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
558 }
559 }
560 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
561
562 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
563 {
564 BSFLrr(s, d);
565 }
566 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
567
568 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
569 {
570 MOVSLQrr(s, d);
571 }
572 LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
573
574 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
575 {
576 MOVSWLrr(s, d);
577 }
578 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
579
580 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
581 {
582 MOVSBLrr(s, d);
583 }
584 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
585
586 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
587 {
588 MOVZWLrr(s, d);
589 }
590 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
591
592 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
593 {
594 MOVZBLrr(s, d);
595 }
596 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
597
598 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
599 {
600 IMULLrr(s, d);
601 }
602 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
603
604 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
605 {
606 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
607 write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
608 abort();
609 }
610 IMULLr(s);
611 }
612 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
613
614 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
615 {
616 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
617 write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
618 abort();
619 }
620 MULLr(s);
621 }
622 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
623
624 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
625 {
626 abort(); /* %^$&%^$%#^ x86! */
627 }
628 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
629
630 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
631 {
632 MOVBrr(s, d);
633 }
634 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
635
636 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
637 {
638 MOVWrr(s, d);
639 }
640 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
641
642 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
643 {
644 ADDR32 MOVLmr(0, baser, index, factor, d);
645 }
646 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
647
648 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
649 {
650 ADDR32 MOVWmr(0, baser, index, factor, d);
651 }
652 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
653
654 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
655 {
656 ADDR32 MOVBmr(0, baser, index, factor, d);
657 }
658 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
659
660 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
661 {
662 ADDR32 MOVLrm(s, 0, baser, index, factor);
663 }
664 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
665
666 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
667 {
668 ADDR32 MOVWrm(s, 0, baser, index, factor);
669 }
670 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
671
672 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
673 {
674 ADDR32 MOVBrm(s, 0, baser, index, factor);
675 }
676 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
677
678 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
679 {
680 ADDR32 MOVLrm(s, base, baser, index, factor);
681 }
682 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
683
684 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
685 {
686 ADDR32 MOVWrm(s, base, baser, index, factor);
687 }
688 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
689
690 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
691 {
692 ADDR32 MOVBrm(s, base, baser, index, factor);
693 }
694 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
695
696 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
697 {
698 ADDR32 MOVLmr(base, baser, index, factor, d);
699 }
700 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
701
702 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
703 {
704 ADDR32 MOVWmr(base, baser, index, factor, d);
705 }
706 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
707
708 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
709 {
710 ADDR32 MOVBmr(base, baser, index, factor, d);
711 }
712 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
713
714 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
715 {
716 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
717 }
718 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
719
720 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
721 {
722 if (have_cmov)
723 ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
724 else { /* replacement using branch and mov */
725 int8 *target_p = (int8 *)x86_get_target() + 1;
726 JCCSii(cond^1, 0);
727 ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
728 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
729 }
730 }
731 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
732
733 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
734 {
735 if (have_cmov)
736 CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
737 else { /* replacement using branch and mov */
738 int8 *target_p = (int8 *)x86_get_target() + 1;
739 JCCSii(cond^1, 0);
740 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
741 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
742 }
743 }
744 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
745
746 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
747 {
748 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
749 }
750 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
751
752 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
753 {
754 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
755 }
756 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
757
758 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
759 {
760 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
761 }
762 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
763
764 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
765 {
766 ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
767 }
768 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
769
770 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
771 {
772 ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
773 }
774 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
775
776 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
777 {
778 ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
779 }
780 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
781
782 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
783 {
784 ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
785 }
786 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
787
788 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
789 {
790 ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
791 }
792 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
793
794 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
795 {
796 ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
797 }
798 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
799
800 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
801 {
802 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
803 }
804 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
805
806 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
807 {
808 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
809 }
810 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
811
812 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
813 {
814 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
815 }
816 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
817
818 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
819 {
820 LEALmr(offset, s, X86_NOREG, 1, d);
821 }
822 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
823
824 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
825 {
826 LEALmr(offset, s, index, factor, d);
827 }
828 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
829
830 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
831 {
832 LEALmr(0, s, index, factor, d);
833 }
834 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
835
836 LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
837 {
838 LEALmr(0, X86_NOREG, index, factor, d);
839 }
840 LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
841
842 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
843 {
844 ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
845 }
846 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
847
848 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
849 {
850 ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
851 }
852 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
853
854 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
855 {
856 ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
857 }
858 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
859
860 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
861 {
862 BSWAPLr(r);
863 }
864 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
865
866 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
867 {
868 ROLWir(8, r);
869 }
870 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
871
872 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
873 {
874 MOVLrr(s, d);
875 }
876 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
877
878 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
879 {
880 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
881 }
882 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
883
884 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
885 {
886 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
887 }
888 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
889
890 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
891 {
892 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
893 }
894 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
895
896 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
897 {
898 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
899 }
900 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
901
902 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
903 {
904 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
905 }
906 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
907
908 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
909 {
910 MOVLir(s, d);
911 }
912 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
913
914 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
915 {
916 MOVWir(s, d);
917 }
918 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
919
920 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
921 {
922 MOVBir(s, d);
923 }
924 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
925
926 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
927 {
928 ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
929 }
930 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
931
932 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
933 {
934 ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
935 }
936 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
937
938 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
939 {
940 ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
941 }
942 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
943
944 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
945 {
946 ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
947 }
948 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
949
950 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
951 {
952 TESTLir(i, d);
953 }
954 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
955
956 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
957 {
958 TESTLrr(s, d);
959 }
960 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
961
962 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
963 {
964 TESTWrr(s, d);
965 }
966 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
967
968 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
969 {
970 TESTBrr(s, d);
971 }
972 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
973
974 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
975 {
976 XORLir(i, d);
977 }
978 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
979
980 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
981 {
982 ANDLir(i, d);
983 }
984 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
985
986 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
987 {
988 ANDWir(i, d);
989 }
990 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
991
992 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
993 {
994 ANDLrr(s, d);
995 }
996 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
997
998 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
999 {
1000 ANDWrr(s, d);
1001 }
1002 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1003
1004 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1005 {
1006 ANDBrr(s, d);
1007 }
1008 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1009
1010 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1011 {
1012 ORLir(i, d);
1013 }
1014 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1015
1016 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1017 {
1018 ORLrr(s, d);
1019 }
1020 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1021
1022 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1023 {
1024 ORWrr(s, d);
1025 }
1026 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1027
1028 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1029 {
1030 ORBrr(s, d);
1031 }
1032 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1033
1034 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1035 {
1036 ADCLrr(s, d);
1037 }
1038 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1039
1040 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1041 {
1042 ADCWrr(s, d);
1043 }
1044 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1045
1046 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1047 {
1048 ADCBrr(s, d);
1049 }
1050 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1051
1052 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1053 {
1054 ADDLrr(s, d);
1055 }
1056 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1057
1058 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1059 {
1060 ADDWrr(s, d);
1061 }
1062 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1063
1064 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1065 {
1066 ADDBrr(s, d);
1067 }
1068 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1069
1070 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1071 {
1072 SUBLir(i, d);
1073 }
1074 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1075
1076 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1077 {
1078 SUBBir(i, d);
1079 }
1080 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1081
1082 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1083 {
1084 ADDLir(i, d);
1085 }
1086 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1087
1088 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1089 {
1090 ADDWir(i, d);
1091 }
1092 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1093
1094 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1095 {
1096 ADDBir(i, d);
1097 }
1098 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1099
1100 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1101 {
1102 SBBLrr(s, d);
1103 }
1104 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1105
1106 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1107 {
1108 SBBWrr(s, d);
1109 }
1110 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1111
1112 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1113 {
1114 SBBBrr(s, d);
1115 }
1116 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1117
1118 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1119 {
1120 SUBLrr(s, d);
1121 }
1122 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1123
1124 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1125 {
1126 SUBWrr(s, d);
1127 }
1128 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1129
1130 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1131 {
1132 SUBBrr(s, d);
1133 }
1134 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1135
1136 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1137 {
1138 CMPLrr(s, d);
1139 }
1140 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1141
1142 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1143 {
1144 CMPLir(i, r);
1145 }
1146 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1147
1148 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1149 {
1150 CMPWrr(s, d);
1151 }
1152 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1153
1154 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1155 {
1156 CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1157 }
1158 LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1159
1160 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1161 {
1162 CMPBir(i, d);
1163 }
1164 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1165
1166 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1167 {
1168 CMPBrr(s, d);
1169 }
1170 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1171
1172 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1173 {
1174 ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
1175 }
1176 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1177
1178 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1179 {
1180 XORLrr(s, d);
1181 }
1182 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1183
1184 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1185 {
1186 XORWrr(s, d);
1187 }
1188 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1189
1190 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1191 {
1192 XORBrr(s, d);
1193 }
1194 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1195
1196 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1197 {
1198 SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1199 }
1200 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1201
1202 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1203 {
1204 CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1205 }
1206 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1207
1208 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1209 {
1210 XCHGLrr(r2, r1);
1211 }
1212 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1213
1214 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1215 {
1216 XCHGBrr(r2, r1);
1217 }
1218 LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1219
1220 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1221 {
1222 PUSHF();
1223 }
1224 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1225
1226 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1227 {
1228 POPF();
1229 }
1230 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1231
1232 /* Generate floating-point instructions */
1233 static inline void x86_fadd_m(MEMR s)
1234 {
1235 FADDDm(s,X86_NOREG,X86_NOREG,1);
1236 }
1237
1238 #else
1239
1240 const bool optimize_accum = true;
1241 const bool optimize_imm8 = true;
1242 const bool optimize_shift_once = true;
1243
1244 /*************************************************************************
1245 * Actual encoding of the instructions on the target CPU *
1246 *************************************************************************/
1247
1248 static __inline__ int isaccum(int r)
1249 {
1250 return (r == EAX_INDEX);
1251 }
1252
1253 static __inline__ int isbyte(uae_s32 x)
1254 {
1255 return (x>=-128 && x<=127);
1256 }
1257
1258 static __inline__ int isword(uae_s32 x)
1259 {
1260 return (x>=-32768 && x<=32767);
1261 }
1262
1263 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1264 {
1265 emit_byte(0x50+r);
1266 }
1267 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1268
1269 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1270 {
1271 emit_byte(0x58+r);
1272 }
1273 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1274
1275 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1276 {
1277 emit_byte(0x8f);
1278 emit_byte(0x05);
1279 emit_long(d);
1280 }
1281 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1282
1283 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1284 {
1285 emit_byte(0x0f);
1286 emit_byte(0xba);
1287 emit_byte(0xe0+r);
1288 emit_byte(i);
1289 }
1290 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1291
1292 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1293 {
1294 emit_byte(0x0f);
1295 emit_byte(0xa3);
1296 emit_byte(0xc0+8*b+r);
1297 }
1298 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1299
1300 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1301 {
1302 emit_byte(0x0f);
1303 emit_byte(0xba);
1304 emit_byte(0xf8+r);
1305 emit_byte(i);
1306 }
1307 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1308
1309 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1310 {
1311 emit_byte(0x0f);
1312 emit_byte(0xbb);
1313 emit_byte(0xc0+8*b+r);
1314 }
1315 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1316
1317
1318 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1319 {
1320 emit_byte(0x0f);
1321 emit_byte(0xba);
1322 emit_byte(0xf0+r);
1323 emit_byte(i);
1324 }
1325 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1326
1327 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1328 {
1329 emit_byte(0x0f);
1330 emit_byte(0xb3);
1331 emit_byte(0xc0+8*b+r);
1332 }
1333 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1334
1335 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1336 {
1337 emit_byte(0x0f);
1338 emit_byte(0xba);
1339 emit_byte(0xe8+r);
1340 emit_byte(i);
1341 }
1342 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1343
1344 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1345 {
1346 emit_byte(0x0f);
1347 emit_byte(0xab);
1348 emit_byte(0xc0+8*b+r);
1349 }
1350 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1351
1352 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1353 {
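/* Pick the shortest encoding: 0x66 selects the 16-bit operand size, 0x83 /5
   takes a sign-extended imm8, the accumulator-only form 0x2D takes an imm16
   with no ModRM byte, and 0x81 /5 is the general imm16 form. */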
1354 emit_byte(0x66);
1355 if (isbyte(i)) {
1356 emit_byte(0x83);
1357 emit_byte(0xe8+d);
1358 emit_byte(i);
1359 }
1360 else {
1361 if (optimize_accum && isaccum(d))
1362 emit_byte(0x2d);
1363 else {
1364 emit_byte(0x81);
1365 emit_byte(0xe8+d);
1366 }
1367 emit_word(i);
1368 }
1369 }
1370 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1371
1372
1373 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1374 {
1375 emit_byte(0x8b);
1376 emit_byte(0x05+8*d);
1377 emit_long(s);
1378 }
1379 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1380
1381 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1382 {
1383 emit_byte(0xc7);
1384 emit_byte(0x05);
1385 emit_long(d);
1386 emit_long(s);
1387 }
1388 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1389
1390 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1391 {
1392 emit_byte(0x66);
1393 emit_byte(0xc7);
1394 emit_byte(0x05);
1395 emit_long(d);
1396 emit_word(s);
1397 }
1398 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1399
1400 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1401 {
1402 emit_byte(0xc6);
1403 emit_byte(0x05);
1404 emit_long(d);
1405 emit_byte(s);
1406 }
1407 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1408
1409 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1410 {
1411 if (optimize_shift_once && (i == 1)) {
1412 emit_byte(0xd0);
1413 emit_byte(0x05);
1414 emit_long(d);
1415 }
1416 else {
1417 emit_byte(0xc0);
1418 emit_byte(0x05);
1419 emit_long(d);
1420 emit_byte(i);
1421 }
1422 }
1423 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1424
1425 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1426 {
1427 if (optimize_shift_once && (i == 1)) {
1428 emit_byte(0xd0);
1429 emit_byte(0xc0+r);
1430 }
1431 else {
1432 emit_byte(0xc0);
1433 emit_byte(0xc0+r);
1434 emit_byte(i);
1435 }
1436 }
1437 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1438
1439 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1440 {
1441 emit_byte(0x66);
1442 emit_byte(0xc1);
1443 emit_byte(0xc0+r);
1444 emit_byte(i);
1445 }
1446 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1447
1448 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1449 {
1450 if (optimize_shift_once && (i == 1)) {
1451 emit_byte(0xd1);
1452 emit_byte(0xc0+r);
1453 }
1454 else {
1455 emit_byte(0xc1);
1456 emit_byte(0xc0+r);
1457 emit_byte(i);
1458 }
1459 }
1460 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1461
1462 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1463 {
1464 emit_byte(0xd3);
1465 emit_byte(0xc0+d);
1466 }
1467 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1468
1469 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1470 {
1471 emit_byte(0x66);
1472 emit_byte(0xd3);
1473 emit_byte(0xc0+d);
1474 }
1475 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1476
1477 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1478 {
1479 emit_byte(0xd2);
1480 emit_byte(0xc0+d);
1481 }
1482 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1483
1484 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1485 {
1486 emit_byte(0xd3);
1487 emit_byte(0xe0+d);
1488 }
1489 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1490
1491 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1492 {
1493 emit_byte(0x66);
1494 emit_byte(0xd3);
1495 emit_byte(0xe0+d);
1496 }
1497 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1498
1499 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1500 {
1501 emit_byte(0xd2);
1502 emit_byte(0xe0+d);
1503 }
1504 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1505
1506 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1507 {
1508 if (optimize_shift_once && (i == 1)) {
1509 emit_byte(0xd0);
1510 emit_byte(0xc8+r);
1511 }
1512 else {
1513 emit_byte(0xc0);
1514 emit_byte(0xc8+r);
1515 emit_byte(i);
1516 }
1517 }
1518 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1519
1520 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1521 {
1522 emit_byte(0x66);
1523 emit_byte(0xc1);
1524 emit_byte(0xc8+r);
1525 emit_byte(i);
1526 }
1527 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1528
1529 // gb-- used for making an fpcr value in compemu_fpp.cpp
1530 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1531 {
1532 emit_byte(0x0b);
1533 emit_byte(0x05+8*d);
1534 emit_long(s);
1535 }
1536 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1537
1538 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1539 {
1540 if (optimize_shift_once && (i == 1)) {
1541 emit_byte(0xd1);
1542 emit_byte(0xc8+r);
1543 }
1544 else {
1545 emit_byte(0xc1);
1546 emit_byte(0xc8+r);
1547 emit_byte(i);
1548 }
1549 }
1550 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1551
1552 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1553 {
1554 emit_byte(0xd3);
1555 emit_byte(0xc8+d);
1556 }
1557 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1558
1559 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1560 {
1561 emit_byte(0x66);
1562 emit_byte(0xd3);
1563 emit_byte(0xc8+d);
1564 }
1565 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1566
1567 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1568 {
1569 emit_byte(0xd2);
1570 emit_byte(0xc8+d);
1571 }
1572 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1573
1574 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1575 {
1576 emit_byte(0xd3);
1577 emit_byte(0xe8+d);
1578 }
1579 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1580
1581 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1582 {
1583 emit_byte(0x66);
1584 emit_byte(0xd3);
1585 emit_byte(0xe8+d);
1586 }
1587 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1588
1589 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1590 {
1591 emit_byte(0xd2);
1592 emit_byte(0xe8+d);
1593 }
1594 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1595
1596 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1597 {
1598 emit_byte(0xd3);
1599 emit_byte(0xf8+d);
1600 }
1601 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1602
1603 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1604 {
1605 emit_byte(0x66);
1606 emit_byte(0xd3);
1607 emit_byte(0xf8+d);
1608 }
1609 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1610
1611 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1612 {
1613 emit_byte(0xd2);
1614 emit_byte(0xf8+d);
1615 }
1616 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1617
1618 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1619 {
1620 if (optimize_shift_once && (i == 1)) {
1621 emit_byte(0xd1);
1622 emit_byte(0xe0+r);
1623 }
1624 else {
1625 emit_byte(0xc1);
1626 emit_byte(0xe0+r);
1627 emit_byte(i);
1628 }
1629 }
1630 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1631
1632 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1633 {
1634 emit_byte(0x66);
1635 emit_byte(0xc1);
1636 emit_byte(0xe0+r);
1637 emit_byte(i);
1638 }
1639 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1640
1641 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1642 {
1643 if (optimize_shift_once && (i == 1)) {
1644 emit_byte(0xd0);
1645 emit_byte(0xe0+r);
1646 }
1647 else {
1648 emit_byte(0xc0);
1649 emit_byte(0xe0+r);
1650 emit_byte(i);
1651 }
1652 }
1653 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1654
1655 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1656 {
1657 if (optimize_shift_once && (i == 1)) {
1658 emit_byte(0xd1);
1659 emit_byte(0xe8+r);
1660 }
1661 else {
1662 emit_byte(0xc1);
1663 emit_byte(0xe8+r);
1664 emit_byte(i);
1665 }
1666 }
1667 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1668
1669 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1670 {
1671 emit_byte(0x66);
1672 emit_byte(0xc1);
1673 emit_byte(0xe8+r);
1674 emit_byte(i);
1675 }
1676 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1677
1678 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1679 {
1680 if (optimize_shift_once && (i == 1)) {
1681 emit_byte(0xd0);
1682 emit_byte(0xe8+r);
1683 }
1684 else {
1685 emit_byte(0xc0);
1686 emit_byte(0xe8+r);
1687 emit_byte(i);
1688 }
1689 }
1690 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1691
1692 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1693 {
1694 if (optimize_shift_once && (i == 1)) {
1695 emit_byte(0xd1);
1696 emit_byte(0xf8+r);
1697 }
1698 else {
1699 emit_byte(0xc1);
1700 emit_byte(0xf8+r);
1701 emit_byte(i);
1702 }
1703 }
1704 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1705
1706 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1707 {
1708 emit_byte(0x66);
1709 emit_byte(0xc1);
1710 emit_byte(0xf8+r);
1711 emit_byte(i);
1712 }
1713 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1714
1715 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1716 {
1717 if (optimize_shift_once && (i == 1)) {
1718 emit_byte(0xd0);
1719 emit_byte(0xf8+r);
1720 }
1721 else {
1722 emit_byte(0xc0);
1723 emit_byte(0xf8+r);
1724 emit_byte(i);
1725 }
1726 }
1727 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1728
1729 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1730 {
1731 emit_byte(0x9e);
1732 }
1733 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1734
1735 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1736 {
1737 emit_byte(0x0f);
1738 emit_byte(0xa2);
1739 }
1740 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1741
1742 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1743 {
1744 emit_byte(0x9f);
1745 }
1746 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1747
1748 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1749 {
1750 emit_byte(0x0f);
1751 emit_byte(0x90+cc);
1752 emit_byte(0xc0+d);
1753 }
1754 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1755
1756 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1757 {
1758 emit_byte(0x0f);
1759 emit_byte(0x90+cc);
1760 emit_byte(0x05);
1761 emit_long(d);
1762 }
1763 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1764
1765 LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
1766 {
1767 /* replacement using branch and mov */
1768 int uncc=(cc^1);
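/* 0x70+uncc encodes Jcc rel8 on the inverted condition; the branch skips the
   two-byte MOV (0x88 /r) whenever the original condition is false. */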
1769 emit_byte(0x70+uncc);
1770 emit_byte(2); /* skip next 2 bytes if not cc=true */
1771 emit_byte(0x88);
1772 emit_byte(0xc0+8*s+d);
1773 }
1774 LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
1775
1776 LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
1777 {
1778 if (have_cmov) {
1779 emit_byte(0x66);
1780 emit_byte(0x0f);
1781 emit_byte(0x40+cc);
1782 emit_byte(0xc0+8*d+s);
1783 }
1784 else { /* replacement using branch and mov */
1785 int uncc=(cc^1);
1786 emit_byte(0x70+uncc);
1787 emit_byte(3); /* skip next 3 bytes if not cc=true */
1788 emit_byte(0x66);
1789 emit_byte(0x89);
1790 emit_byte(0xc0+8*s+d);
1791 }
1792 }
1793 LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
1794
1795 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1796 {
1797 if (have_cmov) {
1798 emit_byte(0x0f);
1799 emit_byte(0x40+cc);
1800 emit_byte(0xc0+8*d+s);
1801 }
1802 else { /* replacement using branch and mov */
1803 int uncc=(cc^1);
1804 emit_byte(0x70+uncc);
1805 emit_byte(2); /* skip next 2 bytes if not cc=true */
1806 emit_byte(0x89);
1807 emit_byte(0xc0+8*s+d);
1808 }
1809 }
1810 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1811
1812 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1813 {
1814 emit_byte(0x0f);
1815 emit_byte(0xbc);
1816 emit_byte(0xc0+8*d+s);
1817 }
1818 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1819
1820 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1821 {
1822 emit_byte(0x0f);
1823 emit_byte(0xbf);
1824 emit_byte(0xc0+8*d+s);
1825 }
1826 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1827
1828 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1829 {
1830 emit_byte(0x0f);
1831 emit_byte(0xbe);
1832 emit_byte(0xc0+8*d+s);
1833 }
1834 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1835
1836 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1837 {
1838 emit_byte(0x0f);
1839 emit_byte(0xb7);
1840 emit_byte(0xc0+8*d+s);
1841 }
1842 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1843
1844 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1845 {
1846 emit_byte(0x0f);
1847 emit_byte(0xb6);
1848 emit_byte(0xc0+8*d+s);
1849 }
1850 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1851
1852 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1853 {
1854 emit_byte(0x0f);
1855 emit_byte(0xaf);
1856 emit_byte(0xc0+8*d+s);
1857 }
1858 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1859
1860 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1861 {
1862 if (d!=MUL_NREG1 || s!=MUL_NREG2)
1863 abort();
1864 emit_byte(0xf7);
1865 emit_byte(0xea);
1866 }
1867 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1868
1869 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1870 {
1871 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1872 printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1873 abort();
1874 }
1875 emit_byte(0xf7);
1876 emit_byte(0xe2);
1877 }
1878 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1879
1880 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1881 {
1882 abort(); /* %^$&%^$%#^ x86! */
1883 emit_byte(0x0f);
1884 emit_byte(0xaf);
1885 emit_byte(0xc0+8*d+s);
1886 }
1887 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1888
1889 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1890 {
1891 emit_byte(0x88);
1892 emit_byte(0xc0+8*s+d);
1893 }
1894 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1895
1896 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1897 {
1898 emit_byte(0x66);
1899 emit_byte(0x89);
1900 emit_byte(0xc0+8*s+d);
1901 }
1902 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1903
1904 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1905 {
1906 int isebp=(baser==5)?0x40:0;
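/* With ModRM mod=00, a SIB base of EBP (5) means "no base, disp32", so when
   EBP is the base register the code switches the mod field to 01 (the 0x40
   bit) and emits an explicit zero disp8 below; fi is the SIB scale field,
   i.e. log2(factor). */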
1907 int fi;
1908
1909 switch(factor) {
1910 case 1: fi=0; break;
1911 case 2: fi=1; break;
1912 case 4: fi=2; break;
1913 case 8: fi=3; break;
1914 default: abort();
1915 }
1916
1917
1918 emit_byte(0x8b);
1919 emit_byte(0x04+8*d+isebp);
1920 emit_byte(baser+8*index+0x40*fi);
1921 if (isebp)
1922 emit_byte(0x00);
1923 }
1924 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1925
1926 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1927 {
1928 int fi;
1929 int isebp;
1930
1931 switch(factor) {
1932 case 1: fi=0; break;
1933 case 2: fi=1; break;
1934 case 4: fi=2; break;
1935 case 8: fi=3; break;
1936 default: abort();
1937 }
1938 isebp=(baser==5)?0x40:0;
1939
1940 emit_byte(0x66);
1941 emit_byte(0x8b);
1942 emit_byte(0x04+8*d+isebp);
1943 emit_byte(baser+8*index+0x40*fi);
1944 if (isebp)
1945 emit_byte(0x00);
1946 }
1947 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1948
1949 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1950 {
1951 int fi;
1952 int isebp;
1953
1954 switch(factor) {
1955 case 1: fi=0; break;
1956 case 2: fi=1; break;
1957 case 4: fi=2; break;
1958 case 8: fi=3; break;
1959 default: abort();
1960 }
1961 isebp=(baser==5)?0x40:0;
1962
1963 emit_byte(0x8a);
1964 emit_byte(0x04+8*d+isebp);
1965 emit_byte(baser+8*index+0x40*fi);
1966 if (isebp)
1967 emit_byte(0x00);
1968 }
1969 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1970
1971 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1972 {
1973 int fi;
1974 int isebp;
1975
1976 switch(factor) {
1977 case 1: fi=0; break;
1978 case 2: fi=1; break;
1979 case 4: fi=2; break;
1980 case 8: fi=3; break;
1981 default: abort();
1982 }
1983
1984
1985 isebp=(baser==5)?0x40:0;
1986
1987 emit_byte(0x89);
1988 emit_byte(0x04+8*s+isebp);
1989 emit_byte(baser+8*index+0x40*fi);
1990 if (isebp)
1991 emit_byte(0x00);
1992 }
1993 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1994
1995 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1996 {
1997 int fi;
1998 int isebp;
1999
2000 switch(factor) {
2001 case 1: fi=0; break;
2002 case 2: fi=1; break;
2003 case 4: fi=2; break;
2004 case 8: fi=3; break;
2005 default: abort();
2006 }
2007 isebp=(baser==5)?0x40:0;
2008
2009 emit_byte(0x66);
2010 emit_byte(0x89);
2011 emit_byte(0x04+8*s+isebp);
2012 emit_byte(baser+8*index+0x40*fi);
2013 if (isebp)
2014 emit_byte(0x00);
2015 }
2016 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
2017
2018 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2019 {
2020 int fi;
2021 int isebp;
2022
2023 switch(factor) {
2024 case 1: fi=0; break;
2025 case 2: fi=1; break;
2026 case 4: fi=2; break;
2027 case 8: fi=3; break;
2028 default: abort();
2029 }
2030 isebp=(baser==5)?0x40:0;
2031
2032 emit_byte(0x88);
2033 emit_byte(0x04+8*s+isebp);
2034 emit_byte(baser+8*index+0x40*fi);
2035 if (isebp)
2036 emit_byte(0x00);
2037 }
2038 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2039
2040 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2041 {
2042 int fi;
2043
2044 switch(factor) {
2045 case 1: fi=0; break;
2046 case 2: fi=1; break;
2047 case 4: fi=2; break;
2048 case 8: fi=3; break;
2049 default: abort();
2050 }
2051
2052 emit_byte(0x89);
2053 emit_byte(0x84+8*s);
2054 emit_byte(baser+8*index+0x40*fi);
2055 emit_long(base);
2056 }
2057 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2058
2059 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2060 {
2061 int fi;
2062
2063 switch(factor) {
2064 case 1: fi=0; break;
2065 case 2: fi=1; break;
2066 case 4: fi=2; break;
2067 case 8: fi=3; break;
2068 default: abort();
2069 }
2070
2071 emit_byte(0x66);
2072 emit_byte(0x89);
2073 emit_byte(0x84+8*s);
2074 emit_byte(baser+8*index+0x40*fi);
2075 emit_long(base);
2076 }
2077 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2078
2079 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2080 {
2081 int fi;
2082
2083 switch(factor) {
2084 case 1: fi=0; break;
2085 case 2: fi=1; break;
2086 case 4: fi=2; break;
2087 case 8: fi=3; break;
2088 default: abort();
2089 }
2090
2091 emit_byte(0x88);
2092 emit_byte(0x84+8*s);
2093 emit_byte(baser+8*index+0x40*fi);
2094 emit_long(base);
2095 }
2096 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2097
2098 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2099 {
2100 int fi;
2101
2102 switch(factor) {
2103 case 1: fi=0; break;
2104 case 2: fi=1; break;
2105 case 4: fi=2; break;
2106 case 8: fi=3; break;
2107 default: abort();
2108 }
2109
2110 emit_byte(0x8b);
2111 emit_byte(0x84+8*d);
2112 emit_byte(baser+8*index+0x40*fi);
2113 emit_long(base);
2114 }
2115 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2116
2117 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2118 {
2119 int fi;
2120
2121 switch(factor) {
2122 case 1: fi=0; break;
2123 case 2: fi=1; break;
2124 case 4: fi=2; break;
2125 case 8: fi=3; break;
2126 default: abort();
2127 }
2128
2129 emit_byte(0x66);
2130 emit_byte(0x8b);
2131 emit_byte(0x84+8*d);
2132 emit_byte(baser+8*index+0x40*fi);
2133 emit_long(base);
2134 }
2135 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2136
2137 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2138 {
2139 int fi;
2140
2141 switch(factor) {
2142 case 1: fi=0; break;
2143 case 2: fi=1; break;
2144 case 4: fi=2; break;
2145 case 8: fi=3; break;
2146 default: abort();
2147 }
2148
2149 emit_byte(0x8a);
2150 emit_byte(0x84+8*d);
2151 emit_byte(baser+8*index+0x40*fi);
2152 emit_long(base);
2153 }
2154 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2155
2156 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2157 {
2158 int fi;
2159 switch(factor) {
2160 case 1: fi=0; break;
2161 case 2: fi=1; break;
2162 case 4: fi=2; break;
2163 case 8: fi=3; break;
2164 default:
2165 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2166 abort();
2167 }
2168 emit_byte(0x8b);
2169 emit_byte(0x04+8*d);
2170 emit_byte(0x05+8*index+64*fi);
2171 emit_long(base);
2172 }
2173 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2174
2175 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2176 {
2177 int fi;
2178 switch(factor) {
2179 case 1: fi=0; break;
2180 case 2: fi=1; break;
2181 case 4: fi=2; break;
2182 case 8: fi=3; break;
2183 default:
2184 fprintf(stderr,"Bad factor %d in cmov_l_rm_indexed!\n",factor);
2185 abort();
2186 }
2187 if (have_cmov) {
2188 emit_byte(0x0f);
2189 emit_byte(0x40+cond);
2190 emit_byte(0x04+8*d);
2191 emit_byte(0x05+8*index+64*fi);
2192 emit_long(base);
2193 }
2194 else { /* replacement using branch and mov */
2195 int uncc=(cond^1);
2196 emit_byte(0x70+uncc);
2197 emit_byte(7); /* skip next 7 bytes if not cc=true */
2198 emit_byte(0x8b);
2199 emit_byte(0x04+8*d);
2200 emit_byte(0x05+8*index+64*fi);
2201 emit_long(base);
2202 }
2203 }
2204 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
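/* When CMOVcc is not available, the fallback above emits a short Jcc with
   the inverted condition (cond^1 works because x86 condition codes come in
   complementary even/odd pairs) that jumps over a plain MOV.  The skip
   count is the size of the emitted MOV: 7 bytes for the SIB+disp32 form
   above, 6 bytes for the disp32-only form in raw_cmov_l_rm below. */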
2205
2206 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2207 {
2208 if (have_cmov) {
2209 emit_byte(0x0f);
2210 emit_byte(0x40+cond);
2211 emit_byte(0x05+8*d);
2212 emit_long(mem);
2213 }
2214 else { /* replacement using branch and mov */
2215 int uncc=(cond^1);
2216 emit_byte(0x70+uncc);
2217 emit_byte(6); /* skip next 6 bytes if not cc=true */
2218 emit_byte(0x8b);
2219 emit_byte(0x05+8*d);
2220 emit_long(mem);
2221 }
2222 }
2223 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2224
2225 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2226 {
2227 Dif(!isbyte(offset)) abort();
2228 emit_byte(0x8b);
2229 emit_byte(0x40+8*d+s);
2230 emit_byte(offset);
2231 }
2232 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2233
2234 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2235 {
2236 Dif(!isbyte(offset)) abort();
2237 emit_byte(0x66);
2238 emit_byte(0x8b);
2239 emit_byte(0x40+8*d+s);
2240 emit_byte(offset);
2241 }
2242 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2243
2244 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2245 {
2246 Dif(!isbyte(offset)) abort();
2247 emit_byte(0x8a);
2248 emit_byte(0x40+8*d+s);
2249 emit_byte(offset);
2250 }
2251 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2252
2253 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2254 {
2255 emit_byte(0x8b);
2256 emit_byte(0x80+8*d+s);
2257 emit_long(offset);
2258 }
2259 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2260
2261 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2262 {
2263 emit_byte(0x66);
2264 emit_byte(0x8b);
2265 emit_byte(0x80+8*d+s);
2266 emit_long(offset);
2267 }
2268 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2269
2270 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2271 {
2272 emit_byte(0x8a);
2273 emit_byte(0x80+8*d+s);
2274 emit_long(offset);
2275 }
2276 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2277
2278 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2279 {
2280 Dif(!isbyte(offset)) abort();
2281 emit_byte(0xc7);
2282 emit_byte(0x40+d);
2283 emit_byte(offset);
2284 emit_long(i);
2285 }
2286 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2287
2288 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2289 {
2290 Dif(!isbyte(offset)) abort();
2291 emit_byte(0x66);
2292 emit_byte(0xc7);
2293 emit_byte(0x40+d);
2294 emit_byte(offset);
2295 emit_word(i);
2296 }
2297 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2298
2299 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2300 {
2301 Dif(!isbyte(offset)) abort();
2302 emit_byte(0xc6);
2303 emit_byte(0x40+d);
2304 emit_byte(offset);
2305 emit_byte(i);
2306 }
2307 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2308
2309 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2310 {
2311 Dif(!isbyte(offset)) abort();
2312 emit_byte(0x89);
2313 emit_byte(0x40+8*s+d);
2314 emit_byte(offset);
2315 }
2316 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2317
2318 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2319 {
2320 Dif(!isbyte(offset)) abort();
2321 emit_byte(0x66);
2322 emit_byte(0x89);
2323 emit_byte(0x40+8*s+d);
2324 emit_byte(offset);
2325 }
2326 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2327
2328 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2329 {
2330 Dif(!isbyte(offset)) abort();
2331 emit_byte(0x88);
2332 emit_byte(0x40+8*s+d);
2333 emit_byte(offset);
2334 }
2335 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2336
2337 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2338 {
2339 if (optimize_imm8 && isbyte(offset)) {
2340 emit_byte(0x8d);
2341 emit_byte(0x40+8*d+s);
2342 emit_byte(offset);
2343 }
2344 else {
2345 emit_byte(0x8d);
2346 emit_byte(0x80+8*d+s);
2347 emit_long(offset);
2348 }
2349 }
2350 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2351
2352 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2353 {
2354 int fi;
2355
2356 switch(factor) {
2357 case 1: fi=0; break;
2358 case 2: fi=1; break;
2359 case 4: fi=2; break;
2360 case 8: fi=3; break;
2361 default: abort();
2362 }
2363
2364 if (optimize_imm8 && isbyte(offset)) {
2365 emit_byte(0x8d);
2366 emit_byte(0x44+8*d);
2367 emit_byte(0x40*fi+8*index+s);
2368 emit_byte(offset);
2369 }
2370 else {
2371 emit_byte(0x8d);
2372 emit_byte(0x84+8*d);
2373 emit_byte(0x40*fi+8*index+s);
2374 emit_long(offset);
2375 }
2376 }
2377 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2378
2379 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2380 {
2381 int isebp=(s==5)?0x40:0;
2382 int fi;
2383
2384 switch(factor) {
2385 case 1: fi=0; break;
2386 case 2: fi=1; break;
2387 case 4: fi=2; break;
2388 case 8: fi=3; break;
2389 default: abort();
2390 }
2391
2392 emit_byte(0x8d);
2393 emit_byte(0x04+8*d+isebp);
2394 emit_byte(0x40*fi+8*index+s);
2395 if (isebp)
2396 emit_byte(0);
2397 }
2398 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2399
2400 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2401 {
2402 if (optimize_imm8 && isbyte(offset)) {
2403 emit_byte(0x89);
2404 emit_byte(0x40+8*s+d);
2405 emit_byte(offset);
2406 }
2407 else {
2408 emit_byte(0x89);
2409 emit_byte(0x80+8*s+d);
2410 emit_long(offset);
2411 }
2412 }
2413 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2414
2415 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2416 {
2417 emit_byte(0x66);
2418 emit_byte(0x89);
2419 emit_byte(0x80+8*s+d);
2420 emit_long(offset);
2421 }
2422 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2423
2424 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2425 {
2426 if (optimize_imm8 && isbyte(offset)) {
2427 emit_byte(0x88);
2428 emit_byte(0x40+8*s+d);
2429 emit_byte(offset);
2430 }
2431 else {
2432 emit_byte(0x88);
2433 emit_byte(0x80+8*s+d);
2434 emit_long(offset);
2435 }
2436 }
2437 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2438
2439 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2440 {
2441 emit_byte(0x0f);
2442 emit_byte(0xc8+r);
2443 }
2444 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2445
2446 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2447 {
2448 emit_byte(0x66);
2449 emit_byte(0xc1);
2450 emit_byte(0xc0+r);
2451 emit_byte(0x08);
2452 }
2453 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2454
2455 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2456 {
2457 emit_byte(0x89);
2458 emit_byte(0xc0+8*s+d);
2459 }
2460 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2461
2462 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2463 {
2464 emit_byte(0x89);
2465 emit_byte(0x05+8*s);
2466 emit_long(d);
2467 }
2468 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2469
2470 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2471 {
2472 emit_byte(0x66);
2473 emit_byte(0x89);
2474 emit_byte(0x05+8*s);
2475 emit_long(d);
2476 }
2477 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2478
2479 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2480 {
2481 emit_byte(0x66);
2482 emit_byte(0x8b);
2483 emit_byte(0x05+8*d);
2484 emit_long(s);
2485 }
2486 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2487
2488 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2489 {
2490 emit_byte(0x88);
2491 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2492 emit_long(d);
2493 }
2494 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2495
2496 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2497 {
2498 emit_byte(0x8a);
2499 emit_byte(0x05+8*d);
2500 emit_long(s);
2501 }
2502 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2503
2504 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2505 {
2506 emit_byte(0xb8+d);
2507 emit_long(s);
2508 }
2509 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2510
2511 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2512 {
2513 emit_byte(0x66);
2514 emit_byte(0xb8+d);
2515 emit_word(s);
2516 }
2517 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2518
2519 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2520 {
2521 emit_byte(0xb0+d);
2522 emit_byte(s);
2523 }
2524 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2525
2526 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2527 {
2528 emit_byte(0x81);
2529 emit_byte(0x15);
2530 emit_long(d);
2531 emit_long(s);
2532 }
2533 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2534
2535 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2536 {
2537 if (optimize_imm8 && isbyte(s)) {
2538 emit_byte(0x83);
2539 emit_byte(0x05);
2540 emit_long(d);
2541 emit_byte(s);
2542 }
2543 else {
2544 emit_byte(0x81);
2545 emit_byte(0x05);
2546 emit_long(d);
2547 emit_long(s);
2548 }
2549 }
2550 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
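/* The optimize_imm8 paths above and below pick opcode 0x83, which takes a
   sign-extended 8-bit immediate, instead of 0x81 with a full-size
   immediate; that saves up to three immediate bytes whenever the constant
   fits in a signed byte, which isbyte() presumably checks. */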
2551
2552 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2553 {
2554 emit_byte(0x66);
2555 emit_byte(0x81);
2556 emit_byte(0x05);
2557 emit_long(d);
2558 emit_word(s);
2559 }
2560 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2561
2562 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2563 {
2564 emit_byte(0x80);
2565 emit_byte(0x05);
2566 emit_long(d);
2567 emit_byte(s);
2568 }
2569 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2570
2571 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2572 {
2573 if (optimize_accum && isaccum(d))
2574 emit_byte(0xa9);
2575 else {
2576 emit_byte(0xf7);
2577 emit_byte(0xc0+d);
2578 }
2579 emit_long(i);
2580 }
2581 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2582
2583 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2584 {
2585 emit_byte(0x85);
2586 emit_byte(0xc0+8*s+d);
2587 }
2588 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2589
2590 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2591 {
2592 emit_byte(0x66);
2593 emit_byte(0x85);
2594 emit_byte(0xc0+8*s+d);
2595 }
2596 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2597
2598 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2599 {
2600 emit_byte(0x84);
2601 emit_byte(0xc0+8*s+d);
2602 }
2603 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2604
2605 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2606 {
2607 emit_byte(0x81);
2608 emit_byte(0xf0+d);
2609 emit_long(i);
2610 }
2611 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2612
2613 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2614 {
2615 if (optimize_imm8 && isbyte(i)) {
2616 emit_byte(0x83);
2617 emit_byte(0xe0+d);
2618 emit_byte(i);
2619 }
2620 else {
2621 if (optimize_accum && isaccum(d))
2622 emit_byte(0x25);
2623 else {
2624 emit_byte(0x81);
2625 emit_byte(0xe0+d);
2626 }
2627 emit_long(i);
2628 }
2629 }
2630 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
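/* optimize_accum selects the one-byte-shorter accumulator forms of the ALU
   instructions, which carry no ModRM byte: e.g. 0x25 = AND EAX,imm32,
   0x0d = OR EAX,imm32, 0x3d = CMP EAX,imm32, 0xa9 = TEST EAX,imm32.
   isaccum() presumably tests for register 0 (EAX/AX/AL). */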
2631
2632 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2633 {
2634 emit_byte(0x66);
2635 if (optimize_imm8 && isbyte(i)) {
2636 emit_byte(0x83);
2637 emit_byte(0xe0+d);
2638 emit_byte(i);
2639 }
2640 else {
2641 if (optimize_accum && isaccum(d))
2642 emit_byte(0x25);
2643 else {
2644 emit_byte(0x81);
2645 emit_byte(0xe0+d);
2646 }
2647 emit_word(i);
2648 }
2649 }
2650 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2651
2652 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2653 {
2654 emit_byte(0x21);
2655 emit_byte(0xc0+8*s+d);
2656 }
2657 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2658
2659 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2660 {
2661 emit_byte(0x66);
2662 emit_byte(0x21);
2663 emit_byte(0xc0+8*s+d);
2664 }
2665 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2666
2667 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2668 {
2669 emit_byte(0x20);
2670 emit_byte(0xc0+8*s+d);
2671 }
2672 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2673
2674 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2675 {
2676 if (optimize_imm8 && isbyte(i)) {
2677 emit_byte(0x83);
2678 emit_byte(0xc8+d);
2679 emit_byte(i);
2680 }
2681 else {
2682 if (optimize_accum && isaccum(d))
2683 emit_byte(0x0d);
2684 else {
2685 emit_byte(0x81);
2686 emit_byte(0xc8+d);
2687 }
2688 emit_long(i);
2689 }
2690 }
2691 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2692
2693 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2694 {
2695 emit_byte(0x09);
2696 emit_byte(0xc0+8*s+d);
2697 }
2698 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2699
2700 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2701 {
2702 emit_byte(0x66);
2703 emit_byte(0x09);
2704 emit_byte(0xc0+8*s+d);
2705 }
2706 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2707
2708 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2709 {
2710 emit_byte(0x08);
2711 emit_byte(0xc0+8*s+d);
2712 }
2713 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2714
2715 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2716 {
2717 emit_byte(0x11);
2718 emit_byte(0xc0+8*s+d);
2719 }
2720 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2721
2722 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2723 {
2724 emit_byte(0x66);
2725 emit_byte(0x11);
2726 emit_byte(0xc0+8*s+d);
2727 }
2728 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2729
2730 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2731 {
2732 emit_byte(0x10);
2733 emit_byte(0xc0+8*s+d);
2734 }
2735 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2736
2737 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2738 {
2739 emit_byte(0x01);
2740 emit_byte(0xc0+8*s+d);
2741 }
2742 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2743
2744 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2745 {
2746 emit_byte(0x66);
2747 emit_byte(0x01);
2748 emit_byte(0xc0+8*s+d);
2749 }
2750 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2751
2752 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2753 {
2754 emit_byte(0x00);
2755 emit_byte(0xc0+8*s+d);
2756 }
2757 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2758
2759 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2760 {
2761 if (isbyte(i)) {
2762 emit_byte(0x83);
2763 emit_byte(0xe8+d);
2764 emit_byte(i);
2765 }
2766 else {
2767 if (optimize_accum && isaccum(d))
2768 emit_byte(0x2d);
2769 else {
2770 emit_byte(0x81);
2771 emit_byte(0xe8+d);
2772 }
2773 emit_long(i);
2774 }
2775 }
2776 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2777
2778 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2779 {
2780 if (optimize_accum && isaccum(d))
2781 emit_byte(0x2c);
2782 else {
2783 emit_byte(0x80);
2784 emit_byte(0xe8+d);
2785 }
2786 emit_byte(i);
2787 }
2788 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2789
2790 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2791 {
2792 if (isbyte(i)) {
2793 emit_byte(0x83);
2794 emit_byte(0xc0+d);
2795 emit_byte(i);
2796 }
2797 else {
2798 if (optimize_accum && isaccum(d))
2799 emit_byte(0x05);
2800 else {
2801 emit_byte(0x81);
2802 emit_byte(0xc0+d);
2803 }
2804 emit_long(i);
2805 }
2806 }
2807 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2808
2809 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2810 {
2811 emit_byte(0x66);
2812 if (isbyte(i)) {
2813 emit_byte(0x83);
2814 emit_byte(0xc0+d);
2815 emit_byte(i);
2816 }
2817 else {
2818 if (optimize_accum && isaccum(d))
2819 emit_byte(0x05);
2820 else {
2821 emit_byte(0x81);
2822 emit_byte(0xc0+d);
2823 }
2824 emit_word(i);
2825 }
2826 }
2827 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2828
2829 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2830 {
2831 if (optimize_accum && isaccum(d))
2832 emit_byte(0x04);
2833 else {
2834 emit_byte(0x80);
2835 emit_byte(0xc0+d);
2836 }
2837 emit_byte(i);
2838 }
2839 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2840
2841 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2842 {
2843 emit_byte(0x19);
2844 emit_byte(0xc0+8*s+d);
2845 }
2846 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2847
2848 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2849 {
2850 emit_byte(0x66);
2851 emit_byte(0x19);
2852 emit_byte(0xc0+8*s+d);
2853 }
2854 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2855
2856 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2857 {
2858 emit_byte(0x18);
2859 emit_byte(0xc0+8*s+d);
2860 }
2861 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2862
2863 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2864 {
2865 emit_byte(0x29);
2866 emit_byte(0xc0+8*s+d);
2867 }
2868 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2869
2870 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2871 {
2872 emit_byte(0x66);
2873 emit_byte(0x29);
2874 emit_byte(0xc0+8*s+d);
2875 }
2876 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2877
2878 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2879 {
2880 emit_byte(0x28);
2881 emit_byte(0xc0+8*s+d);
2882 }
2883 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2884
2885 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2886 {
2887 emit_byte(0x39);
2888 emit_byte(0xc0+8*s+d);
2889 }
2890 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2891
2892 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2893 {
2894 if (optimize_imm8 && isbyte(i)) {
2895 emit_byte(0x83);
2896 emit_byte(0xf8+r);
2897 emit_byte(i);
2898 }
2899 else {
2900 if (optimize_accum && isaccum(r))
2901 emit_byte(0x3d);
2902 else {
2903 emit_byte(0x81);
2904 emit_byte(0xf8+r);
2905 }
2906 emit_long(i);
2907 }
2908 }
2909 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2910
2911 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2912 {
2913 emit_byte(0x66);
2914 emit_byte(0x39);
2915 emit_byte(0xc0+8*s+d);
2916 }
2917 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2918
2919 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2920 {
2921 emit_byte(0x80);
2922 emit_byte(0x3d);
2923 emit_long(d);
2924 emit_byte(s);
2925 }
2926 LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2927
2928 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2929 {
2930 if (optimize_accum && isaccum(d))
2931 emit_byte(0x3c);
2932 else {
2933 emit_byte(0x80);
2934 emit_byte(0xf8+d);
2935 }
2936 emit_byte(i);
2937 }
2938 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2939
2940 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2941 {
2942 emit_byte(0x38);
2943 emit_byte(0xc0+8*s+d);
2944 }
2945 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2946
2947 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2948 {
2949 int fi;
2950
2951 switch(factor) {
2952 case 1: fi=0; break;
2953 case 2: fi=1; break;
2954 case 4: fi=2; break;
2955 case 8: fi=3; break;
2956 default: abort();
2957 }
2958 emit_byte(0x39);
2959 emit_byte(0x04+8*d);
2960 emit_byte(5+8*index+0x40*fi);
2961 emit_long(offset);
2962 }
2963 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2964
2965 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2966 {
2967 emit_byte(0x31);
2968 emit_byte(0xc0+8*s+d);
2969 }
2970 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2971
2972 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2973 {
2974 emit_byte(0x66);
2975 emit_byte(0x31);
2976 emit_byte(0xc0+8*s+d);
2977 }
2978 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2979
2980 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2981 {
2982 emit_byte(0x30);
2983 emit_byte(0xc0+8*s+d);
2984 }
2985 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2986
2987 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2988 {
2989 if (optimize_imm8 && isbyte(s)) {
2990 emit_byte(0x83);
2991 emit_byte(0x2d);
2992 emit_long(d);
2993 emit_byte(s);
2994 }
2995 else {
2996 emit_byte(0x81);
2997 emit_byte(0x2d);
2998 emit_long(d);
2999 emit_long(s);
3000 }
3001 }
3002 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
3003
3004 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
3005 {
3006 if (optimize_imm8 && isbyte(s)) {
3007 emit_byte(0x83);
3008 emit_byte(0x3d);
3009 emit_long(d);
3010 emit_byte(s);
3011 }
3012 else {
3013 emit_byte(0x81);
3014 emit_byte(0x3d);
3015 emit_long(d);
3016 emit_long(s);
3017 }
3018 }
3019 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
3020
3021 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
3022 {
3023 emit_byte(0x87);
3024 emit_byte(0xc0+8*r1+r2);
3025 }
3026 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
3027
3028 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
3029 {
3030 emit_byte(0x86);
3031 emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
3032 }
3033 LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
3034
3035 /*************************************************************************
3036 * FIXME: mem access modes probably wrong *
3037 *************************************************************************/
3038
3039 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
3040 {
3041 emit_byte(0x9c);
3042 }
3043 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
3044
3045 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
3046 {
3047 emit_byte(0x9d);
3048 }
3049 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
3050
3051 /* Generate floating-point instructions */
3052 static inline void x86_fadd_m(MEMR s)
3053 {
3054 emit_byte(0xdc);
3055 emit_byte(0x05);
3056 emit_long(s);
3057 }
3058
3059 #endif
3060
3061 /*************************************************************************
3062 * Unoptimizable stuff --- jump *
3063 *************************************************************************/
3064
3065 static __inline__ void raw_call_r(R4 r)
3066 {
3067 #if USE_NEW_RTASM
3068 CALLsr(r);
3069 #else
3070 emit_byte(0xff);
3071 emit_byte(0xd0+r);
3072 #endif
3073 }
3074
3075 static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3076 {
3077 #if USE_NEW_RTASM
3078 CALLsm(base, X86_NOREG, r, m);
3079 #else
3080 int mu;
3081 switch(m) {
3082 case 1: mu=0; break;
3083 case 2: mu=1; break;
3084 case 4: mu=2; break;
3085 case 8: mu=3; break;
3086 default: abort();
3087 }
3088 emit_byte(0xff);
3089 emit_byte(0x14);
3090 emit_byte(0x05+8*r+0x40*mu);
3091 emit_long(base);
3092 #endif
3093 }
3094
3095 static __inline__ void raw_jmp_r(R4 r)
3096 {
3097 #if USE_NEW_RTASM
3098 JMPsr(r);
3099 #else
3100 emit_byte(0xff);
3101 emit_byte(0xe0+r);
3102 #endif
3103 }
3104
3105 static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3106 {
3107 #if USE_NEW_RTASM
3108 JMPsm(base, X86_NOREG, r, m);
3109 #else
3110 int mu;
3111 switch(m) {
3112 case 1: mu=0; break;
3113 case 2: mu=1; break;
3114 case 4: mu=2; break;
3115 case 8: mu=3; break;
3116 default: abort();
3117 }
3118 emit_byte(0xff);
3119 emit_byte(0x24);
3120 emit_byte(0x05+8*r+0x40*mu);
3121 emit_long(base);
3122 #endif
3123 }
3124
3125 static __inline__ void raw_jmp_m(uae_u32 base)
3126 {
3127 emit_byte(0xff);
3128 emit_byte(0x25);
3129 emit_long(base);
3130 }
3131
3132
3133 static __inline__ void raw_call(uae_u32 t)
3134 {
3135 #if USE_NEW_RTASM
3136 CALLm(t);
3137 #else
3138 emit_byte(0xe8);
3139 emit_long(t-(uae_u32)target-4);
3140 #endif
3141 }
3142
3143 static __inline__ void raw_jmp(uae_u32 t)
3144 {
3145 #if USE_NEW_RTASM
3146 JMPm(t);
3147 #else
3148 emit_byte(0xe9);
3149 emit_long(t-(uae_u32)target-4);
3150 #endif
3151 }
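/* The CALL (0xe8) and JMP (0xe9) displacements are relative to the end of
   the instruction, hence "t - target - 4": at that point the output pointer
   `target` is positioned at the 4-byte displacement that is about to be
   emitted. */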
3152
3153 static __inline__ void raw_jl(uae_u32 t)
3154 {
3155 emit_byte(0x0f);
3156 emit_byte(0x8c);
3157 emit_long(t-(uintptr)target-4);
3158 }
3159
3160 static __inline__ void raw_jz(uae_u32 t)
3161 {
3162 emit_byte(0x0f);
3163 emit_byte(0x84);
3164 emit_long(t-(uintptr)target-4);
3165 }
3166
3167 static __inline__ void raw_jnz(uae_u32 t)
3168 {
3169 emit_byte(0x0f);
3170 emit_byte(0x85);
3171 emit_long(t-(uintptr)target-4);
3172 }
3173
3174 static __inline__ void raw_jnz_l_oponly(void)
3175 {
3176 emit_byte(0x0f);
3177 emit_byte(0x85);
3178 }
3179
3180 static __inline__ void raw_jcc_l_oponly(int cc)
3181 {
3182 emit_byte(0x0f);
3183 emit_byte(0x80+cc);
3184 }
3185
3186 static __inline__ void raw_jnz_b_oponly(void)
3187 {
3188 emit_byte(0x75);
3189 }
3190
3191 static __inline__ void raw_jz_b_oponly(void)
3192 {
3193 emit_byte(0x74);
3194 }
3195
3196 static __inline__ void raw_jcc_b_oponly(int cc)
3197 {
3198 emit_byte(0x70+cc);
3199 }
3200
3201 static __inline__ void raw_jmp_l_oponly(void)
3202 {
3203 emit_byte(0xe9);
3204 }
3205
3206 static __inline__ void raw_jmp_b_oponly(void)
3207 {
3208 emit_byte(0xeb);
3209 }
3210
3211 static __inline__ void raw_ret(void)
3212 {
3213 emit_byte(0xc3);
3214 }
3215
3216 static __inline__ void raw_nop(void)
3217 {
3218 emit_byte(0x90);
3219 }
3220
3221 static __inline__ void raw_emit_nop_filler(int nbytes)
3222 {
3223 /* Source: GNU Binutils 2.12.90.0.15 */
3224 /* Various efficient no-op patterns for aligning code labels.
3225 Note: Don't try to assemble the instructions in the comments.
3226 0L and 0w are not legal. */
3227 static const uae_u8 f32_1[] =
3228 {0x90}; /* nop */
3229 static const uae_u8 f32_2[] =
3230 {0x89,0xf6}; /* movl %esi,%esi */
3231 static const uae_u8 f32_3[] =
3232 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3233 static const uae_u8 f32_4[] =
3234 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3235 static const uae_u8 f32_5[] =
3236 {0x90, /* nop */
3237 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3238 static const uae_u8 f32_6[] =
3239 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3240 static const uae_u8 f32_7[] =
3241 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3242 static const uae_u8 f32_8[] =
3243 {0x90, /* nop */
3244 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3245 static const uae_u8 f32_9[] =
3246 {0x89,0xf6, /* movl %esi,%esi */
3247 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3248 static const uae_u8 f32_10[] =
3249 {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3250 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3251 static const uae_u8 f32_11[] =
3252 {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3253 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3254 static const uae_u8 f32_12[] =
3255 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3256 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3257 static const uae_u8 f32_13[] =
3258 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3259 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3260 static const uae_u8 f32_14[] =
3261 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3262 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3263 static const uae_u8 f32_15[] =
3264 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3265 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3266 static const uae_u8 f32_16[] =
3267 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3268 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3269 static const uae_u8 *const f32_patt[] = {
3270 f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3271 f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3272 };
3273 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3274
3275 #if defined(__x86_64__)
3276 /* The recommended way to pad 64-bit code is to use NOPs preceded by
3277 at most four 0x66 prefixes. Balance the sizes of the nops. */
3278 if (nbytes == 0)
3279 return;
3280
3281 int i;
3282 int nnops = (nbytes + 3) / 4;
3283 int len = nbytes / nnops;
3284 int remains = nbytes - nnops * len;
3285
3286 for (i = 0; i < remains; i++) {
3287 emit_block(prefixes, len);
3288 raw_nop();
3289 }
3290 for (; i < nnops; i++) {
3291 emit_block(prefixes, len - 1);
3292 raw_nop();
3293 }
3294 #else
3295 int nloops = nbytes / 16;
3296 while (nloops-- > 0)
3297 emit_block(f32_16, sizeof(f32_16));
3298
3299 nbytes %= 16;
3300 if (nbytes)
3301 emit_block(f32_patt[nbytes - 1], nbytes);
3302 #endif
3303 }
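/* For example, a 35-byte request on IA-32 emits two 16-byte f32_16 blocks
   followed by f32_patt[2], the 3-byte lea.  On x86-64, 10 bytes become
   nnops=3, len=3, remains=1: one "66 66 66 90" filler plus two "66 66 90"
   fillers, i.e. 4+3+3 = 10 bytes. */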
3304
3305
3306 /*************************************************************************
3307 * Flag handling, to and fro UAE flag register *
3308 *************************************************************************/
3309
3310 static __inline__ void raw_flags_evicted(int r)
3311 {
3312 //live.state[FLAGTMP].status=CLEAN;
3313 live.state[FLAGTMP].status=INMEM;
3314 live.state[FLAGTMP].realreg=-1;
3315 /* We just "evicted" FLAGTMP. */
3316 if (live.nat[r].nholds!=1) {
3317 /* Huh? */
3318 abort();
3319 }
3320 live.nat[r].nholds=0;
3321 }
3322
3323 #define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */
3324 static __inline__ void raw_flags_to_reg_FLAGREG(int r)
3325 {
3326 raw_lahf(0); /* Most flags in AH */
3327 //raw_setcc(r,0); /* V flag in AL */
3328 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3329
3330 #if 1 /* Let's avoid those nasty partial register stalls */
3331 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3332 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3333 raw_flags_evicted(r);
3334 #endif
3335 }
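/* With this scheme the UAE flag word in memory ends up as two bytes: SETO
   writes the V flag (0 or 1) into the low byte, and the LAHF copy of
   SF/ZF/AF/PF/CF goes into the second byte via AH.  The reverse direction,
   just below, rebuilds OF by comparing the low byte against -127 (1-(-127)
   = 128 overflows, 0-(-127) does not) and restores the remaining flags
   with SAHF. */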
3336
3337 #define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */
3338 static __inline__ void raw_reg_to_flags_FLAGREG(int r)
3339 {
3340 raw_cmp_b_ri(r,-127); /* set V */
3341 raw_sahf(0);
3342 }
3343
3344 #define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
3345 static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
3346 {
3347 raw_mov_l_rr(tmp,s);
3348 raw_lahf(s); /* flags into ah */
3349 raw_and_l_ri(s,0xffffbfff);
3350 raw_and_l_ri(tmp,0x00004000);
3351 raw_xor_l_ri(tmp,0x00004000);
3352 raw_or_l(s,tmp);
3353 raw_sahf(s);
3354 }
3355
3356 static __inline__ void raw_flags_init_FLAGREG(void) { }
3357
3358 #define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */
3359 static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
3360 {
3361 raw_pushfl();
3362 raw_pop_l_r(r);
3363 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3364 raw_flags_evicted(r);
3365 }
3366
3367 #define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */
3368 static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
3369 {
3370 raw_push_l_r(r);
3371 raw_popfl();
3372 }
3373
3374 #define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */
3375 static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
3376 {
3377 raw_mov_l_rr(tmp,s);
3378 raw_pushfl();
3379 raw_pop_l_r(s);
3380 raw_and_l_ri(s,0xffffffbf);
3381 raw_and_l_ri(tmp,0x00000040);
3382 raw_xor_l_ri(tmp,0x00000040);
3383 raw_or_l(s,tmp);
3384 raw_push_l_r(s);
3385 raw_popfl();
3386 }
3387
3388 static __inline__ void raw_flags_init_FLAGSTK(void) { }
3389
3390 #if defined(__x86_64__)
3391 /* Try to use the LAHF/SETO method on x86_64 since it is faster.
3392 This can't be the default because some older CPUs don't support
3393 LAHF/SAHF in long mode. */
3394 static int FLAG_NREG1_FLAGGEN = 0;
3395 static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
3396 {
3397 if (have_lahf_lm) {
3398 // NOTE: the interpreter uses the normal EFLAGS layout
3399 // pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
3400 // sahf/lahf CF(8) ZF(14) SF(15) OF( 0)
3401 assert(r == 0);
3402 raw_setcc(r,0); /* V flag in AL */
3403 raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */
3404 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
3405 raw_lahf(0); /* most flags in AH */
3406 raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
3407 raw_flags_evicted(r);
3408 }
3409 else
3410 raw_flags_to_reg_FLAGSTK(r);
3411 }
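/* Unlike the FLAGREG layout above, this variant stores the flags in the
   pushf/popf layout described in the NOTE: raw_lea_l_r_scaled(0,0,8)
   presumably multiplies the SETO result by 8, so OF lands at bit 3 of the
   second byte (bit 11 of the word), while the LAHF byte supplies CF/ZF/SF
   in the low byte.  That is also why raw_reg_to_flags_FLAGGEN below
   compares against -120 rather than -127: V is stored as 8, and 8-(-120)
   = 128 overflows while 0-(-120) does not. */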
3412
3413 static int FLAG_NREG2_FLAGGEN = 0;
3414 static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
3415 {
3416 if (have_lahf_lm) {
3417 raw_xchg_b_rr(0,AH_INDEX);
3418 raw_cmp_b_ri(r,-120); /* set V */
3419 raw_sahf(0);
3420 }
3421 else
3422 raw_reg_to_flags_FLAGSTK(r);
3423 }
3424
3425 static int FLAG_NREG3_FLAGGEN = 0;
3426 static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
3427 {
3428 if (have_lahf_lm)
3429 raw_flags_set_zero_FLAGREG(s, tmp);
3430 else
3431 raw_flags_set_zero_FLAGSTK(s, tmp);
3432 }
3433
3434 static __inline__ void raw_flags_init_FLAGGEN(void)
3435 {
3436 if (have_lahf_lm) {
3437 FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
3438 FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
3439 FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGREG;
3440 }
3441 else {
3442 FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
3443 FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
3444 FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGSTK;
3445 }
3446 }
3447 #endif
3448
3449 #ifdef SAHF_SETO_PROFITABLE
3450 #define FLAG_SUFFIX FLAGREG
3451 #elif defined __x86_64__
3452 #define FLAG_SUFFIX FLAGGEN
3453 #else
3454 #define FLAG_SUFFIX FLAGSTK
3455 #endif
3456
3457 #define FLAG_GLUE_2(x, y) x ## _ ## y
3458 #define FLAG_GLUE_1(x, y) FLAG_GLUE_2(x, y)
3459 #define FLAG_GLUE(x) FLAG_GLUE_1(x, FLAG_SUFFIX)
3460
3461 #define raw_flags_init FLAG_GLUE(raw_flags_init)
3462 #define FLAG_NREG1 FLAG_GLUE(FLAG_NREG1)
3463 #define raw_flags_to_reg FLAG_GLUE(raw_flags_to_reg)
3464 #define FLAG_NREG2 FLAG_GLUE(FLAG_NREG2)
3465 #define raw_reg_to_flags FLAG_GLUE(raw_reg_to_flags)
3466 #define FLAG_NREG3 FLAG_GLUE(FLAG_NREG3)
3467 #define raw_flags_set_zero FLAG_GLUE(raw_flags_set_zero)
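/* FLAG_GLUE simply pastes the selected suffix onto the generic names: with
   FLAG_SUFFIX defined as FLAGSTK, raw_flags_to_reg expands to
   raw_flags_to_reg_FLAGSTK and FLAG_NREG1 to FLAG_NREG1_FLAGSTK.  The
   two-level FLAG_GLUE_1/FLAG_GLUE_2 indirection is needed so that
   FLAG_SUFFIX itself is expanded before the ## concatenation. */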
3468
3469 /* Apparently, there are enough instructions between flag store and
3470 flag reload to avoid the partial memory stall */
3471 static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3472 {
3473 #if 1
3474 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3475 #else
3476 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3477 raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3478 #endif
3479 }
3480
3481 /* FLAGX is byte sized, and we *do* write it at that size */
3482 static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3483 {
3484 if (live.nat[target].canbyte)
3485 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3486 else if (live.nat[target].canword)
3487 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3488 else
3489 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3490 }
3491
3492 static __inline__ void raw_dec_sp(int off)
3493 {
3494 if (off) raw_sub_l_ri(ESP_INDEX,off);
3495 }
3496
3497 static __inline__ void raw_inc_sp(int off)
3498 {
3499 if (off) raw_add_l_ri(ESP_INDEX,off);
3500 }
3501
3502 /*************************************************************************
3503 * Handling mistaken direct memory access *
3504 *************************************************************************/
3505
3506 // gb-- I don't need that part for JIT Basilisk II
3507 #if defined(NATMEM_OFFSET) && 0
3508 #include <asm/sigcontext.h>
3509 #include <signal.h>
3510
3511 #define SIG_READ 1
3512 #define SIG_WRITE 2
3513
3514 static int in_handler=0;
3515 static uae_u8 veccode[256];
3516
3517 static void vec(int x, struct sigcontext sc)
3518 {
3519 uae_u8* i=(uae_u8*)sc.eip;
3520 uae_u32 addr=sc.cr2;
3521 int r=-1;
3522 int size=4;
3523 int dir=-1;
3524 int len=0;
3525 int j;
3526
3527 write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3528 if (!canbang)
3529 write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3530 if (in_handler)
3531 write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3532
3533 if (canbang && i>=compiled_code && i<=current_compile_p) {
3534 if (*i==0x66) {
3535 i++;
3536 size=2;
3537 len++;
3538 }
3539
3540 switch(i[0]) {
3541 case 0x8a:
3542 if ((i[1]&0xc0)==0x80) {
3543 r=(i[1]>>3)&7;
3544 dir=SIG_READ;
3545 size=1;
3546 len+=6;
3547 break;
3548 }
3549 break;
3550 case 0x88:
3551 if ((i[1]&0xc0)==0x80) {
3552 r=(i[1]>>3)&7;
3553 dir=SIG_WRITE;
3554 size=1;
3555 len+=6;
3556 break;
3557 }
3558 break;
3559 case 0x8b:
3560 if ((i[1]&0xc0)==0x80) {
3561 r=(i[1]>>3)&7;
3562 dir=SIG_READ;
3563 len+=6;
3564 break;
3565 }
3566 if ((i[1]&0xc0)==0x40) {
3567 r=(i[1]>>3)&7;
3568 dir=SIG_READ;
3569 len+=3;
3570 break;
3571 }
3572 break;
3573 case 0x89:
3574 if ((i[1]&0xc0)==0x80) {
3575 r=(i[1]>>3)&7;
3576 dir=SIG_WRITE;
3577 len+=6;
3578 break;
3579 }
3580 if ((i[1]&0xc0)==0x40) {
3581 r=(i[1]>>3)&7;
3582 dir=SIG_WRITE;
3583 len+=3;
3584 break;
3585 }
3586 break;
3587 }
3588 }
3589
3590 if (r!=-1) {
3591 void* pr=NULL;
3592 write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3593
3594 switch(r) {
3595 case 0: pr=&(sc.eax); break;
3596 case 1: pr=&(sc.ecx); break;
3597 case 2: pr=&(sc.edx); break;
3598 case 3: pr=&(sc.ebx); break;
3599 case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3600 case 5: pr=(size>1)?
3601 (void*)(&(sc.ebp)):
3602 (void*)(((uae_u8*)&(sc.ecx))+1); break;
3603 case 6: pr=(size>1)?
3604 (void*)(&(sc.esi)):
3605 (void*)(((uae_u8*)&(sc.edx))+1); break;
3606 case 7: pr=(size>1)?
3607 (void*)(&(sc.edi)):
3608 (void*)(((uae_u8*)&(sc.ebx))+1); break;
3609 default: abort();
3610 }
3611 if (pr) {
3612 blockinfo* bi;
3613
3614 if (currprefs.comp_oldsegv) {
3615 addr-=NATMEM_OFFSET;
3616
3617 if ((addr>=0x10000000 && addr<0x40000000) ||
3618 (addr>=0x50000000)) {
3619 write_log("Suspicious address in %x SEGV handler.\n",addr);
3620 }
3621 if (dir==SIG_READ) {
3622 switch(size) {
3623 case 1: *((uae_u8*)pr)=get_byte(addr); break;
3624 case 2: *((uae_u16*)pr)=get_word(addr); break;
3625 case 4: *((uae_u32*)pr)=get_long(addr); break;
3626 default: abort();
3627 }
3628 }
3629 else { /* write */
3630 switch(size) {
3631 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3632 case 2: put_word(addr,*((uae_u16*)pr)); break;
3633 case 4: put_long(addr,*((uae_u32*)pr)); break;
3634 default: abort();
3635 }
3636 }
3637 write_log("Handled one access!\n");
3638 fflush(stdout);
3639 segvcount++;
3640 sc.eip+=len;
3641 }
3642 else {
3643 void* tmp=target;
3644 int i;
3645 uae_u8 vecbuf[5];
3646
3647 addr-=NATMEM_OFFSET;
3648
3649 if ((addr>=0x10000000 && addr<0x40000000) ||
3650 (addr>=0x50000000)) {
3651 write_log("Suspicious address in %x SEGV handler.\n",addr);
3652 }
3653
3654 target=(uae_u8*)sc.eip;
3655 for (i=0;i<5;i++)
3656 vecbuf[i]=target[i];
3657 emit_byte(0xe9);
3658 emit_long((uintptr)veccode-(uintptr)target-4);
3659 write_log("Create jump to %p\n",veccode);
3660
3661 write_log("Handled one access!\n");
3662 fflush(stdout);
3663 segvcount++;
3664
3665 target=veccode;
3666
3667 if (dir==SIG_READ) {
3668 switch(size) {
3669 case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3670 case 2: raw_mov_w_ri(r,get_word(addr)); break;
3671 case 4: raw_mov_l_ri(r,get_long(addr)); break;
3672 default: abort();
3673 }
3674 }
3675 else { /* write */
3676 switch(size) {
3677 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3678 case 2: put_word(addr,*((uae_u16*)pr)); break;
3679 case 4: put_long(addr,*((uae_u32*)pr)); break;
3680 default: abort();
3681 }
3682 }
3683 for (i=0;i<5;i++)
3684 raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3685 raw_mov_l_mi((uintptr)&in_handler,0);
3686 emit_byte(0xe9);
3687 emit_long(sc.eip+len-(uintptr)target-4);
3688 in_handler=1;
3689 target=tmp;
3690 }
3691 bi=active;
3692 while (bi) {
3693 if (bi->handler &&
3694 (uae_u8*)bi->direct_handler<=i &&
3695 (uae_u8*)bi->nexthandler>i) {
3696 write_log("deleted trigger (%p<%p<%p) %p\n",
3697 bi->handler,
3698 i,
3699 bi->nexthandler,
3700 bi->pc_p);
3701 invalidate_block(bi);
3702 raise_in_cl_list(bi);
3703 set_special(0);
3704 return;
3705 }
3706 bi=bi->next;
3707 }
3708 /* Not found in the active list. Might be a rom routine that
3709 is in the dormant list */
3710 bi=dormant;
3711 while (bi) {
3712 if (bi->handler &&
3713 (uae_u8*)bi->direct_handler<=i &&
3714 (uae_u8*)bi->nexthandler>i) {
3715 write_log("deleted trigger (%p<%p<%p) %p\n",
3716 bi->handler,
3717 i,
3718 bi->nexthandler,
3719 bi->pc_p);
3720 invalidate_block(bi);
3721 raise_in_cl_list(bi);
3722 set_special(0);
3723 return;
3724 }
3725 bi=bi->next;
3726 }
3727 write_log("Huh? Could not find trigger!\n");
3728 return;
3729 }
3730 }
3731 write_log("Can't handle access!\n");
3732 for (j=0;j<10;j++) {
3733 write_log("instruction byte %2d is %02x\n",j,i[j]);
3734 }
3735 write_log("Please send the above info (starting at \"fault address\") to\n"
3736 "bmeyer@csse.monash.edu.au\n"
3737 "This shouldn't happen ;-)\n");
3738 fflush(stdout);
3739 signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3740 }
3741 #endif
3742
3743
3744 /*************************************************************************
3745 * Checking for CPU features *
3746 *************************************************************************/
3747
3748 struct cpuinfo_x86 {
3749 uae_u8 x86; // CPU family
3750 uae_u8 x86_vendor; // CPU vendor
3751 uae_u8 x86_processor; // CPU canonical processor type
3752 uae_u8 x86_brand_id; // CPU BrandID if supported, 0 otherwise
3753 uae_u32 x86_hwcap;
3754 uae_u8 x86_model;
3755 uae_u8 x86_mask;
3756 int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3757 char x86_vendor_id[16];
3758 };
3759 struct cpuinfo_x86 cpuinfo;
3760
3761 enum {
3762 X86_VENDOR_INTEL = 0,
3763 X86_VENDOR_CYRIX = 1,
3764 X86_VENDOR_AMD = 2,
3765 X86_VENDOR_UMC = 3,
3766 X86_VENDOR_NEXGEN = 4,
3767 X86_VENDOR_CENTAUR = 5,
3768 X86_VENDOR_RISE = 6,
3769 X86_VENDOR_TRANSMETA = 7,
3770 X86_VENDOR_NSC = 8,
3771 X86_VENDOR_UNKNOWN = 0xff
3772 };
3773
3774 enum {
3775 X86_PROCESSOR_I386, /* 80386 */
3776 X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
3777 X86_PROCESSOR_PENTIUM,
3778 X86_PROCESSOR_PENTIUMPRO,
3779 X86_PROCESSOR_K6,
3780 X86_PROCESSOR_ATHLON,
3781 X86_PROCESSOR_PENTIUM4,
3782 X86_PROCESSOR_X86_64,
3783 X86_PROCESSOR_max
3784 };
3785
3786 static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3787 "80386",
3788 "80486",
3789 "Pentium",
3790 "PentiumPro",
3791 "K6",
3792 "Athlon",
3793 "Pentium4",
3794 "x86-64"
3795 };
3796
3797 static struct ptt {
3798 const int align_loop;
3799 const int align_loop_max_skip;
3800 const int align_jump;
3801 const int align_jump_max_skip;
3802 const int align_func;
3803 }
3804 x86_alignments[X86_PROCESSOR_max] = {
3805 { 4, 3, 4, 3, 4 },
3806 { 16, 15, 16, 15, 16 },
3807 { 16, 7, 16, 7, 16 },
3808 { 16, 15, 16, 7, 16 },
3809 { 32, 7, 32, 7, 32 },
3810 { 16, 7, 16, 7, 16 },
3811 { 0, 0, 0, 0, 0 },
3812 { 16, 7, 16, 7, 16 }
3813 };
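/* x86_alignments is indexed by the X86_PROCESSOR_* enum above, so the rows
   correspond in order to 80386, 80486, Pentium, PentiumPro, K6, Athlon,
   Pentium4 and x86-64; e.g. PentiumPro-class cores get 16-byte loop/jump
   alignment with at most 15 (loops) or 7 (jumps) bytes of padding. */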
3814
3815 static void
3816 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3817 {
3818 char *v = c->x86_vendor_id;
3819
3820 if (!strcmp(v, "GenuineIntel"))
3821 c->x86_vendor = X86_VENDOR_INTEL;
3822 else if (!strcmp(v, "AuthenticAMD"))
3823 c->x86_vendor = X86_VENDOR_AMD;
3824 else if (!strcmp(v, "CyrixInstead"))
3825 c->x86_vendor = X86_VENDOR_CYRIX;
3826 else if (!strcmp(v, "Geode by NSC"))
3827 c->x86_vendor = X86_VENDOR_NSC;
3828 else if (!strcmp(v, "UMC UMC UMC "))
3829 c->x86_vendor = X86_VENDOR_UMC;
3830 else if (!strcmp(v, "CentaurHauls"))
3831 c->x86_vendor = X86_VENDOR_CENTAUR;
3832 else if (!strcmp(v, "NexGenDriven"))
3833 c->x86_vendor = X86_VENDOR_NEXGEN;
3834 else if (!strcmp(v, "RiseRiseRise"))
3835 c->x86_vendor = X86_VENDOR_RISE;
3836 else if (!strcmp(v, "GenuineTMx86") ||
3837 !strcmp(v, "TransmetaCPU"))
3838 c->x86_vendor = X86_VENDOR_TRANSMETA;
3839 else
3840 c->x86_vendor = X86_VENDOR_UNKNOWN;
3841 }
3842
3843 static void
3844 cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3845 {
3846 const int CPUID_SPACE = 4096;
3847 uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3848 if (cpuid_space == VM_MAP_FAILED)
3849 abort();
3850 vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3851
3852 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3853 uae_u8* tmp=get_target();
3854
3855 s_op = op;
3856 set_target(cpuid_space);
3857 raw_push_l_r(0); /* eax */
3858 raw_push_l_r(1); /* ecx */
3859 raw_push_l_r(2); /* edx */
3860 raw_push_l_r(3); /* ebx */
3861 raw_mov_l_rm(0,(uintptr)&s_op);
3862 raw_cpuid(0);
3863 raw_mov_l_mr((uintptr)&s_eax,0);
3864 raw_mov_l_mr((uintptr)&s_ebx,3);
3865 raw_mov_l_mr((uintptr)&s_ecx,1);
3866 raw_mov_l_mr((uintptr)&s_edx,2);
3867 raw_pop_l_r(3);
3868 raw_pop_l_r(2);
3869 raw_pop_l_r(1);
3870 raw_pop_l_r(0);
3871 raw_ret();
3872 set_target(tmp);
3873
3874 ((cpuop_func*)cpuid_space)(0);
3875 if (eax != NULL) *eax = s_eax;
3876 if (ebx != NULL) *ebx = s_ebx;
3877 if (ecx != NULL) *ecx = s_ecx;
3878 if (edx != NULL) *edx = s_edx;
3879
3880 vm_release(cpuid_space, CPUID_SPACE);
3881 }
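/* cpuid() builds a tiny executable stub at runtime using the JIT's own
   emitters: it saves the four GPRs, loads the requested leaf from s_op,
   executes CPUID, stores EAX/EBX/ECX/EDX into the static result variables
   and returns.  The stub lives in a temporary page obtained from
   vm_acquire() and marked executable, and is released again afterwards. */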
3882
3883 static void
3884 raw_init_cpu(void)
3885 {
3886 struct cpuinfo_x86 *c = &cpuinfo;
3887
3888 /* Defaults */
3889 c->x86_processor = X86_PROCESSOR_max;
3890 c->x86_vendor = X86_VENDOR_UNKNOWN;
3891 c->cpuid_level = -1; /* CPUID not detected */
3892 c->x86_model = c->x86_mask = 0; /* So far unknown... */
3893 c->x86_vendor_id[0] = '\0'; /* Unset */
3894 c->x86_hwcap = 0;
3895
3896 /* Get vendor name */
3897 c->x86_vendor_id[12] = '\0';
3898 cpuid(0x00000000,
3899 (uae_u32 *)&c->cpuid_level,
3900 (uae_u32 *)&c->x86_vendor_id[0],
3901 (uae_u32 *)&c->x86_vendor_id[8],
3902 (uae_u32 *)&c->x86_vendor_id[4]);
3903 x86_get_cpu_vendor(c);
3904
3905 /* Intel-defined flags: level 0x00000001 */
3906 c->x86_brand_id = 0;
3907 if ( c->cpuid_level >= 0x00000001 ) {
3908 uae_u32 tfms, brand_id;
3909 cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3910 c->x86 = (tfms >> 8) & 15;
3911 if (c->x86 == 0xf)
3912 c->x86 += (tfms >> 20) & 0xff; /* extended family */
3913 c->x86_model = (tfms >> 4) & 15;
3914 if (c->x86_model == 0xf)
3915 c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3916 c->x86_brand_id = brand_id & 0xff;
3917 c->x86_mask = tfms & 15;
3918 } else {
3919 /* Have CPUID level 0 only - unheard of */
3920 c->x86 = 4;
3921 }
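/* tfms above is CPUID leaf 1 EAX: stepping in bits 3-0, model in 7-4,
   family in 11-8, extended model in 19-16 and extended family in 27-20.
   For example tfms = 0x00000f29 decodes as family 15, model 2, stepping 9
   (an early Pentium 4). */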
3922
3923 /* AMD-defined flags: level 0x80000001 */
3924 uae_u32 xlvl;
3925 cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3926 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3927 if ( xlvl >= 0x80000001 ) {
3928 uae_u32 features, extra_features;
3929 cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3930 if (features & (1 << 29)) {
3931 /* Assume x86-64 if long mode is supported */
3932 c->x86_processor = X86_PROCESSOR_X86_64;
3933 }
3934 if (extra_features & (1 << 0))
3935 have_lahf_lm = true;
3936 }
3937 }
3938
3939 /* Canonicalize processor ID */
3940 switch (c->x86) {
3941 case 3:
3942 c->x86_processor = X86_PROCESSOR_I386;
3943 break;
3944 case 4:
3945 c->x86_processor = X86_PROCESSOR_I486;
3946 break;
3947 case 5:
3948 if (c->x86_vendor == X86_VENDOR_AMD)
3949 c->x86_processor = X86_PROCESSOR_K6;
3950 else
3951 c->x86_processor = X86_PROCESSOR_PENTIUM;
3952 break;
3953 case 6:
3954 if (c->x86_vendor == X86_VENDOR_AMD)
3955 c->x86_processor = X86_PROCESSOR_ATHLON;
3956 else
3957 c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3958 break;
3959 case 15:
3960 if (c->x86_processor == X86_PROCESSOR_max) {
3961 switch (c->x86_vendor) {
3962 case X86_VENDOR_INTEL:
3963 c->x86_processor = X86_PROCESSOR_PENTIUM4;
3964 break;
3965 case X86_VENDOR_AMD:
3966 /* Assume a 32-bit Athlon processor if not in long mode */
3967 c->x86_processor = X86_PROCESSOR_ATHLON;
3968 break;
3969 }
3970 }
3971 break;
3972 }
3973 if (c->x86_processor == X86_PROCESSOR_max) {
3974 c->x86_processor = X86_PROCESSOR_I386;
3975 fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3976 fprintf(stderr, " Family : %d\n", c->x86);
3977 fprintf(stderr, " Model : %d\n", c->x86_model);
3978 fprintf(stderr, " Mask : %d\n", c->x86_mask);
3979 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3980 if (c->x86_brand_id)
3981 fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3982 }
3983
3984 /* Have CMOV support? */
3985 have_cmov = c->x86_hwcap & (1 << 15);
3986 #if defined(__x86_64__)
3987 if (!have_cmov) {
3988 write_log("x86-64 implementations are bound to have CMOV!\n");
3989 abort();
3990 }
3991 #endif
3992
3993 /* Can the host CPU suffer from partial register stalls? */
3994 have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3995 #if 1
3996 /* It appears that partial register writes are a bad idea even on
3997 AMD K7 cores, even though they are not supposed to have the
3998 dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3999 if (c->x86_processor == X86_PROCESSOR_ATHLON)
4000 have_rat_stall = true;
4001 #endif
4002
4003 /* Alignments */
4004 if (tune_alignment) {
4005 align_loops = x86_alignments[c->x86_processor].align_loop;
4006 align_jumps = x86_alignments[c->x86_processor].align_jump;
4007 }
4008
4009 write_log("Max CPUID level=%d Processor is %s [%s]\n",
4010 c->cpuid_level, c->x86_vendor_id,
4011 x86_processor_string_table[c->x86_processor]);
4012
4013 raw_flags_init();
4014 }
4015
4016 static bool target_check_bsf(void)
4017 {
4018 bool mismatch = false;
4019 for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
4020 for (int g_CF = 0; g_CF <= 1; g_CF++) {
4021 for (int g_OF = 0; g_OF <= 1; g_OF++) {
4022 for (int g_SF = 0; g_SF <= 1; g_SF++) {
4023 for (int value = -1; value <= 1; value++) {
4024 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
4025 unsigned long tmp = value;
4026 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
4027 : "+r" (flags), "+r" (tmp) : : "cc");
4028 int OF = (flags >> 11) & 1;
4029 int SF = (flags >> 7) & 1;
4030 int ZF = (flags >> 6) & 1;
4031 int CF = flags & 1;
4032 tmp = (value == 0);
4033 if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
4034 mismatch = true;
4035 }
4036 }}}}
4037 if (mismatch)
4038 write_log("Target CPU defines all flags on BSF instruction\n");
4039 return !mismatch;
4040 }
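/* BSF officially leaves every flag except ZF undefined, but the generated
   code appears to rely on the common behaviour where the other flags are
   simply preserved.  The loop above seeds CF/ZF/SF/OF with every
   combination, runs BSF on -1, 0 and 1, and reports a usable target only
   if ZF always reflects "source was zero" while the seeded flags survive. */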
4041
4042
4043 /*************************************************************************
4044 * FPU stuff *
4045 *************************************************************************/
4046
4047
4048 static __inline__ void raw_fp_init(void)
4049 {
4050 int i;
4051
4052 for (i=0;i<N_FREGS;i++)
4053 live.spos[i]=-2;
4054 live.tos=-1; /* Stack is empty */
4055 }
4056
4057 static __inline__ void raw_fp_cleanup_drop(void)
4058 {
4059 #if 0
4060 /* using FINIT instead of popping all the entries.
4061 Seems to have side effects --- there is display corruption in
4062 Quake when this is used */
4063 if (live.tos>1) {
4064 emit_byte(0x9b);
4065 emit_byte(0xdb);
4066 emit_byte(0xe3);
4067 live.tos=-1;
4068 }
4069 #endif
4070 while (live.tos>=1) {
4071 emit_byte(0xde);
4072 emit_byte(0xd9);
4073 live.tos-=2;
4074 }
4075 while (live.tos>=0) {
4076 emit_byte(0xdd);
4077 emit_byte(0xd8);
4078 live.tos--;
4079 }
4080 raw_fp_init();
4081 }
4082
4083 static __inline__ void make_tos(int r)
4084 {
4085 int p,q;
4086
4087 if (live.spos[r]<0) { /* Register not yet on stack */
4088 emit_byte(0xd9);
4089 emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
4090 live.tos++;
4091 live.spos[r]=live.tos;
4092 live.onstack[live.tos]=r;
4093 return;
4094 }
4095 /* Register is on stack */
4096 if (live.tos==live.spos[r])
4097 return;
4098 p=live.spos[r];
4099 q=live.onstack[live.tos];
4100
4101 emit_byte(0xd9);
4102 emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
4103 live.onstack[live.tos]=r;
4104 live.spos[r]=live.tos;
4105 live.onstack[p]=q;
4106 live.spos[q]=p;
4107 }
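/* The x87 stack is virtualised through three arrays: live.tos is the
   current top-of-stack depth, live.spos[r] is where virtual FP register r
   currently sits on the stack (negative if it is not there), and
   live.onstack[p] names the register at position p.  make_tos() brings r
   to the top either by pushing a dummy value (FLD1, 0xd9 0xe8) when r is
   not yet on the stack, or by exchanging it with the top via FXCH ST(i)
   (0xd9 0xc8+i). */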
4108
4109 static __inline__ void make_tos2(int r, int r2)
4110 {
4111 int q;
4112
4113 make_tos(r2); /* Put the reg that's supposed to end up in position 2
4114 on top */
4115
4116 if (live.spos[r]<0) { /* Register not yet on stack */
4117 make_tos(r); /* This will extend the stack */
4118 return;
4119 }
4120 /* Register is on stack */
4121 emit_byte(0xd9);
4122 emit_byte(0xc9); /* Move r2 into position 2 */
4123
4124 q=live.onstack[live.tos-1];
4125 live.onstack[live.tos]=q;
4126 live.spos[q]=live.tos;
4127 live.onstack[live.tos-1]=r2;
4128 live.spos[r2]=live.tos-1;
4129
4130 make_tos(r); /* And r into 1 */
4131 }
4132
4133 static __inline__ int stackpos(int r)
4134 {
4135 if (live.spos[r]<0)
4136 abort();
4137 if (live.tos<live.spos[r]) {
4138 printf("Looking for spos for fnreg %d\n",r);
4139 abort();
4140 }
4141 return live.tos-live.spos[r];
4142 }
4143
4144 static __inline__ void usereg(int r)
4145 {
4146 if (live.spos[r]<0)
4147 make_tos(r);
4148 }
4149
4150 /* This is called with one FP value in a reg *above* tos, which it will
4151 pop off the stack if necessary */
4152 static __inline__ void tos_make(int r)
4153 {
4154 if (live.spos[r]<0) {
4155 live.tos++;
4156 live.spos[r]=live.tos;
4157 live.onstack[live.tos]=r;
4158 return;
4159 }
4160 emit_byte(0xdd);
4161 emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
4162 and pop it*/
4163 }
4164
4165 /* FP helper functions */
4166 #if USE_NEW_RTASM
4167 #define DEFINE_OP(NAME, GEN) \
4168 static inline void raw_##NAME(uint32 m) \
4169 { \
4170 GEN(m, X86_NOREG, X86_NOREG, 1); \
4171 }
4172 DEFINE_OP(fstl, FSTDm);
4173 DEFINE_OP(fstpl, FSTPDm);
4174 DEFINE_OP(fldl, FLDDm);
4175 DEFINE_OP(fildl, FILDLm);
4176 DEFINE_OP(fistl, FISTLm);
4177 DEFINE_OP(flds, FLDSm);
4178 DEFINE_OP(fsts, FSTSm);
4179 DEFINE_OP(fstpt, FSTPTm);
4180 DEFINE_OP(fldt, FLDTm);
4181 #else
4182 #define DEFINE_OP(NAME, OP1, OP2) \
4183 static inline void raw_##NAME(uint32 m) \
4184 { \
4185 emit_byte(OP1); \
4186 emit_byte(OP2); \
4187 emit_long(m); \
4188 }
4189 DEFINE_OP(fstl, 0xdd, 0x15);
4190 DEFINE_OP(fstpl, 0xdd, 0x1d);
4191 DEFINE_OP(fldl, 0xdd, 0x05);
4192 DEFINE_OP(fildl, 0xdb, 0x05);
4193 DEFINE_OP(fistl, 0xdb, 0x15);
4194 DEFINE_OP(flds, 0xd9, 0x05);
4195 DEFINE_OP(fsts, 0xd9, 0x15);
4196 DEFINE_OP(fstpt, 0xdb, 0x3d);
4197 DEFINE_OP(fldt, 0xdb, 0x2d);
4198 #endif
4199 #undef DEFINE_OP
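/* In the non-RTASM case each DEFINE_OP expands to an emitter for one x87
   memory operation.  For example DEFINE_OP(fstl, 0xdd, 0x15) produces
   raw_fstl(m) as emit_byte(0xdd); emit_byte(0x15); emit_long(m); which is
   "fstl m": store st(0) as a 64-bit double at absolute address m. */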
4200
4201 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4202 {
4203 make_tos(r);
4204 raw_fstl(m);
4205 }
4206 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4207
4208 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4209 {
4210 make_tos(r);
4211 raw_fstpl(m);
4212 live.onstack[live.tos]=-1;
4213 live.tos--;
4214 live.spos[r]=-2;
4215 }
4216 LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4217
4218 LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4219 {
4220 raw_fldl(m);
4221 tos_make(r);
4222 }
4223 LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4224
4225 LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4226 {
4227 raw_fildl(m);
4228 tos_make(r);
4229 }
4230 LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4231
4232 LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4233 {
4234 make_tos(r);
4235 raw_fistl(m);
4236 }
4237 LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4238
4239 LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4240 {
4241 raw_flds(m);
4242 tos_make(r);
4243 }
4244 LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4245
4246 LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4247 {
4248 make_tos(r);
4249 raw_fsts(m);
4250 }
4251 LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4252
4253 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4254 {
4255 int rs;
4256
4257 /* Stupid x87 can't write a long double to mem without popping the
4258 stack! */
4259 usereg(r);
4260 rs=stackpos(r);
4261 emit_byte(0xd9); /* Get a copy to the top of stack */
4262 emit_byte(0xc0+rs);
4263
4264 raw_fstpt(m); /* store and pop it */
4265 }
4266 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4267
4268 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4269 {
4270
4271
4272 make_tos(r);
4273 raw_fstpt(m); /* store and pop it */
4274 live.onstack[live.tos]=-1;
4275 live.tos--;
4276 live.spos[r]=-2;
4277 }
4278 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4279
4280 LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4281 {
4282 raw_fldt(m);
4283 tos_make(r);
4284 }
4285 LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4286
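/* The constant generators below rely on the x87 constant-load opcodes
   (FLDPI, FLDLG2, FLDL2E, FLDLN2, FLD1, FLDZ): the constant is pushed
   onto the hardware stack and tos_make() then binds it to r. */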
4287 LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4288 {
4289 emit_byte(0xd9);
4290 emit_byte(0xeb);
4291 tos_make(r);
4292 }
4293 LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4294
4295 LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4296 {
4297 emit_byte(0xd9);
4298 emit_byte(0xec);
4299 tos_make(r);
4300 }
4301 LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4302
4303 LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4304 {
4305 emit_byte(0xd9);
4306 emit_byte(0xea);
4307 tos_make(r);
4308 }
4309 LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4310
4311 LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4312 {
4313 emit_byte(0xd9);
4314 emit_byte(0xed);
4315 tos_make(r);
4316 }
4317 LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4318
4319 LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4320 {
4321 emit_byte(0xd9);
4322 emit_byte(0xe8);
4323 tos_make(r);
4324 }
4325 LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4326
4327 LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4328 {
4329 emit_byte(0xd9);
4330 emit_byte(0xee);
4331 tos_make(r);
4332 }
4333 LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4334
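/* Register-to-register moves come in two flavours: if the source is
   already at the top of the stack and the destination has a slot, a
   single FST ST(dd) copies the value in place; otherwise the source is
   duplicated with FLD ST(ds) and tos_make() moves it into the
   destination, popping if needed. */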
4335 LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4336 {
4337 int ds;
4338
4339 usereg(s);
4340 ds=stackpos(s);
4341 if (ds==0 && live.spos[d]>=0) {
4342 /* source is on top of stack, and we already have the dest */
4343 int dd=stackpos(d);
4344 emit_byte(0xdd);
4345 emit_byte(0xd0+dd);
4346 }
4347 else {
4348 emit_byte(0xd9);
4349 emit_byte(0xc0+ds); /* duplicate source on tos */
4350 tos_make(d); /* store to destination, pop if necessary */
4351 }
4352 }
4353 LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4354
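/* This emits FLDCW (D9 /5) with ModRM 0xa8+index, i.e. the FPU control
   word is reloaded from memory at base+index -- presumably to switch
   rounding/precision modes on the fly. */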
4355 LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4356 {
4357 emit_byte(0xd9);
4358 emit_byte(0xa8+index);
4359 emit_long(base);
4360 }
4361 LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4362
4363
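/* The unary generators that follow (fsqrt, fabs, frndint, fcos, fsin,
   and raw_fneg_rr further down) share one pattern: for d != s the source
   is duplicated onto the top of the stack, the one-byte D9 xx operation
   is applied there, and tos_make(d) moves the result into the
   destination; for d == s the operation is applied in place on the top
   of the stack. */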
4364 LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4365 {
4366 int ds;
4367
4368 if (d!=s) {
4369 usereg(s);
4370 ds=stackpos(s);
4371 emit_byte(0xd9);
4372 emit_byte(0xc0+ds); /* duplicate source */
4373 emit_byte(0xd9);
4374 emit_byte(0xfa); /* take square root */
4375 tos_make(d); /* store to destination */
4376 }
4377 else {
4378 make_tos(d);
4379 emit_byte(0xd9);
4380 emit_byte(0xfa); /* take square root */
4381 }
4382 }
4383 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4384
4385 LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4386 {
4387 int ds;
4388
4389 if (d!=s) {
4390 usereg(s);
4391 ds=stackpos(s);
4392 emit_byte(0xd9);
4393 emit_byte(0xc0+ds); /* duplicate source */
4394 emit_byte(0xd9);
4395 emit_byte(0xe1); /* take fabs */
4396 tos_make(d); /* store to destination */
4397 }
4398 else {
4399 make_tos(d);
4400 emit_byte(0xd9);
4401 emit_byte(0xe1); /* take fabs */
4402 }
4403 }
4404 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4405
4406 LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4407 {
4408 int ds;
4409
4410 if (d!=s) {
4411 usereg(s);
4412 ds=stackpos(s);
4413 emit_byte(0xd9);
4414 emit_byte(0xc0+ds); /* duplicate source */
4415 emit_byte(0xd9);
4416 emit_byte(0xfc); /* take frndint */
4417 tos_make(d); /* store to destination */
4418 }
4419 else {
4420 make_tos(d);
4421 emit_byte(0xd9);
4422 emit_byte(0xfc); /* take frndint */
4423 }
4424 }
4425 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4426
4427 LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4428 {
4429 int ds;
4430
4431 if (d!=s) {
4432 usereg(s);
4433 ds=stackpos(s);
4434 emit_byte(0xd9);
4435 emit_byte(0xc0+ds); /* duplicate source */
4436 emit_byte(0xd9);
4437 emit_byte(0xff); /* take cos */
4438 tos_make(d); /* store to destination */
4439 }
4440 else {
4441 make_tos(d);
4442 emit_byte(0xd9);
4443 emit_byte(0xff); /* take cos */
4444 }
4445 }
4446 LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4447
4448 LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4449 {
4450 int ds;
4451
4452 if (d!=s) {
4453 usereg(s);
4454 ds=stackpos(s);
4455 emit_byte(0xd9);
4456 emit_byte(0xc0+ds); /* duplicate source */
4457 emit_byte(0xd9);
4458 emit_byte(0xfe); /* take sin */
4459 tos_make(d); /* store to destination */
4460 }
4461 else {
4462 make_tos(d);
4463 emit_byte(0xd9);
4464 emit_byte(0xfe); /* take sin */
4465 }
4466 }
4467 LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4468
4469 static const double one=1;
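/* raw_ftwotox_rr computes 2^x by range reduction, roughly:
     n = rndint(x);  f = x - n;          (|f| < 1, within F2XM1's domain)
     2^x = 2^f * 2^n = fscale(f2xm1(f) + 1.0, n)
   The 1.0 is added from the 'one' constant above to avoid using another
   stack slot, and the rounded value n is dropped again at the end. */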
4470 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4471 {
4472 int ds;
4473
4474 usereg(s);
4475 ds=stackpos(s);
4476 emit_byte(0xd9);
4477 emit_byte(0xc0+ds); /* duplicate source */
4478
4479 emit_byte(0xd9);
4480 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4481 emit_byte(0xd9);
4482 emit_byte(0xfc); /* rndint */
4483 emit_byte(0xd9);
4484 emit_byte(0xc9); /* swap top two elements */
4485 emit_byte(0xd8);
4486 emit_byte(0xe1); /* subtract rounded from original */
4487 emit_byte(0xd9);
4488 emit_byte(0xf0); /* f2xm1 */
4489 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4490 emit_byte(0xd9);
4491 emit_byte(0xfd); /* and scale it */
4492 emit_byte(0xdd);
4493 emit_byte(0xd9); /* take the rounded value off */
4494 tos_make(d); /* store to destination */
4495 }
4496 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4497
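/* raw_fetox_rr computes e^x as 2^(x * log2(e)): FLDL2E and FMULP rescale
   the argument, then the same rndint/f2xm1/fscale sequence as in
   raw_ftwotox_rr above finishes the job. */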
4498 LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4499 {
4500 int ds;
4501
4502 usereg(s);
4503 ds=stackpos(s);
4504 emit_byte(0xd9);
4505 emit_byte(0xc0+ds); /* duplicate source */
4506 emit_byte(0xd9);
4507 emit_byte(0xea); /* fldl2e */
4508 emit_byte(0xde);
4509 emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4510
4511 emit_byte(0xd9);
4512 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4513 emit_byte(0xd9);
4514 emit_byte(0xfc); /* rndint */
4515 emit_byte(0xd9);
4516 emit_byte(0xc9); /* swap top two elements */
4517 emit_byte(0xd8);
4518 emit_byte(0xe1); /* subtract rounded from original */
4519 emit_byte(0xd9);
4520 emit_byte(0xf0); /* f2xm1 */
4521 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4522 emit_byte(0xd9);
4523 emit_byte(0xfd); /* and scale it */
4524 emit_byte(0xdd);
4525 emit_byte(0xd9); /* take the rounded value off */
4526 tos_make(d); /* store to destination */
4527 }
4528 LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4529
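/* raw_flog2_rr uses FYL2X, which computes ST(1) * log2(ST(0)) and pops;
   pushing 1.0 first and swapping leaves exactly log2(x) on top. */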
4530 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4531 {
4532 int ds;
4533
4534 usereg(s);
4535 ds=stackpos(s);
4536 emit_byte(0xd9);
4537 emit_byte(0xc0+ds); /* duplicate source */
4538 emit_byte(0xd9);
4539 emit_byte(0xe8); /* push '1' */
4540 emit_byte(0xd9);
4541 emit_byte(0xc9); /* swap top two */
4542 emit_byte(0xd9);
4543 emit_byte(0xf1); /* take 1*log2(x) */
4544 tos_make(d); /* store to destination */
4545 }
4546 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4547
4548
4549 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4550 {
4551 int ds;
4552
4553 if (d!=s) {
4554 usereg(s);
4555 ds=stackpos(s);
4556 emit_byte(0xd9);
4557 emit_byte(0xc0+ds); /* duplicate source */
4558 emit_byte(0xd9);
4559 emit_byte(0xe0); /* take fchs */
4560 tos_make(d); /* store to destination */
4561 }
4562 else {
4563 make_tos(d);
4564 emit_byte(0xd9);
4565 emit_byte(0xe0); /* take fchs */
4566 }
4567 }
4568 LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4569
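/* The dyadic generators below (fadd, fsub, fmul, fdiv) avoid shuffling
   the stack where possible: if the source is already at the top, the
   DC-form instruction updates the destination in place (op ST(ds),ST(0));
   otherwise the destination is brought to the top and the D8 form
   (op ST(0),ST(ds)) is used, with ds then indexing the source.
   raw_fcmp_rr likewise brings the destination to the top and compares it
   against the source with FUCOM ST(ds). */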
4570 LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4571 {
4572 int ds;
4573
4574 usereg(s);
4575 usereg(d);
4576
4577 if (live.spos[s]==live.tos) {
4578 /* Source is on top of stack */
4579 ds=stackpos(d);
4580 emit_byte(0xdc);
4581 emit_byte(0xc0+ds); /* add source to dest*/
4582 }
4583 else {
4584 make_tos(d);
4585 ds=stackpos(s);
4586
4587 emit_byte(0xd8);
4588 emit_byte(0xc0+ds); /* add source to dest*/
4589 }
4590 }
4591 LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4592
4593 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4594 {
4595 int ds;
4596
4597 usereg(s);
4598 usereg(d);
4599
4600 if (live.spos[s]==live.tos) {
4601 /* Source is on top of stack */
4602 ds=stackpos(d);
4603 emit_byte(0xdc);
4604 emit_byte(0xe8+ds); /* sub source from dest*/
4605 }
4606 else {
4607 make_tos(d);
4608 ds=stackpos(s);
4609
4610 emit_byte(0xd8);
4611 emit_byte(0xe0+ds); /* sub src from dest */
4612 }
4613 }
4614 LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4615
4616 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4617 {
4618 int ds;
4619
4620 usereg(s);
4621 usereg(d);
4622
4623 make_tos(d);
4624 ds=stackpos(s);
4625
4626 emit_byte(0xdd);
4627 emit_byte(0xe0+ds); /* cmp dest with source*/
4628 }
4629 LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4630
4631 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4632 {
4633 int ds;
4634
4635 usereg(s);
4636 usereg(d);
4637
4638 if (live.spos[s]==live.tos) {
4639 /* Source is on top of stack */
4640 ds=stackpos(d);
4641 emit_byte(0xdc);
4642 emit_byte(0xc8+ds); /* mul dest by source*/
4643 }
4644 else {
4645 make_tos(d);
4646 ds=stackpos(s);
4647
4648 emit_byte(0xd8);
4649 emit_byte(0xc8+ds); /* mul dest by source*/
4650 }
4651 }
4652 LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4653
4654 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4655 {
4656 int ds;
4657
4658 usereg(s);
4659 usereg(d);
4660
4661 if (live.spos[s]==live.tos) {
4662 /* Source is on top of stack */
4663 ds=stackpos(d);
4664 emit_byte(0xdc);
4665 emit_byte(0xf8+ds); /* div dest by source */
4666 }
4667 else {
4668 make_tos(d);
4669 ds=stackpos(s);
4670
4671 emit_byte(0xd8);
4672 emit_byte(0xf0+ds); /* div dest by source*/
4673 }
4674 }
4675 LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4676
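/* FPREM and FPREM1 work on fixed stack slots -- dividend in ST(0),
   divisor in ST(1) -- which is exactly what make_tos2(d,s) arranges.
   FPREM (raw_frem_rr) yields the round-toward-zero remainder, FPREM1
   (raw_frem1_rr) the IEEE round-to-nearest remainder. */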
4677 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4678 {
4679 int ds;
4680
4681 usereg(s);
4682 usereg(d);
4683
4684 make_tos2(d,s);
4685 ds=stackpos(s);
4686
4687 if (ds!=1) {
4688 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4689 abort();
4690 }
4691 emit_byte(0xd9);
4692 emit_byte(0xf8); /* take rem from dest by source */
4693 }
4694 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4695
4696 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4697 {
4698 int ds;
4699
4700 usereg(s);
4701 usereg(d);
4702
4703 make_tos2(d,s);
4704 ds=stackpos(s);
4705
4706 if (ds!=1) {
4707 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4708 abort();
4709 }
4710 emit_byte(0xd9);
4711 emit_byte(0xf5); /* take rem1 from dest by source */
4712 }
4713 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4714
4715
4716 LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4717 {
4718 make_tos(r);
4719 emit_byte(0xd9); /* ftst */
4720 emit_byte(0xe4);
4721 }
4722 LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4723
4724 /* %eax register is clobbered if target processor doesn't support fucomi */
4725 #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4726 #define FFLAG_NREG EAX_INDEX
4727
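/* Turn an x87 comparison of r against +0.0 into host condition flags:
   FLDZ pushes zero, FXCH brings the value to the top, and either FUCOMI
   (which sets ZF/CF/PF directly, P6 and later) or FUCOM + FNSTSW AX +
   SAHF (which clobbers %eax, hence FFLAG_NREG above) copies the result
   into EFLAGS; the final FSTP puts the value back and restores the
   original stack layout. */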
4728 static __inline__ void raw_fflags_into_flags(int r)
4729 {
4730 int p;
4731
4732 usereg(r);
4733 p=stackpos(r);
4734
4735 emit_byte(0xd9);
4736 emit_byte(0xee); /* Push 0 */
4737 emit_byte(0xd9);
4738 emit_byte(0xc9+p); /* fxch: swap the freshly pushed zero with the value (st(p+1)) */
4739 if (have_cmov) {
4740 // gb-- fucomi is for P6 cores only, not K6-2 then...
4741 emit_byte(0xdb);
4742 emit_byte(0xe9+p); /* fucomi them */
4743 }
4744 else {
4745 emit_byte(0xdd);
4746 emit_byte(0xe1+p); /* fucom them */
4747 emit_byte(0x9b);
4748 emit_byte(0xdf);
4749 emit_byte(0xe0); /* fstsw ax */
4750 raw_sahf(0); /* sahf */
4751 }
4752 emit_byte(0xdd);
4753 emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4754 }