ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.34
Committed: 2006-02-26T18:49:55Z (18 years, 4 months ago) by gbeauche
Branch: MAIN
CVS Tags: nigel-build-19
Changes since 1.33: +17 -7 lines
Log Message:
fix FETOX & FTWOTOX translations for x86_64

File Contents

# Content
1 /*
2 * compiler/codegen_x86.cpp - IA-32 code generator
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2005 Christian Bauer
10 *
11 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 /* This should eventually end up in machdep/, but for now, x86 is the
29 only target, and it's easier this way... */
30
31 #include "flags_x86.h"
32
33 /*************************************************************************
34 * Some basic information about the target CPU *
35 *************************************************************************/
36
/* Numeric register indices used throughout the code generator.
   EAX..EDI map to 0..7; R8..R15 map to 8..15 and exist only on x86_64 builds.
   The four high-byte registers AH/CH/DH/BH are 0x10+4 plus the index of their parent register. */
37 #define EAX_INDEX 0
38 #define ECX_INDEX 1
39 #define EDX_INDEX 2
40 #define EBX_INDEX 3
41 #define ESP_INDEX 4
42 #define EBP_INDEX 5
43 #define ESI_INDEX 6
44 #define EDI_INDEX 7
45 #if defined(__x86_64__)
46 #define R8_INDEX 8
47 #define R9_INDEX 9
48 #define R10_INDEX 10
49 #define R11_INDEX 11
50 #define R12_INDEX 12
51 #define R13_INDEX 13
52 #define R14_INDEX 14
53 #define R15_INDEX 15
54 #endif
55 /* XXX this has to match X86_Reg8H_Base + 4 */
56 #define AH_INDEX (0x10+4+EAX_INDEX)
57 #define CH_INDEX (0x10+4+ECX_INDEX)
58 #define DH_INDEX (0x10+4+EDX_INDEX)
59 #define BH_INDEX (0x10+4+EBX_INDEX)
60
61 /* The register in which subroutines return an integer return value */
62 #define REG_RESULT EAX_INDEX
63
64 /* The registers subroutines take their first and second argument in */
/* Three cases: MSVC without USE_NORMAL_CALLING_CONVENTION (ECX, EDX),
   x86_64 (EDI, ESI), and everything else (EAX, EDX). */
65 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
66 /* Handle the _fastcall parameters of ECX and EDX */
67 #define REG_PAR1 ECX_INDEX
68 #define REG_PAR2 EDX_INDEX
69 #elif defined(__x86_64__)
70 #define REG_PAR1 EDI_INDEX
71 #define REG_PAR2 ESI_INDEX
72 #else
73 #define REG_PAR1 EAX_INDEX
74 #define REG_PAR2 EDX_INDEX
75 #endif
76
77 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
/* NOTE(review): in the MSVC fastcall case REG_PC_TMP is EAX_INDEX, the same index as REG_PC_PRE,
   although the comment on the other branch asks for a different register — confirm this is intended. */
78 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
79 #define REG_PC_TMP EAX_INDEX
80 #else
81 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
82 #endif
83
84 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
85 -1 if any reg will do */
86 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
87 #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
88
/* Stack alignment (16) and stack offset (the size of one pointer); their users are outside this excerpt. */
89 #define STACK_ALIGN 16
90 #define STACK_OFFSET sizeof(void *)
91
/* Register index tables. The uae_s8 lists are terminated by -1.
   always_used holds only index 4 (ESP_INDEX). */
92 uae_s8 always_used[]={4,-1};
/* can_byte / can_word: on x86_64 both list every index except 4 (ESP_INDEX);
   otherwise can_byte lists 0..3 only and can_word lists 0..3 and 5..7. */
93 #if defined(__x86_64__)
94 uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
95 uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
96 #else
97 uae_s8 can_byte[]={0,1,2,3,-1};
98 uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
99 #endif
100
/* call_saved is indexed by register number. The USE_OPTIMIZED_CALLS variant is
   deliberately unbuildable (#error below). */
101 #if USE_OPTIMIZED_CALLS
102 /* Make sure interpretive core does not use cpuopti */
103 uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
104 #error FIXME: code not ready
105 #else
106 /* cpuopti mutate instruction handlers to assume registers are saved
107 by the caller */
108 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
109 #endif
110
111 /* This *should* be the same as call_saved. But:
112 - We might not really know which registers are saved, and which aren't,
113 so we need to preserve some, but don't want to rely on everyone else
114 also saving those registers
115 - Special registers (such like the stack pointer) should not be "preserved"
116 by pushing, even though they are "saved" across function calls
117 */
/* need_to_preserve is indexed by register number: 1 marks a register to preserve.
   x86_64 table: indices 3, 5 and 11..15 are set; IA-32 table: indices 3, 5, 6 and 7 are set. */
118 #if defined(__x86_64__)
119 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
120 /* preserve r11 because it's generally used to hold pointers to functions */
121 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
122 #else
123 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
124 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
125 #endif
126
127 /* Whether classes of instructions do or don't clobber the native flags */
/* A class defined with an empty body expands to nothing; the others expand to a
   call to clobber_flags(), which is defined outside this excerpt. */
128 #define CLOBBER_MOV
129 #define CLOBBER_LEA
130 #define CLOBBER_CMOV
131 #define CLOBBER_POP
132 #define CLOBBER_PUSH
133 #define CLOBBER_SUB clobber_flags()
134 #define CLOBBER_SBB clobber_flags()
135 #define CLOBBER_CMP clobber_flags()
136 #define CLOBBER_ADD clobber_flags()
137 #define CLOBBER_ADC clobber_flags()
138 #define CLOBBER_AND clobber_flags()
139 #define CLOBBER_OR clobber_flags()
140 #define CLOBBER_XOR clobber_flags()
141
142 #define CLOBBER_ROL clobber_flags()
143 #define CLOBBER_ROR clobber_flags()
144 #define CLOBBER_SHLL clobber_flags()
145 #define CLOBBER_SHRL clobber_flags()
146 #define CLOBBER_SHRA clobber_flags()
147 #define CLOBBER_TEST clobber_flags()
148 #define CLOBBER_CL16
149 #define CLOBBER_CL8
150 #define CLOBBER_SE32
151 #define CLOBBER_SE16
152 #define CLOBBER_SE8
153 #define CLOBBER_ZE32
154 #define CLOBBER_ZE16
155 #define CLOBBER_ZE8
156 #define CLOBBER_SW16 clobber_flags()
157 #define CLOBBER_SW32
158 #define CLOBBER_SETCC
159 #define CLOBBER_MUL clobber_flags()
160 #define CLOBBER_BT clobber_flags()
161 #define CLOBBER_BSF clobber_flags()
162
163 /* FIXME: disabled until that's proofread. */
/* USE_NEW_RTASM is switched on here for x86_64 builds only; other builds leave it as set elsewhere (if at all). */
164 #if defined(__x86_64__)
165 #define USE_NEW_RTASM 1
166 #endif
167
168 #if USE_NEW_RTASM
169
/* Configuration macros set before codegen_x86.h is included, followed by the
   x86_emit_* / x86_get_target hooks, which forward to emit_byte, emit_word,
   emit_long, emit_quad and get_target. x86_emit_failure forwards to jit_fail
   with the current file, line and function. */
170 #if defined(__x86_64__)
171 #define X86_TARGET_64BIT 1
172 #endif
173 #define X86_FLAT_REGISTERS 0
174 #define X86_OPTIMIZE_ALU 1
175 #define X86_OPTIMIZE_ROTSHI 1
176 #include "codegen_x86.h"
177
178 #define x86_emit_byte(B) emit_byte(B)
179 #define x86_emit_word(W) emit_word(W)
180 #define x86_emit_long(L) emit_long(L)
181 #define x86_emit_quad(Q) emit_quad(Q)
182 #define x86_get_target() get_target()
183 #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
184
/*
 * Report a fatal code-emission error and terminate the process.
 *
 * msg      - description of the failure
 * file     - source file reporting the failure
 * line     - source line reporting the failure
 * function - name of the reporting function
 *
 * Writes one diagnostic line to stderr, then calls abort(); never returns.
 */
static void jit_fail(const char *msg, const char *file, int line, const char *function)
{
	fprintf(stderr,
	        "JIT failure in function %s from file %s at line %d: %s\n",
	        function,
	        file,
	        line,
	        msg);
	abort();
}
191
/* Emit a push of register r: PUSHQr on x86_64 builds, PUSHLr otherwise. */
192 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
193 {
194 #if defined(__x86_64__)
195 PUSHQr(r);
196 #else
197 PUSHLr(r);
198 #endif
199 }
200 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
201
/* Emit a pop into register r: POPQr on x86_64 builds, POPLr otherwise. */
202 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
203 {
204 #if defined(__x86_64__)
205 POPQr(r);
206 #else
207 POPLr(r);
208 #endif
209 }
210 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
211
/* Emit a pop into memory address d (no base or index register): POPQm on x86_64 builds, POPLm otherwise. */
212 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
213 {
214 #if defined(__x86_64__)
215 POPQm(d, X86_NOREG, X86_NOREG, 1);
216 #else
217 POPLm(d, X86_NOREG, X86_NOREG, 1);
218 #endif
219 }
220 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
221
/* Emit BTL (bit test) on register r with the immediate bit index i. */
222 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
223 {
224 BTLir(i, r);
225 }
226 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
227
/* Emit BTL (bit test) on register r with the bit index held in register b. */
228 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
229 {
230 BTLrr(b, r);
231 }
232 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
233
/* Emit BTCL (bit test and complement) on register r with the immediate bit index i. */
234 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
235 {
236 BTCLir(i, r);
237 }
238 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
239
/* Emit BTCL (bit test and complement) on register r with the bit index held in register b. */
240 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
241 {
242 BTCLrr(b, r);
243 }
244 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
245
/* Emit BTRL (bit test and reset) on register r with the immediate bit index i. */
246 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
247 {
248 BTRLir(i, r);
249 }
250 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
251
/* Emit BTRL (bit test and reset) on register r with the bit index held in register b. */
252 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
253 {
254 BTRLrr(b, r);
255 }
256 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
257
/* Emit BTSL (bit test and set) on register r with the immediate bit index i. */
258 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
259 {
260 BTSLir(i, r);
261 }
262 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
263
/* Emit BTSL (bit test and set) on register r with the bit index held in register b. */
264 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
265 {
266 BTSLrr(b, r);
267 }
268 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
269
/* Emit SUBW of immediate i from the 16-bit register d. */
270 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
271 {
272 SUBWir(i, d);
273 }
274 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
275
/* Emit MOVL loading register d from memory address s (no base or index register). */
276 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
277 {
278 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
279 }
280 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
281
/* Emit MOVL storing immediate s to memory address d (no base or index register). */
282 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
283 {
284 MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
285 }
286 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
287
/* Emit MOVW storing immediate s to memory address d (no base or index register). */
288 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
289 {
290 MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
291 }
292 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
293
/* Emit MOVB storing immediate s to memory address d (no base or index register). */
294 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
295 {
296 MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
297 }
298 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
299
/* Emit ROLB rotating the byte at memory address d (no base or index register) by immediate i. */
300 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
301 {
302 ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
303 }
304 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
305
/* Emit ROLB rotating the byte register r by immediate i. */
306 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
307 {
308 ROLBir(i, r);
309 }
310 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
311
/* Emit ROLW rotating the 16-bit register r by immediate i. */
312 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
313 {
314 ROLWir(i, r);
315 }
316 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
317
/* Emit ROLL rotating the 32-bit register r by immediate i. */
318 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
319 {
320 ROLLir(i, r);
321 }
322 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
323
/* Emit ROLL rotating the 32-bit register d by the count held in register r. */
324 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
325 {
326 ROLLrr(r, d);
327 }
328 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
329
/* Emit ROLW rotating the 16-bit register d by the count held in register r. */
330 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
331 {
332 ROLWrr(r, d);
333 }
334 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
335
/* Emit ROLB rotating the byte register d by the count held in register r. */
336 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
337 {
338 ROLBrr(r, d);
339 }
340 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
341
/* Emit SHLL shifting the 32-bit register d by the count held in register r. */
342 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
343 {
344 SHLLrr(r, d);
345 }
346 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
347
/* Emit SHLW shifting the 16-bit register d by the count held in register r. */
348 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
349 {
350 SHLWrr(r, d);
351 }
352 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
353
/* Emit SHLB shifting the byte register d by the count held in register r. */
354 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
355 {
356 SHLBrr(r, d);
357 }
358 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
359
/* Emit RORB rotating the byte register r by immediate i. */
360 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
361 {
362 RORBir(i, r);
363 }
364 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
365
/* Emit RORW rotating the 16-bit register r by immediate i. */
366 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
367 {
368 RORWir(i, r);
369 }
370 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
371
/* Emit ORL of the 32-bit value at memory address s (no base or index register) into register d. */
372 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
373 {
374 ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
375 }
376 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
377
/* Emit RORL rotating the 32-bit register r by immediate i. */
378 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
379 {
380 RORLir(i, r);
381 }
382 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
383
/* Emit RORL rotating the 32-bit register d by the count held in register r. */
384 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
385 {
386 RORLrr(r, d);
387 }
388 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
389
/* Emit RORW rotating the 16-bit register d by the count held in register r. */
390 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
391 {
392 RORWrr(r, d);
393 }
394 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
395
/* Emit RORB rotating the byte register d by the count held in register r. */
396 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
397 {
398 RORBrr(r, d);
399 }
400 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
401
/* Emit SHRL shifting the 32-bit register d by the count held in register r. */
402 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
403 {
404 SHRLrr(r, d);
405 }
406 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
407
/* Emit SHRW shifting the 16-bit register d by the count held in register r. */
408 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
409 {
410 SHRWrr(r, d);
411 }
412 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
413
/* Emit SHRB shifting the byte register d by the count held in register r. */
414 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
415 {
416 SHRBrr(r, d);
417 }
418 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
419
/* Emit SARL shifting the 32-bit register d by the count held in register r. */
420 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
421 {
422 SARLrr(r, d);
423 }
424 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
425
/* Emit SARW shifting the 16-bit register d by the count held in register r. */
426 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
427 {
428 SARWrr(r, d);
429 }
430 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
431
/* Emit SARB shifting the byte register d by the count held in register r. */
432 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
433 {
434 SARBrr(r, d);
435 }
436 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
437
/* Emit SHLL shifting the 32-bit register r by immediate i. */
438 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
439 {
440 SHLLir(i, r);
441 }
442 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
443
/* Emit SHLW shifting the 16-bit register r by immediate i. */
444 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
445 {
446 SHLWir(i, r);
447 }
448 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
449
/* Emit SHLB shifting the byte register r by immediate i. */
450 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
451 {
452 SHLBir(i, r);
453 }
454 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
455
/* Emit SHRL shifting the 32-bit register r by immediate i. */
456 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
457 {
458 SHRLir(i, r);
459 }
460 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
461
/* Emit SHRW shifting the 16-bit register r by immediate i. */
462 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
463 {
464 SHRWir(i, r);
465 }
466 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
467
/* Emit SHRB shifting the byte register r by immediate i. */
468 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
469 {
470 SHRBir(i, r);
471 }
472 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
473
/* Emit SARL shifting the 32-bit register r by immediate i. */
474 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
475 {
476 SARLir(i, r);
477 }
478 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
479
/* Emit SARW shifting the 16-bit register r by immediate i. */
480 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
481 {
482 SARWir(i, r);
483 }
484 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
485
/* Emit SARB shifting the byte register r by immediate i. */
486 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
487 {
488 SARBir(i, r);
489 }
490 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
491
/* Emit SAHF. The dummy_ah parameter is not used in the body. */
492 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
493 {
494 SAHF();
495 }
496 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
497
/* Emit CPUID. The dummy_eax parameter is not used in the body. */
498 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
499 {
500 CPUID();
501 }
502 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
503
/* Emit LAHF. The dummy_ah parameter is not used in the body. */
504 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
505 {
506 LAHF();
507 }
508 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
509
/* Emit SETcc for condition code cc into the byte register d. */
510 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
511 {
512 SETCCir(cc, d);
513 }
514 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
515
/* Emit SETcc for condition code cc into memory address d (no base or index register). */
516 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
517 {
518 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
519 }
520 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
521
/* Emit a conditional 32-bit move of register s into register d under condition code cc.
   When have_cmov is set a single CMOVLrr is emitted; otherwise a short conditional
   jump followed by a plain MOVLrr is emitted instead. */
522 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
523 {
524 if (have_cmov)
525 CMOVLrr(cc, s, d);
526 else { /* replacement using branch and mov */
/* On x86_64 builds the fallback is treated as fatal: log the message and abort. */
527 #if defined(__x86_64__)
528 write_log("x86-64 implementations are bound to have CMOV!\n");
529 abort();
530 #endif
/* NOTE(review): cc^1 presumably selects the opposite condition, and the displacement 2
   presumably equals the encoded size of the MOVLrr being skipped — confirm against the emitter macros. */
531 JCCSii(cc^1, 2);
532 MOVLrr(s, d);
533 }
534 }
535 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
536
/* Emit BSFL (bit scan forward) with source register s and destination register d. */
537 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
538 {
539 BSFLrr(s, d);
540 }
541 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
542
/* Emit MOVSLQ (sign-extending move, 32-bit source) from register s into register d. */
543 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
544 {
545 MOVSLQrr(s, d);
546 }
547 LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
548
/* Emit MOVSWL (sign-extending move, 16-bit source) from register s into register d. */
549 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
550 {
551 MOVSWLrr(s, d);
552 }
553 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
554
/* Emit MOVSBL (sign-extending move, 8-bit source) from register s into register d. */
555 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
556 {
557 MOVSBLrr(s, d);
558 }
559 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
560
/* Emit MOVZWL (zero-extending move, 16-bit source) from register s into register d. */
561 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
562 {
563 MOVZWLrr(s, d);
564 }
565 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
566
/* Emit MOVZBL (zero-extending move, 8-bit source) from register s into register d. */
567 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
568 {
569 MOVZBLrr(s, d);
570 }
571 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
572
/* Emit the two-operand IMULL with source register s and destination register d. */
573 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
574 {
575 IMULLrr(s, d);
576 }
577 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
578
579 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
580 {
581 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
582 write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
583 abort();
584 }
585 IMULLr(s);
586 }
587 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
588
589 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
590 {
591 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
592 write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
593 abort();
594 }
595 MULLr(s);
596 }
597 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
598
/* Not implemented: emits nothing and aborts the process when called. */
599 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
600 {
601 abort(); /* %^$&%^$%#^ x86! */
602 }
603 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
604
/* Emit MOVB from byte register s into byte register d. */
605 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
606 {
607 MOVBrr(s, d);
608 }
609 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
610
/* Emit MOVW from 16-bit register s into 16-bit register d. */
611 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
612 {
613 MOVWrr(s, d);
614 }
615 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
616
/* Emit MOVL loading d from the address formed by baser, index and factor, with displacement 0. */
617 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
618 {
619 MOVLmr(0, baser, index, factor, d);
620 }
621 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
622
/* Emit MOVW loading d from the address formed by baser, index and factor, with displacement 0. */
623 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
624 {
625 MOVWmr(0, baser, index, factor, d);
626 }
627 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
628
/* Emit MOVB loading d from the address formed by baser, index and factor, with displacement 0. */
629 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
630 {
631 MOVBmr(0, baser, index, factor, d);
632 }
633 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
634
/* Emit MOVL storing s to the address formed by baser, index and factor, with displacement 0. */
635 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
636 {
637 MOVLrm(s, 0, baser, index, factor);
638 }
639 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
640
/* Emit MOVW storing s to the address formed by baser, index and factor, with displacement 0. */
641 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
642 {
643 MOVWrm(s, 0, baser, index, factor);
644 }
645 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
646
/* Emit MOVB storing s to the address formed by baser, index and factor, with displacement 0. */
647 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
648 {
649 MOVBrm(s, 0, baser, index, factor);
650 }
651 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
652
/* Emit MOVL storing s to the address formed by displacement base, baser, index and factor. */
653 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
654 {
655 MOVLrm(s, base, baser, index, factor);
656 }
657 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
658
/* Emit MOVW storing s to the address formed by displacement base, baser, index and factor. */
659 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
660 {
661 MOVWrm(s, base, baser, index, factor);
662 }
663 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
664
/* Emit MOVB storing s to the address formed by displacement base, baser, index and factor. */
665 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
666 {
667 MOVBrm(s, base, baser, index, factor);
668 }
669 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
670
/* Emit MOVL loading d from the address formed by displacement base, baser, index and factor. */
671 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
672 {
673 MOVLmr(base, baser, index, factor, d);
674 }
675 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
676
/* Emit MOVW loading d from the address formed by displacement base, baser, index and factor. */
677 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
678 {
679 MOVWmr(base, baser, index, factor, d);
680 }
681 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
682
/* Emit MOVB loading d from the address formed by displacement base, baser, index and factor. */
683 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
684 {
685 MOVBmr(base, baser, index, factor, d);
686 }
687 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
688
/* Emit MOVL loading d from the address formed by displacement base, index and factor (no base register). */
689 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
690 {
691 MOVLmr(base, X86_NOREG, index, factor, d);
692 }
693 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
694
/* Emit a conditional 32-bit load into d from the address formed by displacement base,
   index and factor (no base register), under condition code cond.
   When have_cmov is set a single CMOVLmr is emitted; otherwise a short conditional
   jump followed by a plain MOVLmr is emitted instead. */
695 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
696 {
697 if (have_cmov)
698 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
699 else { /* replacement using branch and mov */
/* On x86_64 builds the fallback is treated as fatal: log the message and abort. */
700 #if defined(__x86_64__)
701 write_log("x86-64 implementations are bound to have CMOV!\n");
702 abort();
703 #endif
/* NOTE(review): cond^1 presumably selects the opposite condition, and the displacement 7
   presumably equals the encoded size of the MOVLmr being skipped — confirm against the emitter macros. */
704 JCCSii(cond^1, 7);
705 MOVLmr(base, X86_NOREG, index, factor, d);
706 }
707 }
708 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
709
/* Emit a conditional 32-bit load into d from memory address mem (no base or index
   register), under condition code cond.
   When have_cmov is set a single CMOVLmr is emitted; otherwise a short conditional
   jump followed by a plain MOVLmr is emitted instead. */
710 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
711 {
712 if (have_cmov)
713 CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
714 else { /* replacement using branch and mov */
/* On x86_64 builds the fallback is treated as fatal: log the message and abort. */
715 #if defined(__x86_64__)
716 write_log("x86-64 implementations are bound to have CMOV!\n");
717 abort();
718 #endif
/* NOTE(review): cond^1 presumably selects the opposite condition, and the displacement 6
   presumably equals the encoded size of the MOVLmr being skipped — confirm against the emitter macros. */
719 JCCSii(cond^1, 6);
720 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
721 }
722 }
723 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
724
/* Emit MOVL loading d from the address in register s plus displacement offset. */
725 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
726 {
727 MOVLmr(offset, s, X86_NOREG, 1, d);
728 }
729 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
730
/* Emit MOVW loading d from the address in register s plus displacement offset. */
731 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
732 {
733 MOVWmr(offset, s, X86_NOREG, 1, d);
734 }
735 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
736
/* Emit MOVB loading d from the address in register s plus displacement offset. */
737 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
738 {
739 MOVBmr(offset, s, X86_NOREG, 1, d);
740 }
741 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
742
/* Emit MOVL loading d from the address in register s plus displacement offset.
   The body is identical to raw_mov_l_rR in this file. */
743 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
744 {
745 MOVLmr(offset, s, X86_NOREG, 1, d);
746 }
747 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
748
/* Emit MOVW loading d from the address in register s plus displacement offset.
   The body is identical to raw_mov_w_rR in this file. */
749 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
750 {
751 MOVWmr(offset, s, X86_NOREG, 1, d);
752 }
753 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
754
/* Emit MOVB loading d from the address in register s plus displacement offset.
   The body is identical to raw_mov_b_rR in this file. */
755 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
756 {
757 MOVBmr(offset, s, X86_NOREG, 1, d);
758 }
759 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
760
/* Emit MOVL storing immediate i to the address in register d plus displacement offset. */
761 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
762 {
763 MOVLim(i, offset, d, X86_NOREG, 1);
764 }
765 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
766
/* Emit MOVW storing immediate i to the address in register d plus displacement offset. */
767 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
768 {
769 MOVWim(i, offset, d, X86_NOREG, 1);
770 }
771 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
772
/* Emit MOVB storing immediate i to the address in register d plus displacement offset. */
773 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
774 {
775 MOVBim(i, offset, d, X86_NOREG, 1);
776 }
777 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
778
/* Emit MOVL storing register s to the address in register d plus displacement offset. */
779 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
780 {
781 MOVLrm(s, offset, d, X86_NOREG, 1);
782 }
783 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
784
/* Emit MOVW storing register s to the address in register d plus displacement offset. */
785 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
786 {
787 MOVWrm(s, offset, d, X86_NOREG, 1);
788 }
789 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
790
/* Emit MOVB storing register s to the address in register d plus displacement offset. */
791 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
792 {
793 MOVBrm(s, offset, d, X86_NOREG, 1);
794 }
795 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
796
/* Emit LEAL placing into d the address formed by register s plus displacement offset. */
797 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
798 {
799 LEALmr(offset, s, X86_NOREG, 1, d);
800 }
801 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
802
/* Emit LEAL placing into d the address formed by displacement offset, register s, index and factor. */
803 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
804 {
805 LEALmr(offset, s, index, factor, d);
806 }
807 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
808
/* Emit LEAL placing into d the address formed by register s, index and factor, with displacement 0. */
809 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
810 {
811 LEALmr(0, s, index, factor, d);
812 }
813 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
814
/* Emit MOVL storing register s to the address in register d plus displacement offset.
   The body is identical to raw_mov_l_Rr in this file. */
815 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
816 {
817 MOVLrm(s, offset, d, X86_NOREG, 1);
818 }
819 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
820
/* Emit MOVW storing register s to the address in register d plus displacement offset.
   The body is identical to raw_mov_w_Rr in this file. */
821 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
822 {
823 MOVWrm(s, offset, d, X86_NOREG, 1);
824 }
825 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
826
/* Emit MOVB storing register s to the address in register d plus displacement offset.
   The body is identical to raw_mov_b_Rr in this file. */
827 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
828 {
829 MOVBrm(s, offset, d, X86_NOREG, 1);
830 }
831 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
832
/* Emit BSWAPL on the 32-bit register r. */
833 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
834 {
835 BSWAPLr(r);
836 }
837 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
838
/* Swap the two bytes of the 16-bit register r by emitting ROLW with an immediate count of 8. */
839 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
840 {
841 ROLWir(8, r);
842 }
843 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
844
/* Emit MOVL from 32-bit register s into 32-bit register d. */
845 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
846 {
847 MOVLrr(s, d);
848 }
849 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
850
/* Emit MOVL storing register s to memory address d (no base or index register). */
851 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
852 {
853 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
854 }
855 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
856
/* Emit MOVW storing register s to memory address d (no base or index register). */
857 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
858 {
859 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
860 }
861 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
862
/* Emit MOVW loading register d from memory address s (no base or index register). */
863 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
864 {
865 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
866 }
867 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
868
/* Emit MOVB storing register s to memory address d (no base or index register). */
869 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
870 {
871 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
872 }
873 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
874
/* Emit MOVB loading register d from memory address s (no base or index register). */
875 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
876 {
877 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
878 }
879 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
880
/* Emit MOVL loading immediate s into the 32-bit register d. */
881 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
882 {
883 MOVLir(s, d);
884 }
885 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
886
/* Emit MOVW loading immediate s into the 16-bit register d. */
887 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
888 {
889 MOVWir(s, d);
890 }
891 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
892
/* Emit MOVB loading immediate s into the byte register d. */
893 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
894 {
895 MOVBir(s, d);
896 }
897 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
898
/* Emit ADCL of immediate s into the 32-bit value at memory address d (no base or index register). */
899 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
900 {
901 ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
902 }
903 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
904
/* Emit ADDL of immediate s into the 32-bit value at memory address d (no base or index register). */
905 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
906 {
907 ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
908 }
909 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
910
/* Emit ADDW of immediate s into the 16-bit value at memory address d (no base or index register). */
911 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
912 {
913 ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
914 }
915 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
916
/* Emit ADDB of immediate s into the byte at memory address d (no base or index register). */
917 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
918 {
919 ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
920 }
921 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
922
/* Emit TESTL of immediate i against the 32-bit register d. */
923 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
924 {
925 TESTLir(i, d);
926 }
927 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
928
/* Emit TESTL of register s against the 32-bit register d. */
929 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
930 {
931 TESTLrr(s, d);
932 }
933 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
934
/* Emit TESTW of register s against the 16-bit register d. */
935 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
936 {
937 TESTWrr(s, d);
938 }
939 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
940
/* Emit TESTB of register s against the byte register d. */
941 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
942 {
943 TESTBrr(s, d);
944 }
945 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
946
/* Emit XORL of immediate i into the 32-bit register d. */
947 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
948 {
949 XORLir(i, d);
950 }
951 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
952
/* Emit ANDL of immediate i into the 32-bit register d. */
953 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
954 {
955 ANDLir(i, d);
956 }
957 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
958
/* Emit ANDW of immediate i into the 16-bit register d. */
959 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
960 {
961 ANDWir(i, d);
962 }
963 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
964
/* Emit ANDL of register s into the 32-bit register d. */
965 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
966 {
967 ANDLrr(s, d);
968 }
969 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
970
/* Emit ANDW of register s into the 16-bit register d. */
971 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
972 {
973 ANDWrr(s, d);
974 }
975 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
976
/* Emit ANDB of register s into the byte register d. */
977 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
978 {
979 ANDBrr(s, d);
980 }
981 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
982
/* Emit ORL of immediate i into the 32-bit register d. */
983 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
984 {
985 ORLir(i, d);
986 }
987 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
988
/* Emit ORL of register s into the 32-bit register d. */
989 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
990 {
991 ORLrr(s, d);
992 }
993 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
994
/* Emit ORW of register s into the 16-bit register d. */
995 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
996 {
997 ORWrr(s, d);
998 }
999 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1000
/* Emit ORB of register s into the byte register d. */
1001 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1002 {
1003 ORBrr(s, d);
1004 }
1005 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1006
/* Emit ADCL of register s into the 32-bit register d. */
1007 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1008 {
1009 ADCLrr(s, d);
1010 }
1011 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1012
/* Emit ADCW of register s into the 16-bit register d. */
1013 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1014 {
1015 ADCWrr(s, d);
1016 }
1017 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1018
/* Emit ADCB of register s into the byte register d. */
1019 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1020 {
1021 ADCBrr(s, d);
1022 }
1023 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1024
/* Emit ADDL of register s into the 32-bit register d. */
1025 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1026 {
1027 ADDLrr(s, d);
1028 }
1029 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1030
/* Emit ADDW of register s into the 16-bit register d. */
1031 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1032 {
1033 ADDWrr(s, d);
1034 }
1035 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1036
/* Emit ADDB of register s into the byte register d. */
1037 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1038 {
1039 ADDBrr(s, d);
1040 }
1041 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1042
/* Emit SUBL of immediate i from the 32-bit register d. */
1043 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1044 {
1045 SUBLir(i, d);
1046 }
1047 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1048
/* Emit SUBB of immediate i from the byte register d. */
1049 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1050 {
1051 SUBBir(i, d);
1052 }
1053 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1054
/* Emit ADDL of immediate i into the 32-bit register d. */
1055 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1056 {
1057 ADDLir(i, d);
1058 }
1059 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1060
/* Emit ADDW of immediate i into the 16-bit register d. */
1061 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1062 {
1063 ADDWir(i, d);
1064 }
1065 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1066
/* Emit ADDB of immediate i into the byte register d. */
1067 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1068 {
1069 ADDBir(i, d);
1070 }
1071 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1072
/* Emit SBBL of register s from the 32-bit register d. */
1073 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1074 {
1075 SBBLrr(s, d);
1076 }
1077 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1078
/* Emit SBBW of register s from the 16-bit register d. */
1079 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1080 {
1081 SBBWrr(s, d);
1082 }
1083 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1084
/* Emit SBBB of register s from the byte register d. */
1085 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1086 {
1087 SBBBrr(s, d);
1088 }
1089 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1090
/* Emit SUBL of register s from the 32-bit register d. */
1091 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1092 {
1093 SUBLrr(s, d);
1094 }
1095 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1096
/* Emit SUBW of register s from the 16-bit register d. */
1097 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1098 {
1099 SUBWrr(s, d);
1100 }
1101 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1102
/* Emit SUBB of register s from the byte register d. */
1103 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1104 {
1105 SUBBrr(s, d);
1106 }
1107 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1108
/* Emit CMPL of register s against the 32-bit register d. */
1109 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1110 {
1111 CMPLrr(s, d);
1112 }
1113 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1114
/* Emit CMPL of immediate i against the 32-bit register r. */
1115 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1116 {
1117 CMPLir(i, r);
1118 }
1119 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1120
/* Emit CMPW of register s against the 16-bit register d. */
1121 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1122 {
1123 CMPWrr(s, d);
1124 }
1125 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1126
1127 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1128 {
1129 CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1130 }
1131 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1132
1133 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1134 {
1135 CMPBir(i, d);
1136 }
1137 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1138
1139 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1140 {
1141 CMPBrr(s, d);
1142 }
1143 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1144
1145 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1146 {
1147 CMPLmr(offset, X86_NOREG, index, factor, d);
1148 }
1149 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1150
1151 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1152 {
1153 XORLrr(s, d);
1154 }
1155 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1156
1157 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1158 {
1159 XORWrr(s, d);
1160 }
1161 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1162
1163 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1164 {
1165 XORBrr(s, d);
1166 }
1167 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1168
1169 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1170 {
1171 SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1172 }
1173 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1174
1175 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1176 {
1177 CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1178 }
1179 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1180
1181 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1182 {
1183 XCHGLrr(r2, r1);
1184 }
1185 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1186
1187 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1188 {
1189 PUSHF();
1190 }
1191 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1192
1193 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1194 {
1195 POPF();
1196 }
1197 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1198
1199 /* Generate floating-point instructions */
1200 static inline void x86_fadd_m(MEMR s)
1201 {
1202 FADDLm(s,X86_NOREG,X86_NOREG,1);
1203 }
1204
1205 #else
1206
/* Encoding-size optimisations used by the hand-written emitters below. */
const bool optimize_accum = true;		/* allow the short accumulator-only opcode forms */
const bool optimize_imm8 = true;		/* allow 8-bit immediates/displacements when the value fits */
const bool optimize_shift_once = true;	/* allow the shift/rotate-by-one opcodes (no count byte) */
1210
1211 /*************************************************************************
1212 * Actual encoding of the instructions on the target CPU *
1213 *************************************************************************/
1214
1215 static __inline__ int isaccum(int r)
1216 {
1217 return (r == EAX_INDEX);
1218 }
1219
1220 static __inline__ int isbyte(uae_s32 x)
1221 {
1222 return (x>=-128 && x<=127);
1223 }
1224
1225 static __inline__ int isword(uae_s32 x)
1226 {
1227 return (x>=-32768 && x<=32767);
1228 }
1229
1230 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1231 {
1232 emit_byte(0x50+r);
1233 }
1234 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1235
1236 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1237 {
1238 emit_byte(0x58+r);
1239 }
1240 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1241
1242 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1243 {
1244 emit_byte(0x8f);
1245 emit_byte(0x05);
1246 emit_long(d);
1247 }
1248 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1249
1250 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1251 {
1252 emit_byte(0x0f);
1253 emit_byte(0xba);
1254 emit_byte(0xe0+r);
1255 emit_byte(i);
1256 }
1257 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1258
1259 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1260 {
1261 emit_byte(0x0f);
1262 emit_byte(0xa3);
1263 emit_byte(0xc0+8*b+r);
1264 }
1265 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1266
1267 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1268 {
1269 emit_byte(0x0f);
1270 emit_byte(0xba);
1271 emit_byte(0xf8+r);
1272 emit_byte(i);
1273 }
1274 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1275
1276 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1277 {
1278 emit_byte(0x0f);
1279 emit_byte(0xbb);
1280 emit_byte(0xc0+8*b+r);
1281 }
1282 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1283
1284
1285 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1286 {
1287 emit_byte(0x0f);
1288 emit_byte(0xba);
1289 emit_byte(0xf0+r);
1290 emit_byte(i);
1291 }
1292 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1293
1294 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1295 {
1296 emit_byte(0x0f);
1297 emit_byte(0xb3);
1298 emit_byte(0xc0+8*b+r);
1299 }
1300 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1301
1302 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1303 {
1304 emit_byte(0x0f);
1305 emit_byte(0xba);
1306 emit_byte(0xe8+r);
1307 emit_byte(i);
1308 }
1309 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1310
1311 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1312 {
1313 emit_byte(0x0f);
1314 emit_byte(0xab);
1315 emit_byte(0xc0+8*b+r);
1316 }
1317 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1318
1319 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1320 {
1321 emit_byte(0x66);
1322 if (isbyte(i)) {
1323 emit_byte(0x83);
1324 emit_byte(0xe8+d);
1325 emit_byte(i);
1326 }
1327 else {
1328 if (optimize_accum && isaccum(d))
1329 emit_byte(0x2d);
1330 else {
1331 emit_byte(0x81);
1332 emit_byte(0xe8+d);
1333 }
1334 emit_word(i);
1335 }
1336 }
1337 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1338
1339
1340 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1341 {
1342 emit_byte(0x8b);
1343 emit_byte(0x05+8*d);
1344 emit_long(s);
1345 }
1346 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1347
1348 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1349 {
1350 emit_byte(0xc7);
1351 emit_byte(0x05);
1352 emit_long(d);
1353 emit_long(s);
1354 }
1355 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1356
1357 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1358 {
1359 emit_byte(0x66);
1360 emit_byte(0xc7);
1361 emit_byte(0x05);
1362 emit_long(d);
1363 emit_word(s);
1364 }
1365 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1366
1367 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1368 {
1369 emit_byte(0xc6);
1370 emit_byte(0x05);
1371 emit_long(d);
1372 emit_byte(s);
1373 }
1374 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1375
1376 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1377 {
1378 if (optimize_shift_once && (i == 1)) {
1379 emit_byte(0xd0);
1380 emit_byte(0x05);
1381 emit_long(d);
1382 }
1383 else {
1384 emit_byte(0xc0);
1385 emit_byte(0x05);
1386 emit_long(d);
1387 emit_byte(i);
1388 }
1389 }
1390 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1391
1392 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1393 {
1394 if (optimize_shift_once && (i == 1)) {
1395 emit_byte(0xd0);
1396 emit_byte(0xc0+r);
1397 }
1398 else {
1399 emit_byte(0xc0);
1400 emit_byte(0xc0+r);
1401 emit_byte(i);
1402 }
1403 }
1404 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1405
1406 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1407 {
1408 emit_byte(0x66);
1409 emit_byte(0xc1);
1410 emit_byte(0xc0+r);
1411 emit_byte(i);
1412 }
1413 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1414
1415 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1416 {
1417 if (optimize_shift_once && (i == 1)) {
1418 emit_byte(0xd1);
1419 emit_byte(0xc0+r);
1420 }
1421 else {
1422 emit_byte(0xc1);
1423 emit_byte(0xc0+r);
1424 emit_byte(i);
1425 }
1426 }
1427 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1428
1429 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1430 {
1431 emit_byte(0xd3);
1432 emit_byte(0xc0+d);
1433 }
1434 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1435
1436 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1437 {
1438 emit_byte(0x66);
1439 emit_byte(0xd3);
1440 emit_byte(0xc0+d);
1441 }
1442 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1443
1444 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1445 {
1446 emit_byte(0xd2);
1447 emit_byte(0xc0+d);
1448 }
1449 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1450
1451 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1452 {
1453 emit_byte(0xd3);
1454 emit_byte(0xe0+d);
1455 }
1456 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1457
1458 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1459 {
1460 emit_byte(0x66);
1461 emit_byte(0xd3);
1462 emit_byte(0xe0+d);
1463 }
1464 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1465
1466 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1467 {
1468 emit_byte(0xd2);
1469 emit_byte(0xe0+d);
1470 }
1471 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1472
1473 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1474 {
1475 if (optimize_shift_once && (i == 1)) {
1476 emit_byte(0xd0);
1477 emit_byte(0xc8+r);
1478 }
1479 else {
1480 emit_byte(0xc0);
1481 emit_byte(0xc8+r);
1482 emit_byte(i);
1483 }
1484 }
1485 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1486
1487 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1488 {
1489 emit_byte(0x66);
1490 emit_byte(0xc1);
1491 emit_byte(0xc8+r);
1492 emit_byte(i);
1493 }
1494 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1495
1496 // gb-- used for making an fpcr value in compemu_fpp.cpp
1497 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1498 {
1499 emit_byte(0x0b);
1500 emit_byte(0x05+8*d);
1501 emit_long(s);
1502 }
1503 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1504
1505 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1506 {
1507 if (optimize_shift_once && (i == 1)) {
1508 emit_byte(0xd1);
1509 emit_byte(0xc8+r);
1510 }
1511 else {
1512 emit_byte(0xc1);
1513 emit_byte(0xc8+r);
1514 emit_byte(i);
1515 }
1516 }
1517 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1518
1519 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1520 {
1521 emit_byte(0xd3);
1522 emit_byte(0xc8+d);
1523 }
1524 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1525
1526 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1527 {
1528 emit_byte(0x66);
1529 emit_byte(0xd3);
1530 emit_byte(0xc8+d);
1531 }
1532 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1533
1534 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1535 {
1536 emit_byte(0xd2);
1537 emit_byte(0xc8+d);
1538 }
1539 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1540
1541 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1542 {
1543 emit_byte(0xd3);
1544 emit_byte(0xe8+d);
1545 }
1546 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1547
1548 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1549 {
1550 emit_byte(0x66);
1551 emit_byte(0xd3);
1552 emit_byte(0xe8+d);
1553 }
1554 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1555
1556 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1557 {
1558 emit_byte(0xd2);
1559 emit_byte(0xe8+d);
1560 }
1561 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1562
1563 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1564 {
1565 emit_byte(0xd3);
1566 emit_byte(0xf8+d);
1567 }
1568 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1569
1570 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1571 {
1572 emit_byte(0x66);
1573 emit_byte(0xd3);
1574 emit_byte(0xf8+d);
1575 }
1576 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1577
1578 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1579 {
1580 emit_byte(0xd2);
1581 emit_byte(0xf8+d);
1582 }
1583 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1584
1585 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1586 {
1587 if (optimize_shift_once && (i == 1)) {
1588 emit_byte(0xd1);
1589 emit_byte(0xe0+r);
1590 }
1591 else {
1592 emit_byte(0xc1);
1593 emit_byte(0xe0+r);
1594 emit_byte(i);
1595 }
1596 }
1597 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1598
1599 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1600 {
1601 emit_byte(0x66);
1602 emit_byte(0xc1);
1603 emit_byte(0xe0+r);
1604 emit_byte(i);
1605 }
1606 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1607
1608 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1609 {
1610 if (optimize_shift_once && (i == 1)) {
1611 emit_byte(0xd0);
1612 emit_byte(0xe0+r);
1613 }
1614 else {
1615 emit_byte(0xc0);
1616 emit_byte(0xe0+r);
1617 emit_byte(i);
1618 }
1619 }
1620 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1621
1622 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1623 {
1624 if (optimize_shift_once && (i == 1)) {
1625 emit_byte(0xd1);
1626 emit_byte(0xe8+r);
1627 }
1628 else {
1629 emit_byte(0xc1);
1630 emit_byte(0xe8+r);
1631 emit_byte(i);
1632 }
1633 }
1634 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1635
1636 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1637 {
1638 emit_byte(0x66);
1639 emit_byte(0xc1);
1640 emit_byte(0xe8+r);
1641 emit_byte(i);
1642 }
1643 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1644
1645 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1646 {
1647 if (optimize_shift_once && (i == 1)) {
1648 emit_byte(0xd0);
1649 emit_byte(0xe8+r);
1650 }
1651 else {
1652 emit_byte(0xc0);
1653 emit_byte(0xe8+r);
1654 emit_byte(i);
1655 }
1656 }
1657 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1658
1659 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1660 {
1661 if (optimize_shift_once && (i == 1)) {
1662 emit_byte(0xd1);
1663 emit_byte(0xf8+r);
1664 }
1665 else {
1666 emit_byte(0xc1);
1667 emit_byte(0xf8+r);
1668 emit_byte(i);
1669 }
1670 }
1671 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1672
1673 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1674 {
1675 emit_byte(0x66);
1676 emit_byte(0xc1);
1677 emit_byte(0xf8+r);
1678 emit_byte(i);
1679 }
1680 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1681
1682 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1683 {
1684 if (optimize_shift_once && (i == 1)) {
1685 emit_byte(0xd0);
1686 emit_byte(0xf8+r);
1687 }
1688 else {
1689 emit_byte(0xc0);
1690 emit_byte(0xf8+r);
1691 emit_byte(i);
1692 }
1693 }
1694 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1695
1696 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1697 {
1698 emit_byte(0x9e);
1699 }
1700 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1701
1702 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1703 {
1704 emit_byte(0x0f);
1705 emit_byte(0xa2);
1706 }
1707 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1708
1709 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1710 {
1711 emit_byte(0x9f);
1712 }
1713 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1714
1715 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1716 {
1717 emit_byte(0x0f);
1718 emit_byte(0x90+cc);
1719 emit_byte(0xc0+d);
1720 }
1721 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1722
1723 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1724 {
1725 emit_byte(0x0f);
1726 emit_byte(0x90+cc);
1727 emit_byte(0x05);
1728 emit_long(d);
1729 }
1730 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1731
1732 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1733 {
1734 if (have_cmov) {
1735 emit_byte(0x0f);
1736 emit_byte(0x40+cc);
1737 emit_byte(0xc0+8*d+s);
1738 }
1739 else { /* replacement using branch and mov */
1740 int uncc=(cc^1);
1741 emit_byte(0x70+uncc);
1742 emit_byte(2); /* skip next 2 bytes if not cc=true */
1743 emit_byte(0x89);
1744 emit_byte(0xc0+8*s+d);
1745 }
1746 }
1747 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1748
1749 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1750 {
1751 emit_byte(0x0f);
1752 emit_byte(0xbc);
1753 emit_byte(0xc0+8*d+s);
1754 }
1755 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1756
1757 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1758 {
1759 emit_byte(0x0f);
1760 emit_byte(0xbf);
1761 emit_byte(0xc0+8*d+s);
1762 }
1763 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1764
1765 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1766 {
1767 emit_byte(0x0f);
1768 emit_byte(0xbe);
1769 emit_byte(0xc0+8*d+s);
1770 }
1771 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1772
1773 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1774 {
1775 emit_byte(0x0f);
1776 emit_byte(0xb7);
1777 emit_byte(0xc0+8*d+s);
1778 }
1779 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1780
1781 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1782 {
1783 emit_byte(0x0f);
1784 emit_byte(0xb6);
1785 emit_byte(0xc0+8*d+s);
1786 }
1787 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1788
1789 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1790 {
1791 emit_byte(0x0f);
1792 emit_byte(0xaf);
1793 emit_byte(0xc0+8*d+s);
1794 }
1795 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1796
1797 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1798 {
1799 if (d!=MUL_NREG1 || s!=MUL_NREG2)
1800 abort();
1801 emit_byte(0xf7);
1802 emit_byte(0xea);
1803 }
1804 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1805
1806 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1807 {
1808 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1809 printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1810 abort();
1811 }
1812 emit_byte(0xf7);
1813 emit_byte(0xe2);
1814 }
1815 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1816
1817 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1818 {
1819 abort(); /* %^$&%^$%#^ x86! */
1820 emit_byte(0x0f);
1821 emit_byte(0xaf);
1822 emit_byte(0xc0+8*d+s);
1823 }
1824 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1825
1826 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1827 {
1828 emit_byte(0x88);
1829 emit_byte(0xc0+8*s+d);
1830 }
1831 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1832
1833 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1834 {
1835 emit_byte(0x66);
1836 emit_byte(0x89);
1837 emit_byte(0xc0+8*s+d);
1838 }
1839 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1840
1841 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1842 {
1843 int isebp=(baser==5)?0x40:0;
1844 int fi;
1845
1846 switch(factor) {
1847 case 1: fi=0; break;
1848 case 2: fi=1; break;
1849 case 4: fi=2; break;
1850 case 8: fi=3; break;
1851 default: abort();
1852 }
1853
1854
1855 emit_byte(0x8b);
1856 emit_byte(0x04+8*d+isebp);
1857 emit_byte(baser+8*index+0x40*fi);
1858 if (isebp)
1859 emit_byte(0x00);
1860 }
1861 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1862
1863 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1864 {
1865 int fi;
1866 int isebp;
1867
1868 switch(factor) {
1869 case 1: fi=0; break;
1870 case 2: fi=1; break;
1871 case 4: fi=2; break;
1872 case 8: fi=3; break;
1873 default: abort();
1874 }
1875 isebp=(baser==5)?0x40:0;
1876
1877 emit_byte(0x66);
1878 emit_byte(0x8b);
1879 emit_byte(0x04+8*d+isebp);
1880 emit_byte(baser+8*index+0x40*fi);
1881 if (isebp)
1882 emit_byte(0x00);
1883 }
1884 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1885
1886 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1887 {
1888 int fi;
1889 int isebp;
1890
1891 switch(factor) {
1892 case 1: fi=0; break;
1893 case 2: fi=1; break;
1894 case 4: fi=2; break;
1895 case 8: fi=3; break;
1896 default: abort();
1897 }
1898 isebp=(baser==5)?0x40:0;
1899
1900 emit_byte(0x8a);
1901 emit_byte(0x04+8*d+isebp);
1902 emit_byte(baser+8*index+0x40*fi);
1903 if (isebp)
1904 emit_byte(0x00);
1905 }
1906 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1907
1908 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1909 {
1910 int fi;
1911 int isebp;
1912
1913 switch(factor) {
1914 case 1: fi=0; break;
1915 case 2: fi=1; break;
1916 case 4: fi=2; break;
1917 case 8: fi=3; break;
1918 default: abort();
1919 }
1920
1921
1922 isebp=(baser==5)?0x40:0;
1923
1924 emit_byte(0x89);
1925 emit_byte(0x04+8*s+isebp);
1926 emit_byte(baser+8*index+0x40*fi);
1927 if (isebp)
1928 emit_byte(0x00);
1929 }
1930 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1931
1932 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1933 {
1934 int fi;
1935 int isebp;
1936
1937 switch(factor) {
1938 case 1: fi=0; break;
1939 case 2: fi=1; break;
1940 case 4: fi=2; break;
1941 case 8: fi=3; break;
1942 default: abort();
1943 }
1944 isebp=(baser==5)?0x40:0;
1945
1946 emit_byte(0x66);
1947 emit_byte(0x89);
1948 emit_byte(0x04+8*s+isebp);
1949 emit_byte(baser+8*index+0x40*fi);
1950 if (isebp)
1951 emit_byte(0x00);
1952 }
1953 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1954
1955 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1956 {
1957 int fi;
1958 int isebp;
1959
1960 switch(factor) {
1961 case 1: fi=0; break;
1962 case 2: fi=1; break;
1963 case 4: fi=2; break;
1964 case 8: fi=3; break;
1965 default: abort();
1966 }
1967 isebp=(baser==5)?0x40:0;
1968
1969 emit_byte(0x88);
1970 emit_byte(0x04+8*s+isebp);
1971 emit_byte(baser+8*index+0x40*fi);
1972 if (isebp)
1973 emit_byte(0x00);
1974 }
1975 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1976
1977 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1978 {
1979 int fi;
1980
1981 switch(factor) {
1982 case 1: fi=0; break;
1983 case 2: fi=1; break;
1984 case 4: fi=2; break;
1985 case 8: fi=3; break;
1986 default: abort();
1987 }
1988
1989 emit_byte(0x89);
1990 emit_byte(0x84+8*s);
1991 emit_byte(baser+8*index+0x40*fi);
1992 emit_long(base);
1993 }
1994 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1995
1996 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1997 {
1998 int fi;
1999
2000 switch(factor) {
2001 case 1: fi=0; break;
2002 case 2: fi=1; break;
2003 case 4: fi=2; break;
2004 case 8: fi=3; break;
2005 default: abort();
2006 }
2007
2008 emit_byte(0x66);
2009 emit_byte(0x89);
2010 emit_byte(0x84+8*s);
2011 emit_byte(baser+8*index+0x40*fi);
2012 emit_long(base);
2013 }
2014 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2015
2016 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2017 {
2018 int fi;
2019
2020 switch(factor) {
2021 case 1: fi=0; break;
2022 case 2: fi=1; break;
2023 case 4: fi=2; break;
2024 case 8: fi=3; break;
2025 default: abort();
2026 }
2027
2028 emit_byte(0x88);
2029 emit_byte(0x84+8*s);
2030 emit_byte(baser+8*index+0x40*fi);
2031 emit_long(base);
2032 }
2033 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2034
2035 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2036 {
2037 int fi;
2038
2039 switch(factor) {
2040 case 1: fi=0; break;
2041 case 2: fi=1; break;
2042 case 4: fi=2; break;
2043 case 8: fi=3; break;
2044 default: abort();
2045 }
2046
2047 emit_byte(0x8b);
2048 emit_byte(0x84+8*d);
2049 emit_byte(baser+8*index+0x40*fi);
2050 emit_long(base);
2051 }
2052 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2053
2054 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2055 {
2056 int fi;
2057
2058 switch(factor) {
2059 case 1: fi=0; break;
2060 case 2: fi=1; break;
2061 case 4: fi=2; break;
2062 case 8: fi=3; break;
2063 default: abort();
2064 }
2065
2066 emit_byte(0x66);
2067 emit_byte(0x8b);
2068 emit_byte(0x84+8*d);
2069 emit_byte(baser+8*index+0x40*fi);
2070 emit_long(base);
2071 }
2072 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2073
2074 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2075 {
2076 int fi;
2077
2078 switch(factor) {
2079 case 1: fi=0; break;
2080 case 2: fi=1; break;
2081 case 4: fi=2; break;
2082 case 8: fi=3; break;
2083 default: abort();
2084 }
2085
2086 emit_byte(0x8a);
2087 emit_byte(0x84+8*d);
2088 emit_byte(baser+8*index+0x40*fi);
2089 emit_long(base);
2090 }
2091 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2092
2093 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2094 {
2095 int fi;
2096 switch(factor) {
2097 case 1: fi=0; break;
2098 case 2: fi=1; break;
2099 case 4: fi=2; break;
2100 case 8: fi=3; break;
2101 default:
2102 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2103 abort();
2104 }
2105 emit_byte(0x8b);
2106 emit_byte(0x04+8*d);
2107 emit_byte(0x05+8*index+64*fi);
2108 emit_long(base);
2109 }
2110 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2111
2112 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2113 {
2114 int fi;
2115 switch(factor) {
2116 case 1: fi=0; break;
2117 case 2: fi=1; break;
2118 case 4: fi=2; break;
2119 case 8: fi=3; break;
2120 default:
2121 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2122 abort();
2123 }
2124 if (have_cmov) {
2125 emit_byte(0x0f);
2126 emit_byte(0x40+cond);
2127 emit_byte(0x04+8*d);
2128 emit_byte(0x05+8*index+64*fi);
2129 emit_long(base);
2130 }
2131 else { /* replacement using branch and mov */
2132 int uncc=(cond^1);
2133 emit_byte(0x70+uncc);
2134 emit_byte(7); /* skip next 7 bytes if not cc=true */
2135 emit_byte(0x8b);
2136 emit_byte(0x04+8*d);
2137 emit_byte(0x05+8*index+64*fi);
2138 emit_long(base);
2139 }
2140 }
2141 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2142
2143 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2144 {
2145 if (have_cmov) {
2146 emit_byte(0x0f);
2147 emit_byte(0x40+cond);
2148 emit_byte(0x05+8*d);
2149 emit_long(mem);
2150 }
2151 else { /* replacement using branch and mov */
2152 int uncc=(cond^1);
2153 emit_byte(0x70+uncc);
2154 emit_byte(6); /* skip next 6 bytes if not cc=true */
2155 emit_byte(0x8b);
2156 emit_byte(0x05+8*d);
2157 emit_long(mem);
2158 }
2159 }
2160 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2161
2162 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2163 {
2164 Dif(!isbyte(offset)) abort();
2165 emit_byte(0x8b);
2166 emit_byte(0x40+8*d+s);
2167 emit_byte(offset);
2168 }
2169 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2170
2171 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2172 {
2173 Dif(!isbyte(offset)) abort();
2174 emit_byte(0x66);
2175 emit_byte(0x8b);
2176 emit_byte(0x40+8*d+s);
2177 emit_byte(offset);
2178 }
2179 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2180
2181 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2182 {
2183 Dif(!isbyte(offset)) abort();
2184 emit_byte(0x8a);
2185 emit_byte(0x40+8*d+s);
2186 emit_byte(offset);
2187 }
2188 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2189
2190 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2191 {
2192 emit_byte(0x8b);
2193 emit_byte(0x80+8*d+s);
2194 emit_long(offset);
2195 }
2196 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2197
2198 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2199 {
2200 emit_byte(0x66);
2201 emit_byte(0x8b);
2202 emit_byte(0x80+8*d+s);
2203 emit_long(offset);
2204 }
2205 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2206
2207 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2208 {
2209 emit_byte(0x8a);
2210 emit_byte(0x80+8*d+s);
2211 emit_long(offset);
2212 }
2213 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2214
2215 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2216 {
2217 Dif(!isbyte(offset)) abort();
2218 emit_byte(0xc7);
2219 emit_byte(0x40+d);
2220 emit_byte(offset);
2221 emit_long(i);
2222 }
2223 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2224
2225 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2226 {
2227 Dif(!isbyte(offset)) abort();
2228 emit_byte(0x66);
2229 emit_byte(0xc7);
2230 emit_byte(0x40+d);
2231 emit_byte(offset);
2232 emit_word(i);
2233 }
2234 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2235
2236 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2237 {
2238 Dif(!isbyte(offset)) abort();
2239 emit_byte(0xc6);
2240 emit_byte(0x40+d);
2241 emit_byte(offset);
2242 emit_byte(i);
2243 }
2244 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2245
2246 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2247 {
2248 Dif(!isbyte(offset)) abort();
2249 emit_byte(0x89);
2250 emit_byte(0x40+8*s+d);
2251 emit_byte(offset);
2252 }
2253 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2254
2255 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2256 {
2257 Dif(!isbyte(offset)) abort();
2258 emit_byte(0x66);
2259 emit_byte(0x89);
2260 emit_byte(0x40+8*s+d);
2261 emit_byte(offset);
2262 }
2263 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2264
2265 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2266 {
2267 Dif(!isbyte(offset)) abort();
2268 emit_byte(0x88);
2269 emit_byte(0x40+8*s+d);
2270 emit_byte(offset);
2271 }
2272 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2273
2274 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2275 {
2276 if (optimize_imm8 && isbyte(offset)) {
2277 emit_byte(0x8d);
2278 emit_byte(0x40+8*d+s);
2279 emit_byte(offset);
2280 }
2281 else {
2282 emit_byte(0x8d);
2283 emit_byte(0x80+8*d+s);
2284 emit_long(offset);
2285 }
2286 }
2287 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2288
2289 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2290 {
2291 int fi;
2292
2293 switch(factor) {
2294 case 1: fi=0; break;
2295 case 2: fi=1; break;
2296 case 4: fi=2; break;
2297 case 8: fi=3; break;
2298 default: abort();
2299 }
2300
2301 if (optimize_imm8 && isbyte(offset)) {
2302 emit_byte(0x8d);
2303 emit_byte(0x44+8*d);
2304 emit_byte(0x40*fi+8*index+s);
2305 emit_byte(offset);
2306 }
2307 else {
2308 emit_byte(0x8d);
2309 emit_byte(0x84+8*d);
2310 emit_byte(0x40*fi+8*index+s);
2311 emit_long(offset);
2312 }
2313 }
2314 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2315
2316 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2317 {
2318 int isebp=(s==5)?0x40:0;
2319 int fi;
2320
2321 switch(factor) {
2322 case 1: fi=0; break;
2323 case 2: fi=1; break;
2324 case 4: fi=2; break;
2325 case 8: fi=3; break;
2326 default: abort();
2327 }
2328
2329 emit_byte(0x8d);
2330 emit_byte(0x04+8*d+isebp);
2331 emit_byte(0x40*fi+8*index+s);
2332 if (isebp)
2333 emit_byte(0);
2334 }
2335 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2336
2337 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2338 {
2339 if (optimize_imm8 && isbyte(offset)) {
2340 emit_byte(0x89);
2341 emit_byte(0x40+8*s+d);
2342 emit_byte(offset);
2343 }
2344 else {
2345 emit_byte(0x89);
2346 emit_byte(0x80+8*s+d);
2347 emit_long(offset);
2348 }
2349 }
2350 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2351
2352 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2353 {
2354 emit_byte(0x66);
2355 emit_byte(0x89);
2356 emit_byte(0x80+8*s+d);
2357 emit_long(offset);
2358 }
2359 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2360
2361 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2362 {
2363 if (optimize_imm8 && isbyte(offset)) {
2364 emit_byte(0x88);
2365 emit_byte(0x40+8*s+d);
2366 emit_byte(offset);
2367 }
2368 else {
2369 emit_byte(0x88);
2370 emit_byte(0x80+8*s+d);
2371 emit_long(offset);
2372 }
2373 }
2374 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2375
2376 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2377 {
2378 emit_byte(0x0f);
2379 emit_byte(0xc8+r);
2380 }
2381 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2382
2383 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2384 {
2385 emit_byte(0x66);
2386 emit_byte(0xc1);
2387 emit_byte(0xc0+r);
2388 emit_byte(0x08);
2389 }
2390 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2391
2392 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2393 {
2394 emit_byte(0x89);
2395 emit_byte(0xc0+8*s+d);
2396 }
2397 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2398
2399 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2400 {
2401 emit_byte(0x89);
2402 emit_byte(0x05+8*s);
2403 emit_long(d);
2404 }
2405 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2406
2407 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2408 {
2409 emit_byte(0x66);
2410 emit_byte(0x89);
2411 emit_byte(0x05+8*s);
2412 emit_long(d);
2413 }
2414 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2415
2416 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2417 {
2418 emit_byte(0x66);
2419 emit_byte(0x8b);
2420 emit_byte(0x05+8*d);
2421 emit_long(s);
2422 }
2423 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2424
2425 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2426 {
2427 emit_byte(0x88);
2428 emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
2429 emit_long(d);
2430 }
2431 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2432
2433 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2434 {
2435 emit_byte(0x8a);
2436 emit_byte(0x05+8*d);
2437 emit_long(s);
2438 }
2439 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2440
2441 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2442 {
2443 emit_byte(0xb8+d);
2444 emit_long(s);
2445 }
2446 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2447
2448 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2449 {
2450 emit_byte(0x66);
2451 emit_byte(0xb8+d);
2452 emit_word(s);
2453 }
2454 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2455
2456 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2457 {
2458 emit_byte(0xb0+d);
2459 emit_byte(s);
2460 }
2461 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2462
2463 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2464 {
2465 emit_byte(0x81);
2466 emit_byte(0x15);
2467 emit_long(d);
2468 emit_long(s);
2469 }
2470 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2471
2472 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2473 {
2474 if (optimize_imm8 && isbyte(s)) {
2475 emit_byte(0x83);
2476 emit_byte(0x05);
2477 emit_long(d);
2478 emit_byte(s);
2479 }
2480 else {
2481 emit_byte(0x81);
2482 emit_byte(0x05);
2483 emit_long(d);
2484 emit_long(s);
2485 }
2486 }
2487 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2488
2489 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2490 {
2491 emit_byte(0x66);
2492 emit_byte(0x81);
2493 emit_byte(0x05);
2494 emit_long(d);
2495 emit_word(s);
2496 }
2497 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2498
2499 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2500 {
2501 emit_byte(0x80);
2502 emit_byte(0x05);
2503 emit_long(d);
2504 emit_byte(s);
2505 }
2506 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2507
2508 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2509 {
2510 if (optimize_accum && isaccum(d))
2511 emit_byte(0xa9);
2512 else {
2513 emit_byte(0xf7);
2514 emit_byte(0xc0+d);
2515 }
2516 emit_long(i);
2517 }
2518 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2519
2520 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2521 {
2522 emit_byte(0x85);
2523 emit_byte(0xc0+8*s+d);
2524 }
2525 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2526
2527 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2528 {
2529 emit_byte(0x66);
2530 emit_byte(0x85);
2531 emit_byte(0xc0+8*s+d);
2532 }
2533 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2534
2535 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2536 {
2537 emit_byte(0x84);
2538 emit_byte(0xc0+8*s+d);
2539 }
2540 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2541
2542 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2543 {
2544 emit_byte(0x81);
2545 emit_byte(0xf0+d);
2546 emit_long(i);
2547 }
2548 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2549
2550 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2551 {
2552 if (optimize_imm8 && isbyte(i)) {
2553 emit_byte(0x83);
2554 emit_byte(0xe0+d);
2555 emit_byte(i);
2556 }
2557 else {
2558 if (optimize_accum && isaccum(d))
2559 emit_byte(0x25);
2560 else {
2561 emit_byte(0x81);
2562 emit_byte(0xe0+d);
2563 }
2564 emit_long(i);
2565 }
2566 }
2567 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2568
2569 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2570 {
2571 emit_byte(0x66);
2572 if (optimize_imm8 && isbyte(i)) {
2573 emit_byte(0x83);
2574 emit_byte(0xe0+d);
2575 emit_byte(i);
2576 }
2577 else {
2578 if (optimize_accum && isaccum(d))
2579 emit_byte(0x25);
2580 else {
2581 emit_byte(0x81);
2582 emit_byte(0xe0+d);
2583 }
2584 emit_word(i);
2585 }
2586 }
2587 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2588
2589 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2590 {
2591 emit_byte(0x21);
2592 emit_byte(0xc0+8*s+d);
2593 }
2594 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2595
2596 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2597 {
2598 emit_byte(0x66);
2599 emit_byte(0x21);
2600 emit_byte(0xc0+8*s+d);
2601 }
2602 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2603
2604 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2605 {
2606 emit_byte(0x20);
2607 emit_byte(0xc0+8*s+d);
2608 }
2609 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2610
2611 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2612 {
2613 if (optimize_imm8 && isbyte(i)) {
2614 emit_byte(0x83);
2615 emit_byte(0xc8+d);
2616 emit_byte(i);
2617 }
2618 else {
2619 if (optimize_accum && isaccum(d))
2620 emit_byte(0x0d);
2621 else {
2622 emit_byte(0x81);
2623 emit_byte(0xc8+d);
2624 }
2625 emit_long(i);
2626 }
2627 }
2628 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2629
2630 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2631 {
2632 emit_byte(0x09);
2633 emit_byte(0xc0+8*s+d);
2634 }
2635 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2636
2637 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2638 {
2639 emit_byte(0x66);
2640 emit_byte(0x09);
2641 emit_byte(0xc0+8*s+d);
2642 }
2643 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2644
2645 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2646 {
2647 emit_byte(0x08);
2648 emit_byte(0xc0+8*s+d);
2649 }
2650 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2651
2652 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2653 {
2654 emit_byte(0x11);
2655 emit_byte(0xc0+8*s+d);
2656 }
2657 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2658
2659 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2660 {
2661 emit_byte(0x66);
2662 emit_byte(0x11);
2663 emit_byte(0xc0+8*s+d);
2664 }
2665 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2666
2667 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2668 {
2669 emit_byte(0x10);
2670 emit_byte(0xc0+8*s+d);
2671 }
2672 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2673
2674 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2675 {
2676 emit_byte(0x01);
2677 emit_byte(0xc0+8*s+d);
2678 }
2679 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2680
2681 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2682 {
2683 emit_byte(0x66);
2684 emit_byte(0x01);
2685 emit_byte(0xc0+8*s+d);
2686 }
2687 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2688
2689 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2690 {
2691 emit_byte(0x00);
2692 emit_byte(0xc0+8*s+d);
2693 }
2694 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2695
2696 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2697 {
2698 if (isbyte(i)) {
2699 emit_byte(0x83);
2700 emit_byte(0xe8+d);
2701 emit_byte(i);
2702 }
2703 else {
2704 if (optimize_accum && isaccum(d))
2705 emit_byte(0x2d);
2706 else {
2707 emit_byte(0x81);
2708 emit_byte(0xe8+d);
2709 }
2710 emit_long(i);
2711 }
2712 }
2713 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2714
2715 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2716 {
2717 if (optimize_accum && isaccum(d))
2718 emit_byte(0x2c);
2719 else {
2720 emit_byte(0x80);
2721 emit_byte(0xe8+d);
2722 }
2723 emit_byte(i);
2724 }
2725 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2726
2727 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2728 {
2729 if (isbyte(i)) {
2730 emit_byte(0x83);
2731 emit_byte(0xc0+d);
2732 emit_byte(i);
2733 }
2734 else {
2735 if (optimize_accum && isaccum(d))
2736 emit_byte(0x05);
2737 else {
2738 emit_byte(0x81);
2739 emit_byte(0xc0+d);
2740 }
2741 emit_long(i);
2742 }
2743 }
2744 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2745
2746 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2747 {
2748 emit_byte(0x66);
2749 if (isbyte(i)) {
2750 emit_byte(0x83);
2751 emit_byte(0xc0+d);
2752 emit_byte(i);
2753 }
2754 else {
2755 if (optimize_accum && isaccum(d))
2756 emit_byte(0x05);
2757 else {
2758 emit_byte(0x81);
2759 emit_byte(0xc0+d);
2760 }
2761 emit_word(i);
2762 }
2763 }
2764 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2765
2766 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2767 {
2768 if (optimize_accum && isaccum(d))
2769 emit_byte(0x04);
2770 else {
2771 emit_byte(0x80);
2772 emit_byte(0xc0+d);
2773 }
2774 emit_byte(i);
2775 }
2776 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2777
2778 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2779 {
2780 emit_byte(0x19);
2781 emit_byte(0xc0+8*s+d);
2782 }
2783 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2784
2785 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2786 {
2787 emit_byte(0x66);
2788 emit_byte(0x19);
2789 emit_byte(0xc0+8*s+d);
2790 }
2791 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2792
2793 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2794 {
2795 emit_byte(0x18);
2796 emit_byte(0xc0+8*s+d);
2797 }
2798 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2799
2800 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2801 {
2802 emit_byte(0x29);
2803 emit_byte(0xc0+8*s+d);
2804 }
2805 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2806
2807 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2808 {
2809 emit_byte(0x66);
2810 emit_byte(0x29);
2811 emit_byte(0xc0+8*s+d);
2812 }
2813 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2814
2815 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2816 {
2817 emit_byte(0x28);
2818 emit_byte(0xc0+8*s+d);
2819 }
2820 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2821
2822 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2823 {
2824 emit_byte(0x39);
2825 emit_byte(0xc0+8*s+d);
2826 }
2827 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2828
2829 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2830 {
2831 if (optimize_imm8 && isbyte(i)) {
2832 emit_byte(0x83);
2833 emit_byte(0xf8+r);
2834 emit_byte(i);
2835 }
2836 else {
2837 if (optimize_accum && isaccum(r))
2838 emit_byte(0x3d);
2839 else {
2840 emit_byte(0x81);
2841 emit_byte(0xf8+r);
2842 }
2843 emit_long(i);
2844 }
2845 }
2846 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2847
2848 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2849 {
2850 emit_byte(0x66);
2851 emit_byte(0x39);
2852 emit_byte(0xc0+8*s+d);
2853 }
2854 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2855
2856 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2857 {
2858 emit_byte(0x80);
2859 emit_byte(0x3d);
2860 emit_long(d);
2861 emit_byte(s);
2862 }
2863 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2864
2865 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2866 {
2867 if (optimize_accum && isaccum(d))
2868 emit_byte(0x3c);
2869 else {
2870 emit_byte(0x80);
2871 emit_byte(0xf8+d);
2872 }
2873 emit_byte(i);
2874 }
2875 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2876
2877 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2878 {
2879 emit_byte(0x38);
2880 emit_byte(0xc0+8*s+d);
2881 }
2882 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2883
2884 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2885 {
2886 int fi;
2887
2888 switch(factor) {
2889 case 1: fi=0; break;
2890 case 2: fi=1; break;
2891 case 4: fi=2; break;
2892 case 8: fi=3; break;
2893 default: abort();
2894 }
2895 emit_byte(0x39);
2896 emit_byte(0x04+8*d);
2897 emit_byte(5+8*index+0x40*fi);
2898 emit_long(offset);
2899 }
2900 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2901
2902 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2903 {
2904 emit_byte(0x31);
2905 emit_byte(0xc0+8*s+d);
2906 }
2907 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2908
2909 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2910 {
2911 emit_byte(0x66);
2912 emit_byte(0x31);
2913 emit_byte(0xc0+8*s+d);
2914 }
2915 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2916
2917 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2918 {
2919 emit_byte(0x30);
2920 emit_byte(0xc0+8*s+d);
2921 }
2922 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2923
2924 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2925 {
2926 if (optimize_imm8 && isbyte(s)) {
2927 emit_byte(0x83);
2928 emit_byte(0x2d);
2929 emit_long(d);
2930 emit_byte(s);
2931 }
2932 else {
2933 emit_byte(0x81);
2934 emit_byte(0x2d);
2935 emit_long(d);
2936 emit_long(s);
2937 }
2938 }
2939 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2940
2941 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2942 {
2943 if (optimize_imm8 && isbyte(s)) {
2944 emit_byte(0x83);
2945 emit_byte(0x3d);
2946 emit_long(d);
2947 emit_byte(s);
2948 }
2949 else {
2950 emit_byte(0x81);
2951 emit_byte(0x3d);
2952 emit_long(d);
2953 emit_long(s);
2954 }
2955 }
2956 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2957
2958 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2959 {
2960 emit_byte(0x87);
2961 emit_byte(0xc0+8*r1+r2);
2962 }
2963 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2964
2965 /*************************************************************************
2966 * FIXME: mem access modes probably wrong *
2967 *************************************************************************/
2968
2969 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2970 {
2971 emit_byte(0x9c);
2972 }
2973 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2974
2975 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2976 {
2977 emit_byte(0x9d);
2978 }
2979 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2980
2981 /* Generate floating-point instructions */
2982 static inline void x86_fadd_m(MEMR s)
2983 {
2984 emit_byte(0xdc);
2985 emit_byte(0x05);
2986 emit_long(s);
2987 }
2988
2989 #endif
2990
2991 /*************************************************************************
2992 * Unoptimizable stuff --- jump *
2993 *************************************************************************/
2994
2995 static __inline__ void raw_call_r(R4 r)
2996 {
2997 #if USE_NEW_RTASM
2998 CALLsr(r);
2999 #else
3000 emit_byte(0xff);
3001 emit_byte(0xd0+r);
3002 #endif
3003 }
3004
3005 static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3006 {
3007 #if USE_NEW_RTASM
3008 CALLsm(base, X86_NOREG, r, m);
3009 #else
3010 int mu;
3011 switch(m) {
3012 case 1: mu=0; break;
3013 case 2: mu=1; break;
3014 case 4: mu=2; break;
3015 case 8: mu=3; break;
3016 default: abort();
3017 }
3018 emit_byte(0xff);
3019 emit_byte(0x14);
3020 emit_byte(0x05+8*r+0x40*mu);
3021 emit_long(base);
3022 #endif
3023 }
3024
3025 static __inline__ void raw_jmp_r(R4 r)
3026 {
3027 #if USE_NEW_RTASM
3028 JMPsr(r);
3029 #else
3030 emit_byte(0xff);
3031 emit_byte(0xe0+r);
3032 #endif
3033 }
3034
3035 static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3036 {
3037 #if USE_NEW_RTASM
3038 JMPsm(base, X86_NOREG, r, m);
3039 #else
3040 int mu;
3041 switch(m) {
3042 case 1: mu=0; break;
3043 case 2: mu=1; break;
3044 case 4: mu=2; break;
3045 case 8: mu=3; break;
3046 default: abort();
3047 }
3048 emit_byte(0xff);
3049 emit_byte(0x24);
3050 emit_byte(0x05+8*r+0x40*mu);
3051 emit_long(base);
3052 #endif
3053 }
3054
3055 static __inline__ void raw_jmp_m(uae_u32 base)
3056 {
3057 emit_byte(0xff);
3058 emit_byte(0x25);
3059 emit_long(base);
3060 }
3061
3062
3063 static __inline__ void raw_call(uae_u32 t)
3064 {
3065 #if USE_NEW_RTASM
3066 CALLm(t);
3067 #else
3068 emit_byte(0xe8);
3069 emit_long(t-(uae_u32)target-4);
3070 #endif
3071 }
3072
3073 static __inline__ void raw_jmp(uae_u32 t)
3074 {
3075 #if USE_NEW_RTASM
3076 JMPm(t);
3077 #else
3078 emit_byte(0xe9);
3079 emit_long(t-(uae_u32)target-4);
3080 #endif
3081 }
3082
3083 static __inline__ void raw_jl(uae_u32 t)
3084 {
3085 emit_byte(0x0f);
3086 emit_byte(0x8c);
3087 emit_long(t-(uintptr)target-4);
3088 }
3089
3090 static __inline__ void raw_jz(uae_u32 t)
3091 {
3092 emit_byte(0x0f);
3093 emit_byte(0x84);
3094 emit_long(t-(uintptr)target-4);
3095 }
3096
3097 static __inline__ void raw_jnz(uae_u32 t)
3098 {
3099 emit_byte(0x0f);
3100 emit_byte(0x85);
3101 emit_long(t-(uintptr)target-4);
3102 }
3103
3104 static __inline__ void raw_jnz_l_oponly(void)
3105 {
3106 emit_byte(0x0f);
3107 emit_byte(0x85);
3108 }
3109
3110 static __inline__ void raw_jcc_l_oponly(int cc)
3111 {
3112 emit_byte(0x0f);
3113 emit_byte(0x80+cc);
3114 }
3115
3116 static __inline__ void raw_jnz_b_oponly(void)
3117 {
3118 emit_byte(0x75);
3119 }
3120
3121 static __inline__ void raw_jz_b_oponly(void)
3122 {
3123 emit_byte(0x74);
3124 }
3125
3126 static __inline__ void raw_jcc_b_oponly(int cc)
3127 {
3128 emit_byte(0x70+cc);
3129 }
3130
3131 static __inline__ void raw_jmp_l_oponly(void)
3132 {
3133 emit_byte(0xe9);
3134 }
3135
3136 static __inline__ void raw_jmp_b_oponly(void)
3137 {
3138 emit_byte(0xeb);
3139 }
3140
3141 static __inline__ void raw_ret(void)
3142 {
3143 emit_byte(0xc3);
3144 }
3145
3146 static __inline__ void raw_nop(void)
3147 {
3148 emit_byte(0x90);
3149 }
3150
3151 static __inline__ void raw_emit_nop_filler(int nbytes)
3152 {
3153 /* Source: GNU Binutils 2.12.90.0.15 */
3154 /* Various efficient no-op patterns for aligning code labels.
3155 Note: Don't try to assemble the instructions in the comments.
3156 0L and 0w are not legal. */
3157 static const uae_u8 f32_1[] =
3158 {0x90}; /* nop */
3159 static const uae_u8 f32_2[] =
3160 {0x89,0xf6}; /* movl %esi,%esi */
3161 static const uae_u8 f32_3[] =
3162 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3163 static const uae_u8 f32_4[] =
3164 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3165 static const uae_u8 f32_5[] =
3166 {0x90, /* nop */
3167 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3168 static const uae_u8 f32_6[] =
3169 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3170 static const uae_u8 f32_7[] =
3171 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3172 static const uae_u8 f32_8[] =
3173 {0x90, /* nop */
3174 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3175 static const uae_u8 f32_9[] =
3176 {0x89,0xf6, /* movl %esi,%esi */
3177 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3178 static const uae_u8 f32_10[] =
3179 {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3180 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3181 static const uae_u8 f32_11[] =
3182 {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3183 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3184 static const uae_u8 f32_12[] =
3185 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3186 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3187 static const uae_u8 f32_13[] =
3188 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3189 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3190 static const uae_u8 f32_14[] =
3191 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3192 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3193 static const uae_u8 f32_15[] =
3194 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3195 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3196 static const uae_u8 f32_16[] =
3197 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3198 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3199 static const uae_u8 *const f32_patt[] = {
3200 f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3201 f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3202 };
3203 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3204
3205 #if defined(__x86_64__)
3206 /* The recommended way to pad 64bit code is to use NOPs preceded by
3207 maximally four 0x66 prefixes. Balance the size of nops. */
3208 if (nbytes == 0)
3209 return;
3210
3211 int i;
3212 int nnops = (nbytes + 3) / 4;
3213 int len = nbytes / nnops;
3214 int remains = nbytes - nnops * len;
3215
3216 for (i = 0; i < remains; i++) {
3217 emit_block(prefixes, len);
3218 raw_nop();
3219 }
3220 for (; i < nnops; i++) {
3221 emit_block(prefixes, len - 1);
3222 raw_nop();
3223 }
3224 #else
3225 int nloops = nbytes / 16;
3226 while (nloops-- > 0)
3227 emit_block(f32_16, sizeof(f32_16));
3228
3229 nbytes %= 16;
3230 if (nbytes)
3231 emit_block(f32_patt[nbytes - 1], nbytes);
3232 #endif
3233 }
3234
3235
3236 /*************************************************************************
3237 * Flag handling, to and fro UAE flag register *
3238 *************************************************************************/
3239
3240 #ifdef SAHF_SETO_PROFITABLE
3241
3242 #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3243 static __inline__ void raw_flags_to_reg(int r)
3244 {
3245 raw_lahf(0); /* Most flags in AH */
3246 //raw_setcc(r,0); /* V flag in AL */
3247 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3248
3249 #if 1 /* Let's avoid those nasty partial register stalls */
3250 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3251 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
3252 //live.state[FLAGTMP].status=CLEAN;
3253 live.state[FLAGTMP].status=INMEM;
3254 live.state[FLAGTMP].realreg=-1;
3255 /* We just "evicted" FLAGTMP. */
3256 if (live.nat[r].nholds!=1) {
3257 /* Huh? */
3258 abort();
3259 }
3260 live.nat[r].nholds=0;
3261 #endif
3262 }
3263
3264 #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3265 static __inline__ void raw_reg_to_flags(int r)
3266 {
3267 raw_cmp_b_ri(r,-127); /* set V */
3268 raw_sahf(0);
3269 }
3270
3271 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3272 static __inline__ void raw_flags_set_zero(int s, int tmp)
3273 {
3274 raw_mov_l_rr(tmp,s);
3275 raw_lahf(s); /* flags into ah */
3276 raw_and_l_ri(s,0xffffbfff);
3277 raw_and_l_ri(tmp,0x00004000);
3278 raw_xor_l_ri(tmp,0x00004000);
3279 raw_or_l(s,tmp);
3280 raw_sahf(s);
3281 }
3282
3283 #else
3284
3285 #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3286 static __inline__ void raw_flags_to_reg(int r)
3287 {
3288 raw_pushfl();
3289 raw_pop_l_r(r);
3290 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3291 // live.state[FLAGTMP].status=CLEAN;
3292 live.state[FLAGTMP].status=INMEM;
3293 live.state[FLAGTMP].realreg=-1;
3294 /* We just "evicted" FLAGTMP. */
3295 if (live.nat[r].nholds!=1) {
3296 /* Huh? */
3297 abort();
3298 }
3299 live.nat[r].nholds=0;
3300 }
3301
3302 #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3303 static __inline__ void raw_reg_to_flags(int r)
3304 {
3305 raw_push_l_r(r);
3306 raw_popfl();
3307 }
3308
3309 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3310 static __inline__ void raw_flags_set_zero(int s, int tmp)
3311 {
3312 raw_mov_l_rr(tmp,s);
3313 raw_pushfl();
3314 raw_pop_l_r(s);
3315 raw_and_l_ri(s,0xffffffbf);
3316 raw_and_l_ri(tmp,0x00000040);
3317 raw_xor_l_ri(tmp,0x00000040);
3318 raw_or_l(s,tmp);
3319 raw_push_l_r(s);
3320 raw_popfl();
3321 }
3322 #endif
3323
3324 /* Apparently, there are enough instructions between flag store and
3325 flag reload to avoid the partial memory stall */
3326 static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3327 {
3328 #if 1
3329 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3330 #else
3331 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3332 raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3333 #endif
3334 }
3335
3336 /* FLAGX is byte sized, and we *do* write it at that size */
3337 static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3338 {
3339 if (live.nat[target].canbyte)
3340 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3341 else if (live.nat[target].canword)
3342 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3343 else
3344 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3345 }
3346
3347 static __inline__ void raw_dec_sp(int off)
3348 {
3349 if (off) raw_sub_l_ri(ESP_INDEX,off);
3350 }
3351
3352 static __inline__ void raw_inc_sp(int off)
3353 {
3354 if (off) raw_add_l_ri(ESP_INDEX,off);
3355 }
3356
3357 /*************************************************************************
3358 * Handling mistaken direct memory access *
3359 *************************************************************************/
3360
3361 // gb-- I don't need that part for JIT Basilisk II
3362 #if defined(NATMEM_OFFSET) && 0
3363 #include <asm/sigcontext.h>
3364 #include <signal.h>
3365
3366 #define SIG_READ 1
3367 #define SIG_WRITE 2
3368
3369 static int in_handler=0;
3370 static uae_u8 veccode[256];
3371
3372 static void vec(int x, struct sigcontext sc)
3373 {
3374 uae_u8* i=(uae_u8*)sc.eip;
3375 uae_u32 addr=sc.cr2;
3376 int r=-1;
3377 int size=4;
3378 int dir=-1;
3379 int len=0;
3380 int j;
3381
3382 write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3383 if (!canbang)
3384 write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3385 if (in_handler)
3386 write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3387
3388 if (canbang && i>=compiled_code && i<=current_compile_p) {
3389 if (*i==0x66) {
3390 i++;
3391 size=2;
3392 len++;
3393 }
3394
3395 switch(i[0]) {
3396 case 0x8a:
3397 if ((i[1]&0xc0)==0x80) {
3398 r=(i[1]>>3)&7;
3399 dir=SIG_READ;
3400 size=1;
3401 len+=6;
3402 break;
3403 }
3404 break;
3405 case 0x88:
3406 if ((i[1]&0xc0)==0x80) {
3407 r=(i[1]>>3)&7;
3408 dir=SIG_WRITE;
3409 size=1;
3410 len+=6;
3411 break;
3412 }
3413 break;
3414 case 0x8b:
3415 if ((i[1]&0xc0)==0x80) {
3416 r=(i[1]>>3)&7;
3417 dir=SIG_READ;
3418 len+=6;
3419 break;
3420 }
3421 if ((i[1]&0xc0)==0x40) {
3422 r=(i[1]>>3)&7;
3423 dir=SIG_READ;
3424 len+=3;
3425 break;
3426 }
3427 break;
3428 case 0x89:
3429 if ((i[1]&0xc0)==0x80) {
3430 r=(i[1]>>3)&7;
3431 dir=SIG_WRITE;
3432 len+=6;
3433 break;
3434 }
3435 if ((i[1]&0xc0)==0x40) {
3436 r=(i[1]>>3)&7;
3437 dir=SIG_WRITE;
3438 len+=3;
3439 break;
3440 }
3441 break;
3442 }
3443 }
3444
3445 if (r!=-1) {
3446 void* pr=NULL;
3447 write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3448
3449 switch(r) {
3450 case 0: pr=&(sc.eax); break;
3451 case 1: pr=&(sc.ecx); break;
3452 case 2: pr=&(sc.edx); break;
3453 case 3: pr=&(sc.ebx); break;
3454 case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3455 case 5: pr=(size>1)?
3456 (void*)(&(sc.ebp)):
3457 (void*)(((uae_u8*)&(sc.ecx))+1); break;
3458 case 6: pr=(size>1)?
3459 (void*)(&(sc.esi)):
3460 (void*)(((uae_u8*)&(sc.edx))+1); break;
3461 case 7: pr=(size>1)?
3462 (void*)(&(sc.edi)):
3463 (void*)(((uae_u8*)&(sc.ebx))+1); break;
3464 default: abort();
3465 }
3466 if (pr) {
3467 blockinfo* bi;
3468
3469 if (currprefs.comp_oldsegv) {
3470 addr-=NATMEM_OFFSET;
3471
3472 if ((addr>=0x10000000 && addr<0x40000000) ||
3473 (addr>=0x50000000)) {
3474 write_log("Suspicious address in %x SEGV handler.\n",addr);
3475 }
3476 if (dir==SIG_READ) {
3477 switch(size) {
3478 case 1: *((uae_u8*)pr)=get_byte(addr); break;
3479 case 2: *((uae_u16*)pr)=get_word(addr); break;
3480 case 4: *((uae_u32*)pr)=get_long(addr); break;
3481 default: abort();
3482 }
3483 }
3484 else { /* write */
3485 switch(size) {
3486 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3487 case 2: put_word(addr,*((uae_u16*)pr)); break;
3488 case 4: put_long(addr,*((uae_u32*)pr)); break;
3489 default: abort();
3490 }
3491 }
3492 write_log("Handled one access!\n");
3493 fflush(stdout);
3494 segvcount++;
3495 sc.eip+=len;
3496 }
3497 else {
3498 void* tmp=target;
3499 int i;
3500 uae_u8 vecbuf[5];
3501
3502 addr-=NATMEM_OFFSET;
3503
3504 if ((addr>=0x10000000 && addr<0x40000000) ||
3505 (addr>=0x50000000)) {
3506 write_log("Suspicious address in %x SEGV handler.\n",addr);
3507 }
3508
3509 target=(uae_u8*)sc.eip;
3510 for (i=0;i<5;i++)
3511 vecbuf[i]=target[i];
3512 emit_byte(0xe9);
3513 emit_long((uintptr)veccode-(uintptr)target-4);
3514 write_log("Create jump to %p\n",veccode);
3515
3516 write_log("Handled one access!\n");
3517 fflush(stdout);
3518 segvcount++;
3519
3520 target=veccode;
3521
3522 if (dir==SIG_READ) {
3523 switch(size) {
3524 case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3525 case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3526 case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3527 default: abort();
3528 }
3529 }
3530 else { /* write */
3531 switch(size) {
3532 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3533 case 2: put_word(addr,*((uae_u16*)pr)); break;
3534 case 4: put_long(addr,*((uae_u32*)pr)); break;
3535 default: abort();
3536 }
3537 }
3538 for (i=0;i<5;i++)
3539 raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3540 raw_mov_l_mi((uintptr)&in_handler,0);
3541 emit_byte(0xe9);
3542 emit_long(sc.eip+len-(uintptr)target-4);
3543 in_handler=1;
3544 target=tmp;
3545 }
3546 bi=active;
3547 while (bi) {
3548 if (bi->handler &&
3549 (uae_u8*)bi->direct_handler<=i &&
3550 (uae_u8*)bi->nexthandler>i) {
3551 write_log("deleted trigger (%p<%p<%p) %p\n",
3552 bi->handler,
3553 i,
3554 bi->nexthandler,
3555 bi->pc_p);
3556 invalidate_block(bi);
3557 raise_in_cl_list(bi);
3558 set_special(0);
3559 return;
3560 }
3561 bi=bi->next;
3562 }
3563 /* Not found in the active list. Might be a rom routine that
3564 is in the dormant list */
3565 bi=dormant;
3566 while (bi) {
3567 if (bi->handler &&
3568 (uae_u8*)bi->direct_handler<=i &&
3569 (uae_u8*)bi->nexthandler>i) {
3570 write_log("deleted trigger (%p<%p<%p) %p\n",
3571 bi->handler,
3572 i,
3573 bi->nexthandler,
3574 bi->pc_p);
3575 invalidate_block(bi);
3576 raise_in_cl_list(bi);
3577 set_special(0);
3578 return;
3579 }
3580 bi=bi->next;
3581 }
3582 write_log("Huh? Could not find trigger!\n");
3583 return;
3584 }
3585 }
3586 write_log("Can't handle access!\n");
3587 for (j=0;j<10;j++) {
3588 write_log("instruction byte %2d is %02x\n",j,i[j]);
3589 }
3590 write_log("Please send the above info (starting at \"fault address\") to\n"
3591 "bmeyer@csse.monash.edu.au\n"
3592 "This shouldn't happen ;-)\n");
3593 fflush(stdout);
3594 signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3595 }
3596 #endif
3597
3598
3599 /*************************************************************************
3600 * Checking for CPU features *
3601 *************************************************************************/
3602
3603 struct cpuinfo_x86 {
3604 uae_u8 x86; // CPU family
3605 uae_u8 x86_vendor; // CPU vendor
3606 uae_u8 x86_processor; // CPU canonical processor type
3607 uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3608 uae_u32 x86_hwcap;
3609 uae_u8 x86_model;
3610 uae_u8 x86_mask;
3611 int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3612 char x86_vendor_id[16];
3613 };
3614 struct cpuinfo_x86 cpuinfo;
3615
/* CPU vendor codes stored in cpuinfo_x86::x86_vendor. */
enum {
    X86_VENDOR_INTEL     = 0,
    X86_VENDOR_CYRIX     = 1,
    X86_VENDOR_AMD       = 2,
    X86_VENDOR_UMC       = 3,
    X86_VENDOR_NEXGEN    = 4,
    X86_VENDOR_CENTAUR   = 5,
    X86_VENDOR_RISE      = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC       = 8,
    X86_VENDOR_UNKNOWN   = 0xff     /* vendor string not recognised */
};
3628
/* Canonical processor types; X86_PROCESSOR_max is the number of entries
   and sizes the per-processor tables. */
enum {
    X86_PROCESSOR_I386       = 0,   /* 80386 */
    X86_PROCESSOR_I486       = 1,   /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM    = 2,
    X86_PROCESSOR_PENTIUMPRO = 3,
    X86_PROCESSOR_K6         = 4,
    X86_PROCESSOR_ATHLON     = 5,
    X86_PROCESSOR_PENTIUM4   = 6,
    X86_PROCESSOR_X86_64     = 7,
    X86_PROCESSOR_max        = 8
};
3640
3641 static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3642 "80386",
3643 "80486",
3644 "Pentium",
3645 "PentiumPro",
3646 "K6",
3647 "Athlon",
3648 "Pentium4",
3649 "x86-64"
3650 };
3651
3652 static struct ptt {
3653 const int align_loop;
3654 const int align_loop_max_skip;
3655 const int align_jump;
3656 const int align_jump_max_skip;
3657 const int align_func;
3658 }
3659 x86_alignments[X86_PROCESSOR_max] = {
3660 { 4, 3, 4, 3, 4 },
3661 { 16, 15, 16, 15, 16 },
3662 { 16, 7, 16, 7, 16 },
3663 { 16, 15, 16, 7, 16 },
3664 { 32, 7, 32, 7, 32 },
3665 { 16, 7, 16, 7, 16 },
3666 { 0, 0, 0, 0, 0 },
3667 { 16, 7, 16, 7, 16 }
3668 };
3669
3670 static void
3671 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3672 {
3673 char *v = c->x86_vendor_id;
3674
3675 if (!strcmp(v, "GenuineIntel"))
3676 c->x86_vendor = X86_VENDOR_INTEL;
3677 else if (!strcmp(v, "AuthenticAMD"))
3678 c->x86_vendor = X86_VENDOR_AMD;
3679 else if (!strcmp(v, "CyrixInstead"))
3680 c->x86_vendor = X86_VENDOR_CYRIX;
3681 else if (!strcmp(v, "Geode by NSC"))
3682 c->x86_vendor = X86_VENDOR_NSC;
3683 else if (!strcmp(v, "UMC UMC UMC "))
3684 c->x86_vendor = X86_VENDOR_UMC;
3685 else if (!strcmp(v, "CentaurHauls"))
3686 c->x86_vendor = X86_VENDOR_CENTAUR;
3687 else if (!strcmp(v, "NexGenDriven"))
3688 c->x86_vendor = X86_VENDOR_NEXGEN;
3689 else if (!strcmp(v, "RiseRiseRise"))
3690 c->x86_vendor = X86_VENDOR_RISE;
3691 else if (!strcmp(v, "GenuineTMx86") ||
3692 !strcmp(v, "TransmetaCPU"))
3693 c->x86_vendor = X86_VENDOR_TRANSMETA;
3694 else
3695 c->x86_vendor = X86_VENDOR_UNKNOWN;
3696 }
3697
3698 static void
3699 cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3700 {
3701 const int CPUID_SPACE = 4096;
3702 uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3703 if (cpuid_space == VM_MAP_FAILED)
3704 abort();
3705 vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3706
3707 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3708 uae_u8* tmp=get_target();
3709
3710 s_op = op;
3711 set_target(cpuid_space);
3712 raw_push_l_r(0); /* eax */
3713 raw_push_l_r(1); /* ecx */
3714 raw_push_l_r(2); /* edx */
3715 raw_push_l_r(3); /* ebx */
3716 raw_mov_l_rm(0,(uintptr)&s_op);
3717 raw_cpuid(0);
3718 raw_mov_l_mr((uintptr)&s_eax,0);
3719 raw_mov_l_mr((uintptr)&s_ebx,3);
3720 raw_mov_l_mr((uintptr)&s_ecx,1);
3721 raw_mov_l_mr((uintptr)&s_edx,2);
3722 raw_pop_l_r(3);
3723 raw_pop_l_r(2);
3724 raw_pop_l_r(1);
3725 raw_pop_l_r(0);
3726 raw_ret();
3727 set_target(tmp);
3728
3729 ((cpuop_func*)cpuid_space)(0);
3730 if (eax != NULL) *eax = s_eax;
3731 if (ebx != NULL) *ebx = s_ebx;
3732 if (ecx != NULL) *ecx = s_ecx;
3733 if (edx != NULL) *edx = s_edx;
3734
3735 vm_release(cpuid_space, CPUID_SPACE);
3736 }
3737
3738 static void
3739 raw_init_cpu(void)
3740 {
3741 struct cpuinfo_x86 *c = &cpuinfo;
3742
3743 /* Defaults */
3744 c->x86_processor = X86_PROCESSOR_max;
3745 c->x86_vendor = X86_VENDOR_UNKNOWN;
3746 c->cpuid_level = -1; /* CPUID not detected */
3747 c->x86_model = c->x86_mask = 0; /* So far unknown... */
3748 c->x86_vendor_id[0] = '\0'; /* Unset */
3749 c->x86_hwcap = 0;
3750
3751 /* Get vendor name */
3752 c->x86_vendor_id[12] = '\0';
3753 cpuid(0x00000000,
3754 (uae_u32 *)&c->cpuid_level,
3755 (uae_u32 *)&c->x86_vendor_id[0],
3756 (uae_u32 *)&c->x86_vendor_id[8],
3757 (uae_u32 *)&c->x86_vendor_id[4]);
3758 x86_get_cpu_vendor(c);
3759
3760 /* Intel-defined flags: level 0x00000001 */
3761 c->x86_brand_id = 0;
3762 if ( c->cpuid_level >= 0x00000001 ) {
3763 uae_u32 tfms, brand_id;
3764 cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3765 c->x86 = (tfms >> 8) & 15;
3766 if (c->x86 == 0xf)
3767 c->x86 += (tfms >> 20) & 0xff; /* extended family */
3768 c->x86_model = (tfms >> 4) & 15;
3769 if (c->x86_model == 0xf)
3770 c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3771 c->x86_brand_id = brand_id & 0xff;
3772 c->x86_mask = tfms & 15;
3773 } else {
3774 /* Have CPUID level 0 only - unheard of */
3775 c->x86 = 4;
3776 }
3777
3778 /* AMD-defined flags: level 0x80000001 */
3779 uae_u32 xlvl;
3780 cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3781 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3782 if ( xlvl >= 0x80000001 ) {
3783 uae_u32 features, extra_features;
3784 cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3785 if (features & (1 << 29)) {
3786 /* Assume x86-64 if long mode is supported */
3787 c->x86_processor = X86_PROCESSOR_X86_64;
3788 }
3789 if (extra_features & (1 << 0))
3790 have_lahf_lm = true;
3791 }
3792 }
3793
3794 /* Canonicalize processor ID */
3795 switch (c->x86) {
3796 case 3:
3797 c->x86_processor = X86_PROCESSOR_I386;
3798 break;
3799 case 4:
3800 c->x86_processor = X86_PROCESSOR_I486;
3801 break;
3802 case 5:
3803 if (c->x86_vendor == X86_VENDOR_AMD)
3804 c->x86_processor = X86_PROCESSOR_K6;
3805 else
3806 c->x86_processor = X86_PROCESSOR_PENTIUM;
3807 break;
3808 case 6:
3809 if (c->x86_vendor == X86_VENDOR_AMD)
3810 c->x86_processor = X86_PROCESSOR_ATHLON;
3811 else
3812 c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3813 break;
3814 case 15:
3815 if (c->x86_processor == X86_PROCESSOR_max) {
3816 switch (c->x86_vendor) {
3817 case X86_VENDOR_INTEL:
3818 c->x86_processor = X86_PROCESSOR_PENTIUM4;
3819 break;
3820 case X86_VENDOR_AMD:
3821 /* Assume a 32-bit Athlon processor if not in long mode */
3822 c->x86_processor = X86_PROCESSOR_ATHLON;
3823 break;
3824 }
3825 }
3826 break;
3827 }
3828 if (c->x86_processor == X86_PROCESSOR_max) {
3829 c->x86_processor = X86_PROCESSOR_I386;
3830 fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3831 fprintf(stderr, " Family : %d\n", c->x86);
3832 fprintf(stderr, " Model : %d\n", c->x86_model);
3833 fprintf(stderr, " Mask : %d\n", c->x86_mask);
3834 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3835 if (c->x86_brand_id)
3836 fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3837 }
3838
3839 /* Have CMOV support? */
3840 have_cmov = c->x86_hwcap & (1 << 15);
3841
3842 /* Can the host CPU suffer from partial register stalls? */
3843 have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3844 #if 1
3845 /* It appears that partial register writes are a bad idea even on
3846 AMD K7 cores, even though they are not supposed to have the
3847 dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3848 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3849 have_rat_stall = true;
3850 #endif
3851
3852 /* Alignments */
3853 if (tune_alignment) {
3854 align_loops = x86_alignments[c->x86_processor].align_loop;
3855 align_jumps = x86_alignments[c->x86_processor].align_jump;
3856 }
3857
3858 write_log("Max CPUID level=%d Processor is %s [%s]\n",
3859 c->cpuid_level, c->x86_vendor_id,
3860 x86_processor_string_table[c->x86_processor]);
3861 }
3862
3863 static bool target_check_bsf(void)
3864 {
3865 bool mismatch = false;
3866 for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3867 for (int g_CF = 0; g_CF <= 1; g_CF++) {
3868 for (int g_OF = 0; g_OF <= 1; g_OF++) {
3869 for (int g_SF = 0; g_SF <= 1; g_SF++) {
3870 for (int value = -1; value <= 1; value++) {
3871 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3872 unsigned long tmp = value;
3873 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3874 : "+r" (flags), "+r" (tmp) : : "cc");
3875 int OF = (flags >> 11) & 1;
3876 int SF = (flags >> 7) & 1;
3877 int ZF = (flags >> 6) & 1;
3878 int CF = flags & 1;
3879 tmp = (value == 0);
3880 if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3881 mismatch = true;
3882 }
3883 }}}}
3884 if (mismatch)
3885 write_log("Target CPU defines all flags on BSF instruction\n");
3886 return !mismatch;
3887 }
3888
3889
3890 /*************************************************************************
3891 * FPU stuff *
3892 *************************************************************************/
3893
3894
3895 static __inline__ void raw_fp_init(void)
3896 {
3897 int i;
3898
3899 for (i=0;i<N_FREGS;i++)
3900 live.spos[i]=-2;
3901 live.tos=-1; /* Stack is empty */
3902 }
3903
3904 static __inline__ void raw_fp_cleanup_drop(void)
3905 {
3906 #if 0
3907 /* using FINIT instead of popping all the entries.
3908 Seems to have side effects --- there is display corruption in
3909 Quake when this is used */
3910 if (live.tos>1) {
3911 emit_byte(0x9b);
3912 emit_byte(0xdb);
3913 emit_byte(0xe3);
3914 live.tos=-1;
3915 }
3916 #endif
3917 while (live.tos>=1) {
3918 emit_byte(0xde);
3919 emit_byte(0xd9);
3920 live.tos-=2;
3921 }
3922 while (live.tos>=0) {
3923 emit_byte(0xdd);
3924 emit_byte(0xd8);
3925 live.tos--;
3926 }
3927 raw_fp_init();
3928 }
3929
3930 static __inline__ void make_tos(int r)
3931 {
3932 int p,q;
3933
3934 if (live.spos[r]<0) { /* Register not yet on stack */
3935 emit_byte(0xd9);
3936 emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3937 live.tos++;
3938 live.spos[r]=live.tos;
3939 live.onstack[live.tos]=r;
3940 return;
3941 }
3942 /* Register is on stack */
3943 if (live.tos==live.spos[r])
3944 return;
3945 p=live.spos[r];
3946 q=live.onstack[live.tos];
3947
3948 emit_byte(0xd9);
3949 emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3950 live.onstack[live.tos]=r;
3951 live.spos[r]=live.tos;
3952 live.onstack[p]=q;
3953 live.spos[q]=p;
3954 }
3955
3956 static __inline__ void make_tos2(int r, int r2)
3957 {
3958 int q;
3959
3960 make_tos(r2); /* Put the reg that's supposed to end up in position2
3961 on top */
3962
3963 if (live.spos[r]<0) { /* Register not yet on stack */
3964 make_tos(r); /* This will extend the stack */
3965 return;
3966 }
3967 /* Register is on stack */
3968 emit_byte(0xd9);
3969 emit_byte(0xc9); /* Move r2 into position 2 */
3970
3971 q=live.onstack[live.tos-1];
3972 live.onstack[live.tos]=q;
3973 live.spos[q]=live.tos;
3974 live.onstack[live.tos-1]=r2;
3975 live.spos[r2]=live.tos-1;
3976
3977 make_tos(r); /* And r into 1 */
3978 }
3979
3980 static __inline__ int stackpos(int r)
3981 {
3982 if (live.spos[r]<0)
3983 abort();
3984 if (live.tos<live.spos[r]) {
3985 printf("Looking for spos for fnreg %d\n",r);
3986 abort();
3987 }
3988 return live.tos-live.spos[r];
3989 }
3990
3991 static __inline__ void usereg(int r)
3992 {
3993 if (live.spos[r]<0)
3994 make_tos(r);
3995 }
3996
3997 /* This is called with one FP value in a reg *above* tos, which it will
3998 pop off the stack if necessary */
3999 static __inline__ void tos_make(int r)
4000 {
4001 if (live.spos[r]<0) {
4002 live.tos++;
4003 live.spos[r]=live.tos;
4004 live.onstack[live.tos]=r;
4005 return;
4006 }
4007 emit_byte(0xdd);
4008 emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
4009 and pop it*/
4010 }
4011
4012 /* FP helper functions */
4013 #if USE_NEW_RTASM
4014 #define DEFINE_OP(NAME, GEN) \
4015 static inline void raw_##NAME(uint32 m) \
4016 { \
4017 GEN(m, X86_NOREG, X86_NOREG, 1); \
4018 }
4019 DEFINE_OP(fstl, FSTLm);
4020 DEFINE_OP(fstpl, FSTPLm);
4021 DEFINE_OP(fldl, FLDLm);
4022 DEFINE_OP(fildl, FILDLm);
4023 DEFINE_OP(fistl, FISTLm);
4024 DEFINE_OP(flds, FLDSm);
4025 DEFINE_OP(fsts, FSTSm);
4026 DEFINE_OP(fstpt, FSTPTm);
4027 DEFINE_OP(fldt, FLDTm);
4028 #else
4029 #define DEFINE_OP(NAME, OP1, OP2) \
4030 static inline void raw_##NAME(uint32 m) \
4031 { \
4032 emit_byte(OP1); \
4033 emit_byte(OP2); \
4034 emit_long(m); \
4035 }
4036 DEFINE_OP(fstl, 0xdd, 0x15);
4037 DEFINE_OP(fstpl, 0xdd, 0x1d);
4038 DEFINE_OP(fldl, 0xdd, 0x05);
4039 DEFINE_OP(fildl, 0xdb, 0x05);
4040 DEFINE_OP(fistl, 0xdb, 0x15);
4041 DEFINE_OP(flds, 0xd9, 0x05);
4042 DEFINE_OP(fsts, 0xd9, 0x15);
4043 DEFINE_OP(fstpt, 0xdb, 0x3d);
4044 DEFINE_OP(fldt, 0xdb, 0x2d);
4045 #endif
4046 #undef DEFINE_OP
4047
4048 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4049 {
4050 make_tos(r);
4051 raw_fstl(m);
4052 }
4053 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4054
4055 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4056 {
4057 make_tos(r);
4058 raw_fstpl(m);
4059 live.onstack[live.tos]=-1;
4060 live.tos--;
4061 live.spos[r]=-2;
4062 }
4063 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4064
4065 LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4066 {
4067 raw_fldl(m);
4068 tos_make(r);
4069 }
4070 LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4071
4072 LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4073 {
4074 raw_fildl(m);
4075 tos_make(r);
4076 }
4077 LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4078
4079 LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4080 {
4081 make_tos(r);
4082 raw_fistl(m);
4083 }
4084 LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4085
4086 LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4087 {
4088 raw_flds(m);
4089 tos_make(r);
4090 }
4091 LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4092
4093 LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4094 {
4095 make_tos(r);
4096 raw_fsts(m);
4097 }
4098 LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4099
4100 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4101 {
4102 int rs;
4103
4104 /* Stupid x87 can't write a long double to mem without popping the
4105 stack! */
4106 usereg(r);
4107 rs=stackpos(r);
4108 emit_byte(0xd9); /* Get a copy to the top of stack */
4109 emit_byte(0xc0+rs);
4110
4111 raw_fstpt(m); /* store and pop it */
4112 }
4113 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4114
4115 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4116 {
4117 int rs;
4118
4119 make_tos(r);
4120 raw_fstpt(m); /* store and pop it */
4121 live.onstack[live.tos]=-1;
4122 live.tos--;
4123 live.spos[r]=-2;
4124 }
4125 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4126
4127 LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4128 {
4129 raw_fldt(m);
4130 tos_make(r);
4131 }
4132 LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4133
4134 LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4135 {
4136 emit_byte(0xd9);
4137 emit_byte(0xeb);
4138 tos_make(r);
4139 }
4140 LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4141
4142 LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4143 {
4144 emit_byte(0xd9);
4145 emit_byte(0xec);
4146 tos_make(r);
4147 }
4148 LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4149
4150 LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4151 {
4152 emit_byte(0xd9);
4153 emit_byte(0xea);
4154 tos_make(r);
4155 }
4156 LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4157
4158 LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4159 {
4160 emit_byte(0xd9);
4161 emit_byte(0xed);
4162 tos_make(r);
4163 }
4164 LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4165
4166 LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4167 {
4168 emit_byte(0xd9);
4169 emit_byte(0xe8);
4170 tos_make(r);
4171 }
4172 LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4173
4174 LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4175 {
4176 emit_byte(0xd9);
4177 emit_byte(0xee);
4178 tos_make(r);
4179 }
4180 LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4181
4182 LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4183 {
4184 int ds;
4185
4186 usereg(s);
4187 ds=stackpos(s);
4188 if (ds==0 && live.spos[d]>=0) {
4189 /* source is on top of stack, and we already have the dest */
4190 int dd=stackpos(d);
4191 emit_byte(0xdd);
4192 emit_byte(0xd0+dd);
4193 }
4194 else {
4195 emit_byte(0xd9);
4196 emit_byte(0xc0+ds); /* duplicate source on tos */
4197 tos_make(d); /* store to destination, pop if necessary */
4198 }
4199 }
4200 LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4201
4202 LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4203 {
4204 emit_byte(0xd9);
4205 emit_byte(0xa8+index);
4206 emit_long(base);
4207 }
4208 LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4209
4210
4211 LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4212 {
4213 int ds;
4214
4215 if (d!=s) {
4216 usereg(s);
4217 ds=stackpos(s);
4218 emit_byte(0xd9);
4219 emit_byte(0xc0+ds); /* duplicate source */
4220 emit_byte(0xd9);
4221 emit_byte(0xfa); /* take square root */
4222 tos_make(d); /* store to destination */
4223 }
4224 else {
4225 make_tos(d);
4226 emit_byte(0xd9);
4227 emit_byte(0xfa); /* take square root */
4228 }
4229 }
4230 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4231
4232 LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4233 {
4234 int ds;
4235
4236 if (d!=s) {
4237 usereg(s);
4238 ds=stackpos(s);
4239 emit_byte(0xd9);
4240 emit_byte(0xc0+ds); /* duplicate source */
4241 emit_byte(0xd9);
4242 emit_byte(0xe1); /* take fabs */
4243 tos_make(d); /* store to destination */
4244 }
4245 else {
4246 make_tos(d);
4247 emit_byte(0xd9);
4248 emit_byte(0xe1); /* take fabs */
4249 }
4250 }
4251 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4252
4253 LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4254 {
4255 int ds;
4256
4257 if (d!=s) {
4258 usereg(s);
4259 ds=stackpos(s);
4260 emit_byte(0xd9);
4261 emit_byte(0xc0+ds); /* duplicate source */
4262 emit_byte(0xd9);
4263 emit_byte(0xfc); /* take frndint */
4264 tos_make(d); /* store to destination */
4265 }
4266 else {
4267 make_tos(d);
4268 emit_byte(0xd9);
4269 emit_byte(0xfc); /* take frndint */
4270 }
4271 }
4272 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4273
4274 LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4275 {
4276 int ds;
4277
4278 if (d!=s) {
4279 usereg(s);
4280 ds=stackpos(s);
4281 emit_byte(0xd9);
4282 emit_byte(0xc0+ds); /* duplicate source */
4283 emit_byte(0xd9);
4284 emit_byte(0xff); /* take cos */
4285 tos_make(d); /* store to destination */
4286 }
4287 else {
4288 make_tos(d);
4289 emit_byte(0xd9);
4290 emit_byte(0xff); /* take cos */
4291 }
4292 }
4293 LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4294
4295 LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4296 {
4297 int ds;
4298
4299 if (d!=s) {
4300 usereg(s);
4301 ds=stackpos(s);
4302 emit_byte(0xd9);
4303 emit_byte(0xc0+ds); /* duplicate source */
4304 emit_byte(0xd9);
4305 emit_byte(0xfe); /* take sin */
4306 tos_make(d); /* store to destination */
4307 }
4308 else {
4309 make_tos(d);
4310 emit_byte(0xd9);
4311 emit_byte(0xfe); /* take sin */
4312 }
4313 }
4314 LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4315
4316 static const double one=1;
4317 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4318 {
4319 int ds;
4320
4321 usereg(s);
4322 ds=stackpos(s);
4323 emit_byte(0xd9);
4324 emit_byte(0xc0+ds); /* duplicate source */
4325
4326 emit_byte(0xd9);
4327 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4328 emit_byte(0xd9);
4329 emit_byte(0xfc); /* rndint */
4330 emit_byte(0xd9);
4331 emit_byte(0xc9); /* swap top two elements */
4332 emit_byte(0xd8);
4333 emit_byte(0xe1); /* subtract rounded from original */
4334 emit_byte(0xd9);
4335 emit_byte(0xf0); /* f2xm1 */
4336 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4337 emit_byte(0xd9);
4338 emit_byte(0xfd); /* and scale it */
4339 emit_byte(0xdd);
4340 emit_byte(0xd9); /* take he rounded value off */
4341 tos_make(d); /* store to destination */
4342 }
4343 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4344
4345 LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4346 {
4347 int ds;
4348
4349 usereg(s);
4350 ds=stackpos(s);
4351 emit_byte(0xd9);
4352 emit_byte(0xc0+ds); /* duplicate source */
4353 emit_byte(0xd9);
4354 emit_byte(0xea); /* fldl2e */
4355 emit_byte(0xde);
4356 emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4357
4358 emit_byte(0xd9);
4359 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4360 emit_byte(0xd9);
4361 emit_byte(0xfc); /* rndint */
4362 emit_byte(0xd9);
4363 emit_byte(0xc9); /* swap top two elements */
4364 emit_byte(0xd8);
4365 emit_byte(0xe1); /* subtract rounded from original */
4366 emit_byte(0xd9);
4367 emit_byte(0xf0); /* f2xm1 */
4368 x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
4369 emit_byte(0xd9);
4370 emit_byte(0xfd); /* and scale it */
4371 emit_byte(0xdd);
4372 emit_byte(0xd9); /* take he rounded value off */
4373 tos_make(d); /* store to destination */
4374 }
4375 LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4376
4377 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4378 {
4379 int ds;
4380
4381 usereg(s);
4382 ds=stackpos(s);
4383 emit_byte(0xd9);
4384 emit_byte(0xc0+ds); /* duplicate source */
4385 emit_byte(0xd9);
4386 emit_byte(0xe8); /* push '1' */
4387 emit_byte(0xd9);
4388 emit_byte(0xc9); /* swap top two */
4389 emit_byte(0xd9);
4390 emit_byte(0xf1); /* take 1*log2(x) */
4391 tos_make(d); /* store to destination */
4392 }
4393 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4394
4395
4396 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4397 {
4398 int ds;
4399
4400 if (d!=s) {
4401 usereg(s);
4402 ds=stackpos(s);
4403 emit_byte(0xd9);
4404 emit_byte(0xc0+ds); /* duplicate source */
4405 emit_byte(0xd9);
4406 emit_byte(0xe0); /* take fchs */
4407 tos_make(d); /* store to destination */
4408 }
4409 else {
4410 make_tos(d);
4411 emit_byte(0xd9);
4412 emit_byte(0xe0); /* take fchs */
4413 }
4414 }
4415 LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4416
4417 LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4418 {
4419 int ds;
4420
4421 usereg(s);
4422 usereg(d);
4423
4424 if (live.spos[s]==live.tos) {
4425 /* Source is on top of stack */
4426 ds=stackpos(d);
4427 emit_byte(0xdc);
4428 emit_byte(0xc0+ds); /* add source to dest*/
4429 }
4430 else {
4431 make_tos(d);
4432 ds=stackpos(s);
4433
4434 emit_byte(0xd8);
4435 emit_byte(0xc0+ds); /* add source to dest*/
4436 }
4437 }
4438 LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4439
4440 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4441 {
4442 int ds;
4443
4444 usereg(s);
4445 usereg(d);
4446
4447 if (live.spos[s]==live.tos) {
4448 /* Source is on top of stack */
4449 ds=stackpos(d);
4450 emit_byte(0xdc);
4451 emit_byte(0xe8+ds); /* sub source from dest*/
4452 }
4453 else {
4454 make_tos(d);
4455 ds=stackpos(s);
4456
4457 emit_byte(0xd8);
4458 emit_byte(0xe0+ds); /* sub src from dest */
4459 }
4460 }
4461 LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4462
4463 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4464 {
4465 int ds;
4466
4467 usereg(s);
4468 usereg(d);
4469
4470 make_tos(d);
4471 ds=stackpos(s);
4472
4473 emit_byte(0xdd);
4474 emit_byte(0xe0+ds); /* cmp dest with source*/
4475 }
4476 LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4477
4478 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4479 {
4480 int ds;
4481
4482 usereg(s);
4483 usereg(d);
4484
4485 if (live.spos[s]==live.tos) {
4486 /* Source is on top of stack */
4487 ds=stackpos(d);
4488 emit_byte(0xdc);
4489 emit_byte(0xc8+ds); /* mul dest by source*/
4490 }
4491 else {
4492 make_tos(d);
4493 ds=stackpos(s);
4494
4495 emit_byte(0xd8);
4496 emit_byte(0xc8+ds); /* mul dest by source*/
4497 }
4498 }
4499 LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4500
4501 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4502 {
4503 int ds;
4504
4505 usereg(s);
4506 usereg(d);
4507
4508 if (live.spos[s]==live.tos) {
4509 /* Source is on top of stack */
4510 ds=stackpos(d);
4511 emit_byte(0xdc);
4512 emit_byte(0xf8+ds); /* div dest by source */
4513 }
4514 else {
4515 make_tos(d);
4516 ds=stackpos(s);
4517
4518 emit_byte(0xd8);
4519 emit_byte(0xf0+ds); /* div dest by source*/
4520 }
4521 }
4522 LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4523
4524 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4525 {
4526 int ds;
4527
4528 usereg(s);
4529 usereg(d);
4530
4531 make_tos2(d,s);
4532 ds=stackpos(s);
4533
4534 if (ds!=1) {
4535 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4536 abort();
4537 }
4538 emit_byte(0xd9);
4539 emit_byte(0xf8); /* take rem from dest by source */
4540 }
4541 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4542
4543 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4544 {
4545 int ds;
4546
4547 usereg(s);
4548 usereg(d);
4549
4550 make_tos2(d,s);
4551 ds=stackpos(s);
4552
4553 if (ds!=1) {
4554 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4555 abort();
4556 }
4557 emit_byte(0xd9);
4558 emit_byte(0xf5); /* take rem1 from dest by source */
4559 }
4560 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4561
4562
4563 LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4564 {
4565 make_tos(r);
4566 emit_byte(0xd9); /* ftst */
4567 emit_byte(0xe4);
4568 }
4569 LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4570
/* Without fucomi the FPU flags have to travel through "fstsw ax; sahf",
   so the %eax register is clobbered in that case. raw_fflags_into_flags()
   selects fucomi based on have_cmov. */
#define FFLAG_NREG_CLOBBER_CONDITION	(!have_cmov)
#define FFLAG_NREG						EAX_INDEX
4574
4575 static __inline__ void raw_fflags_into_flags(int r)
4576 {
4577 int p;
4578
4579 usereg(r);
4580 p=stackpos(r);
4581
4582 emit_byte(0xd9);
4583 emit_byte(0xee); /* Push 0 */
4584 emit_byte(0xd9);
4585 emit_byte(0xc9+p); /* swap top two around */
4586 if (have_cmov) {
4587 // gb-- fucomi is for P6 cores only, not K6-2 then...
4588 emit_byte(0xdb);
4589 emit_byte(0xe9+p); /* fucomi them */
4590 }
4591 else {
4592 emit_byte(0xdd);
4593 emit_byte(0xe1+p); /* fucom them */
4594 emit_byte(0x9b);
4595 emit_byte(0xdf);
4596 emit_byte(0xe0); /* fstsw ax */
4597 raw_sahf(0); /* sahf */
4598 }
4599 emit_byte(0xdd);
4600 emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4601 }