ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.31
Committed: 2006-01-15T22:42:51Z (18 years, 5 months ago) by gbeauche
Branch: MAIN
Changes since 1.30: +9 -1 lines
Log Message:
fix stack alignment (theoretically, but it was OK in practice) in generated
functions, move m68k_compile_execute() to compiler/ dir since it's JIT
generic and it now depends on USE_PUSH_POP (as it should)

File Contents

# Content
1 /*
2 * compiler/codegen_x86.cpp - IA-32 code generator
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2005 Christian Bauer
10 *
11 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 /* This should eventually end up in machdep/, but for now, x86 is the
29 only target, and it's easier this way... */
30
31 #include "flags_x86.h"
32
33 /*************************************************************************
34 * Some basic information about the target CPU *
35 *************************************************************************/
36
/* Symbolic indices for the native integer registers. The register tables
   and REG_* aliases further down in this file are expressed in these numbers. */
37 #define EAX_INDEX 0
38 #define ECX_INDEX 1
39 #define EDX_INDEX 2
40 #define EBX_INDEX 3
41 #define ESP_INDEX 4
42 #define EBP_INDEX 5
43 #define ESI_INDEX 6
44 #define EDI_INDEX 7
/* Indices 8-15 are only defined when building for x86-64. */
45 #if defined(__x86_64__)
46 #define R8_INDEX 8
47 #define R9_INDEX 9
48 #define R10_INDEX 10
49 #define R11_INDEX 11
50 #define R12_INDEX 12
51 #define R13_INDEX 13
52 #define R14_INDEX 14
53 #define R15_INDEX 15
54 #endif
55
56 /* The register in which subroutines return an integer return value */
57 #define REG_RESULT EAX_INDEX
58
59 /* The registers subroutines take their first and second argument in */
/* Three cases are distinguished: MSVC without USE_NORMAL_CALLING_CONVENTION
   (ECX/EDX), x86-64 (EDI/ESI), and everything else (EAX/EDX). */
60 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61 /* Handle the _fastcall parameters of ECX and EDX */
62 #define REG_PAR1 ECX_INDEX
63 #define REG_PAR2 EDX_INDEX
64 #elif defined(__x86_64__)
65 #define REG_PAR1 EDI_INDEX
66 #define REG_PAR2 ESI_INDEX
67 #else
68 #define REG_PAR1 EAX_INDEX
69 #define REG_PAR2 EDX_INDEX
70 #endif
71
72 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
/* NOTE(review): on the MSVC branch REG_PC_TMP is EAX_INDEX, i.e. the same
   register as REG_PC_PRE, although the other branch documents it as
   "not the above" -- confirm this is intended. */
73 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 #define REG_PC_TMP EAX_INDEX
75 #else
76 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 #endif
78
/* Registers with a fixed role: the shift count register and the register
   pair that receives the 64-bit product of a 32x32 multiply. */
79 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 -1 if any reg will do */
81 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82 #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83
/* Stack layout constants. STACK_OFFSET is one pointer in size -- presumably
   the return address already on the stack at function entry; confirm against
   the code that consumes these constants (not visible in this part of the file). */
84 #define STACK_ALIGN 16
85 #define STACK_OFFSET sizeof(void *)
86
/* Register tables. always_used, can_byte and can_word are lists of register
   indices terminated by -1 (4 is ESP_INDEX); call_saved and need_to_preserve
   hold one 0/1 flag per register index. */
87 uae_s8 always_used[]={4,-1};
88 #if defined(__x86_64__)
89 uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
90 uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
91 #else
/* Without x86-64 only indices 0-3 (EAX, ECX, EDX, EBX) are listed for byte access. */
92 uae_s8 can_byte[]={0,1,2,3,-1};
93 uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
94 #endif
95
96 #if USE_OPTIMIZED_CALLS
97 /* Make sure interpretive core does not use cpuopti */
98 uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
99 #error FIXME: code not ready
100 #else
101 /* cpuopti mutates instruction handlers to assume registers are saved
102 by the caller */
/* Only index 4 (ESP_INDEX) is flagged as saved in this configuration. */
103 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
104 #endif
105
106 /* This *should* be the same as call_saved. But:
107 - We might not really know which registers are saved, and which aren't,
108 so we need to preserve some, but don't want to rely on everyone else
109 also saving those registers
110 - Special registers (such as the stack pointer) should not be "preserved"
111 by pushing, even though they are "saved" across function calls
112 */
113 #if defined(__x86_64__)
114 /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
115 /* preserve r11 because it's generally used to hold pointers to functions */
116 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
117 #else
/* Every register except index 4 (ESP_INDEX) is preserved here. */
118 static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
119 #endif
120
121 /* Whether classes of instructions do or don't clobber the native flags */
/* A macro defined empty marks a class that leaves the flags alone; the others
   expand to a clobber_flags() call. */
122 #define CLOBBER_MOV
123 #define CLOBBER_LEA
124 #define CLOBBER_CMOV
125 #define CLOBBER_POP
126 #define CLOBBER_PUSH
127 #define CLOBBER_SUB clobber_flags()
128 #define CLOBBER_SBB clobber_flags()
129 #define CLOBBER_CMP clobber_flags()
130 #define CLOBBER_ADD clobber_flags()
131 #define CLOBBER_ADC clobber_flags()
132 #define CLOBBER_AND clobber_flags()
133 #define CLOBBER_OR clobber_flags()
134 #define CLOBBER_XOR clobber_flags()
135
136 #define CLOBBER_ROL clobber_flags()
137 #define CLOBBER_ROR clobber_flags()
138 #define CLOBBER_SHLL clobber_flags()
139 #define CLOBBER_SHRL clobber_flags()
140 #define CLOBBER_SHRA clobber_flags()
141 #define CLOBBER_TEST clobber_flags()
142 #define CLOBBER_CL16
143 #define CLOBBER_CL8
144 #define CLOBBER_SE32
145 #define CLOBBER_SE16
146 #define CLOBBER_SE8
147 #define CLOBBER_ZE32
148 #define CLOBBER_ZE16
149 #define CLOBBER_ZE8
/* SW16 clobbers while SW32 does not: raw_bswap_16 is emitted through the
   rotate encoder ROLWir, raw_bswap_32 through BSWAPLr. */
150 #define CLOBBER_SW16 clobber_flags()
151 #define CLOBBER_SW32
152 #define CLOBBER_SETCC
153 #define CLOBBER_MUL clobber_flags()
154 #define CLOBBER_BT clobber_flags()
155 #define CLOBBER_BSF clobber_flags()
156
157 /* FIXME: the new run-time assembler is enabled here for x86-64 builds only;
   elsewhere it stays disabled until that's proofread. */
158 #if defined(__x86_64__)
159 #define USE_NEW_RTASM 1
160 #endif
161
162 #if USE_NEW_RTASM
163
/* Configuration switches set before including the encoder header. */
164 #if defined(__x86_64__)
165 #define X86_TARGET_64BIT 1
166 #endif
167 #define X86_FLAT_REGISTERS 0
168 #define X86_OPTIMIZE_ALU 1
169 #define X86_OPTIMIZE_ROTSHI 1
170 #include "codegen_x86.h"
171
/* Adapter macros: map the x86_* names onto emit_byte/word/long/quad,
   get_target() and jit_fail(). Presumably these are the hooks that
   codegen_x86.h expects its includer to provide -- confirm in that header. */
172 #define x86_emit_byte(B) emit_byte(B)
173 #define x86_emit_word(W) emit_word(W)
174 #define x86_emit_long(L) emit_long(L)
175 #define x86_emit_quad(Q) emit_quad(Q)
176 #define x86_get_target() get_target()
/* Captures file, line and function of the expansion site for the report. */
177 #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
178
/* Report a fatal JIT error on stderr and terminate the process with abort().
   msg is the failure text; file, line and function identify where the failure
   was raised (x86_emit_failure() supplies them). Does not return. */
179 static void jit_fail(const char *msg, const char *file, int line, const char *function)
180 {
181 fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
182 function, file, line, msg);
183 abort();
184 }
185
/* Stack push/pop wrappers. Each selects the Q-suffixed encoder when built
   for x86-64 and the L-suffixed encoder otherwise. */
186 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
187 {
188 #if defined(__x86_64__)
189 PUSHQr(r);
190 #else
191 PUSHLr(r);
192 #endif
193 }
194 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
195
196 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
197 {
198 #if defined(__x86_64__)
199 POPQr(r);
200 #else
201 POPLr(r);
202 #endif
203 }
204 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
205
/* Pop into a memory operand: d is passed as the displacement with no base
   register, no index register (X86_NOREG) and scale 1. */
206 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
207 {
208 #if defined(__x86_64__)
209 POPQm(d, X86_NOREG, X86_NOREG, 1);
210 #else
211 POPLm(d, X86_NOREG, X86_NOREG, 1);
212 #endif
213 }
214 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
215
/* Bit-test family (BT, BTC, BTR, BTS encoders), one encoder call per wrapper.
   The _ri forms take the bit number as immediate i, the _rr forms take it in
   register b. The wrapper lists the target register first; the encoder is
   called with the bit operand first and the target register second. */
216 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
217 {
218 BTLir(i, r);
219 }
220 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
221
222 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
223 {
224 BTLrr(b, r);
225 }
226 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
227
228 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
229 {
230 BTCLir(i, r);
231 }
232 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
233
234 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
235 {
236 BTCLrr(b, r);
237 }
238 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
239
240 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
241 {
242 BTRLir(i, r);
243 }
244 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
245
246 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
247 {
248 BTRLrr(b, r);
249 }
250 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
251
252 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
253 {
254 BTSLir(i, r);
255 }
256 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
257
258 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
259 {
260 BTSLrr(b, r);
261 }
262 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
263
/* Single-encoder wrappers. Wrapper parameters are listed destination first;
   the encoder macros are called with the source operand first. */
264 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
265 {
266 SUBWir(i, d);
267 }
268 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
269
/* Memory operands below are given as a bare displacement: base and index are
   X86_NOREG and the scale is 1. */
270 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
271 {
272 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
273 }
274 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
275
276 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
277 {
278 MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
279 }
280 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
281
282 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
283 {
284 MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
285 }
286 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
287
288 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
289 {
290 MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
291 }
292 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
293
/* ROL encoder wrappers. The _mi form targets a memory operand given as a bare
   displacement (no base/index register, scale 1); the _ri forms take the
   count as immediate i; the _rr forms take the count in register r. In every
   case the count is passed to the encoder first. */
294 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
295 {
296 ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
297 }
298 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
299
300 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
301 {
302 ROLBir(i, r);
303 }
304 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
305
306 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
307 {
308 ROLWir(i, r);
309 }
310 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
311
312 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
313 {
314 ROLLir(i, r);
315 }
316 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
317
/* Register-count forms: the count register is declared R1 (byte access) --
   presumably it must be SHIFTCOUNT_NREG; that is not enforced here. */
318 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
319 {
320 ROLLrr(r, d);
321 }
322 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
323
324 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
325 {
326 ROLWrr(r, d);
327 }
328 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
329
330 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
331 {
332 ROLBrr(r, d);
333 }
334 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
335
/* SHL encoder wrappers with the count in register r (long, word and byte
   destination sizes). The count register is passed to the encoder first. */
336 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
337 {
338 SHLLrr(r, d);
339 }
340 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
341
342 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
343 {
344 SHLWrr(r, d);
345 }
346 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
347
348 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
349 {
350 SHLBrr(r, d);
351 }
352 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
353
/* ROR encoder wrappers (immediate-count _ri forms and register-count _rr
   forms), with raw_or_l_rm sitting between them in source order. The count
   is always the first encoder argument. */
354 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
355 {
356 RORBir(i, r);
357 }
358 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
359
360 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
361 {
362 RORWir(i, r);
363 }
364 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
365
/* ORLmr with a memory source given as a bare displacement s (no base/index
   register, scale 1) and register destination d. */
366 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
367 {
368 ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
369 }
370 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
371
372 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
373 {
374 RORLir(i, r);
375 }
376 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
377
378 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
379 {
380 RORLrr(r, d);
381 }
382 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
383
384 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
385 {
386 RORWrr(r, d);
387 }
388 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
389
390 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
391 {
392 RORBrr(r, d);
393 }
394 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
395
/* Right-shift wrappers with the count in register r. The raw_shrl_* names
   map to the SHR encoders and the raw_shra_* names map to the SAR encoders;
   the count register is passed to the encoder first. */
396 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
397 {
398 SHRLrr(r, d);
399 }
400 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
401
402 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
403 {
404 SHRWrr(r, d);
405 }
406 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
407
408 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
409 {
410 SHRBrr(r, d);
411 }
412 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
413
414 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
415 {
416 SARLrr(r, d);
417 }
418 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
419
420 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
421 {
422 SARWrr(r, d);
423 }
424 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
425
426 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
427 {
428 SARBrr(r, d);
429 }
430 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
431
/* Shift-by-immediate wrappers: raw_shll_* -> SHL encoders, raw_shrl_* -> SHR
   encoders, raw_shra_* -> SAR encoders, each in long/word/byte size. The
   immediate count i is passed to the encoder before the register r. */
432 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
433 {
434 SHLLir(i, r);
435 }
436 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
437
438 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
439 {
440 SHLWir(i, r);
441 }
442 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
443
444 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
445 {
446 SHLBir(i, r);
447 }
448 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
449
450 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
451 {
452 SHRLir(i, r);
453 }
454 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
455
456 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
457 {
458 SHRWir(i, r);
459 }
460 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
461
462 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
463 {
464 SHRBir(i, r);
465 }
466 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
467
468 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
469 {
470 SARLir(i, r);
471 }
472 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
473
474 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
475 {
476 SARWir(i, r);
477 }
478 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
479
480 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
481 {
482 SARBir(i, r);
483 }
484 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
485
/* SAHF, CPUID and LAHF take no encoder operands; the dummy_* parameter of
   each wrapper is never used in the body. Presumably it exists so the caller's
   register allocation ties the operation to AH/EAX -- confirm at the callers. */
486 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
487 {
488 SAHF();
489 }
490 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
491
492 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
493 {
494 CPUID();
495 }
496 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
497
498 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
499 {
500 LAHF();
501 }
502 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
503
/* SETcc wrappers: the condition code cc is passed to the encoder first,
   followed by the register destination or by a memory destination given as a
   bare displacement (no base/index register, scale 1). */
504 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
505 {
506 SETCCir(cc, d);
507 }
508 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
509
510 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
511 {
512 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
513 }
514 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
515
/* Conditional register-to-register move of s into d under condition cc.
   When have_cmov is set a single CMOVLrr is emitted. Otherwise the move is
   emulated: a short conditional jump on cc^1 (the condition code with its
   lowest bit flipped) with displacement 2, followed by an unconditional
   MOVLrr. The displacement of 2 presumably equals the encoded length of that
   MOVLrr -- confirm if the encoder can ever add a prefix byte.
   On x86-64 builds the fallback path logs a message and aborts first. */
516 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
517 {
518 if (have_cmov)
519 CMOVLrr(cc, s, d);
520 else { /* replacement using branch and mov */
521 #if defined(__x86_64__)
522 write_log("x86-64 implementations are bound to have CMOV!\n");
523 abort();
524 #endif
525 JCCSii(cc^1, 2);
526 MOVLrr(s, d);
527 }
528 }
529 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
530
/* BSF wrapper: source register s is passed to the encoder before destination d. */
531 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
532 {
533 BSFLrr(s, d);
534 }
535 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
536
/* Sign- and zero-extension wrappers built on the MOVS* / MOVZ* encoders.
   NOTE(review): raw_sign_extend_32_rr uses MOVSLQrr, which looks like a
   64-bit-only form; confirm it is never reached when USE_NEW_RTASM is enabled
   on a 32-bit build. */
537 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
538 {
539 MOVSLQrr(s, d);
540 }
541 LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
542
543 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
544 {
545 MOVSWLrr(s, d);
546 }
547 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
548
549 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
550 {
551 MOVSBLrr(s, d);
552 }
553 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
554
555 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
556 {
557 MOVZWLrr(s, d);
558 }
559 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
560
561 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
562 {
563 MOVZBLrr(s, d);
564 }
565 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
566
/* Two-operand IMUL: d and s are both passed to the IMULLrr encoder. */
567 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
568 {
569 IMULLrr(s, d);
570 }
571 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
572
/* One-operand IMUL. The caller must pass d == MUL_NREG1 (EAX_INDEX) and
   s == MUL_NREG2 (EDX_INDEX); anything else is logged and aborts. Only s is
   handed to the encoder. */
573 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
574 {
575 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
576 write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
577 abort();
578 }
579 IMULLr(s);
580 }
581 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
582
/* One-operand MUL with the same register requirement and failure handling
   as raw_imul_64_32. */
583 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
584 {
585 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
586 write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
587 abort();
588 }
589 MULLr(s);
590 }
591 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
592
/* Not implemented for this target: emits nothing and aborts unconditionally,
   without printing a diagnostic. Both parameters are unused. */
593 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
594 {
595 abort(); /* %^$&%^$%#^ x86! */
596 }
597 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
598
/* Register-to-register MOV in byte and word size; source s is passed to the
   encoder before destination d. */
599 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
600 {
601 MOVBrr(s, d);
602 }
603 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
604
605 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
606 {
607 MOVWrr(s, d);
608 }
609 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
610
/* Indexed loads and stores. The memory operand is handed to the MOV encoders
   as (displacement, base register, index register, scale factor):
     *_rrm_indexed  load,  displacement 0
     *_mrr_indexed  store, displacement 0
     *_bmrr_indexed store, displacement = base
     *_brrm_indexed load,  displacement = base
   The *mr encoders take the memory operand first and the destination register
   last; the *rm encoders take the source register first. */
611 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
612 {
613 MOVLmr(0, baser, index, factor, d);
614 }
615 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
616
617 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
618 {
619 MOVWmr(0, baser, index, factor, d);
620 }
621 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
622
623 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
624 {
625 MOVBmr(0, baser, index, factor, d);
626 }
627 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
628
629 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
630 {
631 MOVLrm(s, 0, baser, index, factor);
632 }
633 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
634
635 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
636 {
637 MOVWrm(s, 0, baser, index, factor);
638 }
639 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
640
641 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
642 {
643 MOVBrm(s, 0, baser, index, factor);
644 }
645 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
646
647 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
648 {
649 MOVLrm(s, base, baser, index, factor);
650 }
651 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
652
653 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
654 {
655 MOVWrm(s, base, baser, index, factor);
656 }
657 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
658
659 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
660 {
661 MOVBrm(s, base, baser, index, factor);
662 }
663 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
664
665 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
666 {
667 MOVLmr(base, baser, index, factor, d);
668 }
669 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
670
671 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
672 {
673 MOVWmr(base, baser, index, factor, d);
674 }
675 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
676
677 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
678 {
679 MOVBmr(base, baser, index, factor, d);
680 }
681 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
682
/* Load with displacement base and a scaled index but no base register
   (X86_NOREG). */
683 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
684 {
685 MOVLmr(base, X86_NOREG, index, factor, d);
686 }
687 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
688
/* Conditional load into d from displacement base plus scaled index (no base
   register) under condition cond. With have_cmov a single CMOVLmr is emitted.
   Otherwise a short conditional jump on cond^1 (condition code with its lowest
   bit flipped) with displacement 7 is emitted ahead of a plain MOVLmr; the 7
   presumably equals the encoded length of that load -- confirm against the
   encoder. On x86-64 builds the fallback path logs a message and aborts first. */
689 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
690 {
691 if (have_cmov)
692 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
693 else { /* replacement using branch and mov */
694 #if defined(__x86_64__)
695 write_log("x86-64 implementations are bound to have CMOV!\n");
696 abort();
697 #endif
698 JCCSii(cond^1, 7);
699 MOVLmr(base, X86_NOREG, index, factor, d);
700 }
701 }
702 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
703
/* Conditional load into d from the bare displacement mem (no base or index
   register) under condition cond. With have_cmov a single CMOVLmr is emitted.
   Otherwise a short conditional jump on cond^1 with displacement 6 is emitted
   ahead of a plain MOVLmr; the 6 presumably equals the encoded length of that
   load -- confirm against the encoder. On x86-64 builds the fallback path logs
   a message and aborts first. */
704 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
705 {
706 if (have_cmov)
707 CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
708 else { /* replacement using branch and mov */
709 #if defined(__x86_64__)
710 write_log("x86-64 implementations are bound to have CMOV!\n");
711 abort();
712 #endif
713 JCCSii(cond^1, 6);
714 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
715 }
716 }
717 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
718
/* Loads from base register s plus displacement offset (no index register,
   scale 1) into d, in long/word/byte size. */
719 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
720 {
721 MOVLmr(offset, s, X86_NOREG, 1, d);
722 }
723 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
724
725 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
726 {
727 MOVWmr(offset, s, X86_NOREG, 1, d);
728 }
729 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
730
731 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
732 {
733 MOVBmr(offset, s, X86_NOREG, 1, d);
734 }
735 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
736
/* The *_brR variants have the same bodies as the *_rR variants above; only
   the names differ. */
737 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
738 {
739 MOVLmr(offset, s, X86_NOREG, 1, d);
740 }
741 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
742
743 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
744 {
745 MOVWmr(offset, s, X86_NOREG, 1, d);
746 }
747 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
748
749 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
750 {
751 MOVBmr(offset, s, X86_NOREG, 1, d);
752 }
753 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
754
/* Stores to base register d plus displacement offset (no index register,
   scale 1). The *_Ri forms store immediate i, the *_Rr forms store register s;
   the value being stored is the first encoder argument. */
755 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
756 {
757 MOVLim(i, offset, d, X86_NOREG, 1);
758 }
759 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
760
761 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
762 {
763 MOVWim(i, offset, d, X86_NOREG, 1);
764 }
765 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
766
767 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
768 {
769 MOVBim(i, offset, d, X86_NOREG, 1);
770 }
771 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
772
773 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
774 {
775 MOVLrm(s, offset, d, X86_NOREG, 1);
776 }
777 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
778
779 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
780 {
781 MOVWrm(s, offset, d, X86_NOREG, 1);
782 }
783 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
784
785 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
786 {
787 MOVBrm(s, offset, d, X86_NOREG, 1);
788 }
789 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
790
/* LEA wrappers. The address expression is passed to LEALmr as
   (displacement, base register, index register, scale) followed by the
   destination d:
     raw_lea_l_brr          offset + s
     raw_lea_l_brr_indexed  offset + s + index*factor
     raw_lea_l_rr_indexed   s + index*factor (displacement 0) */
791 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
792 {
793 LEALmr(offset, s, X86_NOREG, 1, d);
794 }
795 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
796
797 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
798 {
799 LEALmr(offset, s, index, factor, d);
800 }
801 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
802
803 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
804 {
805 LEALmr(0, s, index, factor, d);
806 }
807 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
808
/* Stores of register s to base register d plus displacement offset (no index
   register, scale 1). The bodies are the same as those of the raw_mov_*_Rr
   wrappers; only the names differ. */
809 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
810 {
811 MOVLrm(s, offset, d, X86_NOREG, 1);
812 }
813 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
814
815 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
816 {
817 MOVWrm(s, offset, d, X86_NOREG, 1);
818 }
819 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
820
821 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
822 {
823 MOVBrm(s, offset, d, X86_NOREG, 1);
824 }
825 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
826
/* 32-bit byte swap through the BSWAPLr encoder. */
827 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
828 {
829 BSWAPLr(r);
830 }
831 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
832
/* 16-bit byte swap, emitted as a word rotate by 8 (ROLWir) rather than a
   BSWAP; unlike raw_bswap_32 this wrapper is declared with WRITE as its first
   LOWFUNC argument. */
833 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
834 {
835 ROLWir(8, r);
836 }
837 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
838
/* Plain MOV wrappers. Suffix key as used here: _rr register to register,
   _mr register to memory, _rm memory to register, _ri immediate to register.
   Memory operands are bare displacements (no base/index register, scale 1).
   The source operand is always the first encoder argument. */
839 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
840 {
841 MOVLrr(s, d);
842 }
843 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
844
845 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
846 {
847 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
848 }
849 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
850
851 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
852 {
853 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
854 }
855 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
856
857 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
858 {
859 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
860 }
861 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
862
863 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
864 {
865 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
866 }
867 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
868
869 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
870 {
871 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
872 }
873 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
874
875 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
876 {
877 MOVLir(s, d);
878 }
879 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
880
881 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
882 {
883 MOVWir(s, d);
884 }
885 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
886
887 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
888 {
889 MOVBir(s, d);
890 }
891 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
892
/* Immediate-to-memory ADC/ADD wrappers. The destination is a bare
   displacement d (no base/index register, scale 1); immediate s is the first
   encoder argument.
   NOTE(review): raw_adc_l_mi declares d as MEMRW while the raw_add_*_mi
   wrappers declare it as IMM -- confirm whether the difference matters. */
893 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
894 {
895 ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
896 }
897 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
898
899 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
900 {
901 ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
902 }
903 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
904
905 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
906 {
907 ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
908 }
909 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
910
911 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
912 {
913 ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
914 }
915 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
916
/* TEST wrappers. Both operands are declared read-only (R4/R2/R1); the second
   wrapper parameter (immediate i or register s) is the first encoder
   argument. */
917 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
918 {
919 TESTLir(i, d);
920 }
921 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
922
923 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
924 {
925 TESTLrr(s, d);
926 }
927 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
928
929 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
930 {
931 TESTWrr(s, d);
932 }
933 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
934
935 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
936 {
937 TESTBrr(s, d);
938 }
939 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
940
/* Bitwise XOR/AND/OR wrappers on a register destination d. The _ri forms
   take an immediate, the unsuffixed forms take a source register s; the
   source operand is always the first encoder argument. */
941 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
942 {
943 XORLir(i, d);
944 }
945 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
946
947 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
948 {
949 ANDLir(i, d);
950 }
951 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
952
953 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
954 {
955 ANDWir(i, d);
956 }
957 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
958
959 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
960 {
961 ANDLrr(s, d);
962 }
963 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
964
965 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
966 {
967 ANDWrr(s, d);
968 }
969 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
970
971 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
972 {
973 ANDBrr(s, d);
974 }
975 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
976
977 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
978 {
979 ORLir(i, d);
980 }
981 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
982
983 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
984 {
985 ORLrr(s, d);
986 }
987 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
988
989 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
990 {
991 ORWrr(s, d);
992 }
993 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
994
995 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
996 {
997 ORBrr(s, d);
998 }
999 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1000
/* Register-to-register ADC and ADD wrappers in long/word/byte size. The ADC
   wrappers are declared with RMW as the first LOWFUNC argument, the ADD
   wrappers with WRITE. Source s is passed to the encoder before destination d. */
1001 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1002 {
1003 ADCLrr(s, d);
1004 }
1005 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1006
1007 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1008 {
1009 ADCWrr(s, d);
1010 }
1011 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1012
1013 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1014 {
1015 ADCBrr(s, d);
1016 }
1017 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1018
1019 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1020 {
1021 ADDLrr(s, d);
1022 }
1023 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1024
1025 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1026 {
1027 ADDWrr(s, d);
1028 }
1029 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1030
1031 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1032 {
1033 ADDBrr(s, d);
1034 }
1035 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1036
/* Immediate SUB and ADD wrappers on register d; immediate i is the first
   encoder argument. (The word-sized raw_sub_w_ri is defined earlier in the
   file.) */
1037 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1038 {
1039 SUBLir(i, d);
1040 }
1041 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1042
1043 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1044 {
1045 SUBBir(i, d);
1046 }
1047 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1048
1049 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1050 {
1051 ADDLir(i, d);
1052 }
1053 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1054
1055 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1056 {
1057 ADDWir(i, d);
1058 }
1059 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1060
1061 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1062 {
1063 ADDBir(i, d);
1064 }
1065 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1066
/* Register-to-register SBB and SUB wrappers in long/word/byte size. The SBB
   wrappers are declared with RMW as the first LOWFUNC argument, the SUB
   wrappers with WRITE. Source s is passed to the encoder before destination d. */
1067 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1068 {
1069 SBBLrr(s, d);
1070 }
1071 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1072
1073 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1074 {
1075 SBBWrr(s, d);
1076 }
1077 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1078
1079 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1080 {
1081 SBBBrr(s, d);
1082 }
1083 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1084
1085 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1086 {
1087 SUBLrr(s, d);
1088 }
1089 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1090
1091 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1092 {
1093 SUBWrr(s, d);
1094 }
1095 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1096
1097 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1098 {
1099 SUBBrr(s, d);
1100 }
1101 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1102
/* CMP wrappers on registers. Both operands are declared read-only; the second
   wrapper parameter (register s or immediate i) is the first encoder argument. */
1103 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1104 {
1105 CMPLrr(s, d);
1106 }
1107 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1108
1109 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1110 {
1111 CMPLir(i, r);
1112 }
1113 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1114
1115 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1116 {
1117 CMPWrr(s, d);
1118 }
1119 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1120
/* Byte-sized CMP of immediate s against a memory operand given as the bare
   displacement d (no base/index register, scale 1); s is the first encoder
   argument. The closing LENDFUNC now names raw_cmp_b_mi so that it mirrors
   the opening LOWFUNC, as every other wrapper in this file does. */
1121 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1122 {
1123 CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1124 }
1125 LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1126
1127 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1128 {
1129 CMPBir(i, d);
1130 }
1131 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1132
1133 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1134 {
1135 CMPBrr(s, d);
1136 }
1137 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1138
1139 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1140 {
1141 CMPLmr(offset, X86_NOREG, index, factor, d);
1142 }
1143 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1144
1145 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1146 {
1147 XORLrr(s, d);
1148 }
1149 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1150
1151 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1152 {
1153 XORWrr(s, d);
1154 }
1155 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1156
1157 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1158 {
1159 XORBrr(s, d);
1160 }
1161 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1162
1163 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1164 {
1165 SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1166 }
1167 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1168
1169 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1170 {
1171 CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1172 }
1173 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1174
1175 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1176 {
1177 XCHGLrr(r2, r1);
1178 }
1179 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1180
1181 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1182 {
1183 PUSHF();
1184 }
1185 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1186
1187 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1188 {
1189 POPF();
1190 }
1191 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1192
1193 #else
1194
/* Encoding-size optimisations used by the hand-written emitters below. */
const bool optimize_accum      = true;  /* use the short accumulator-only opcode when the register is EAX */
const bool optimize_imm8       = true;  /* use an 8-bit immediate/displacement when the value fits */
const bool optimize_shift_once = true;  /* use the shift/rotate-by-one opcode when the count is 1 */
1198
1199 /*************************************************************************
1200 * Actual encoding of the instructions on the target CPU *
1201 *************************************************************************/
1202
1203 static __inline__ int isaccum(int r)
1204 {
1205 return (r == EAX_INDEX);
1206 }
1207
1208 static __inline__ int isbyte(uae_s32 x)
1209 {
1210 return (x>=-128 && x<=127);
1211 }
1212
1213 static __inline__ int isword(uae_s32 x)
1214 {
1215 return (x>=-32768 && x<=32767);
1216 }
1217
1218 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1219 {
1220 emit_byte(0x50+r);
1221 }
1222 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1223
1224 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1225 {
1226 emit_byte(0x58+r);
1227 }
1228 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1229
1230 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1231 {
1232 emit_byte(0x8f);
1233 emit_byte(0x05);
1234 emit_long(d);
1235 }
1236 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1237
1238 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1239 {
1240 emit_byte(0x0f);
1241 emit_byte(0xba);
1242 emit_byte(0xe0+r);
1243 emit_byte(i);
1244 }
1245 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1246
1247 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1248 {
1249 emit_byte(0x0f);
1250 emit_byte(0xa3);
1251 emit_byte(0xc0+8*b+r);
1252 }
1253 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1254
1255 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1256 {
1257 emit_byte(0x0f);
1258 emit_byte(0xba);
1259 emit_byte(0xf8+r);
1260 emit_byte(i);
1261 }
1262 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1263
1264 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1265 {
1266 emit_byte(0x0f);
1267 emit_byte(0xbb);
1268 emit_byte(0xc0+8*b+r);
1269 }
1270 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1271
1272
1273 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1274 {
1275 emit_byte(0x0f);
1276 emit_byte(0xba);
1277 emit_byte(0xf0+r);
1278 emit_byte(i);
1279 }
1280 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1281
1282 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1283 {
1284 emit_byte(0x0f);
1285 emit_byte(0xb3);
1286 emit_byte(0xc0+8*b+r);
1287 }
1288 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1289
1290 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1291 {
1292 emit_byte(0x0f);
1293 emit_byte(0xba);
1294 emit_byte(0xe8+r);
1295 emit_byte(i);
1296 }
1297 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1298
1299 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1300 {
1301 emit_byte(0x0f);
1302 emit_byte(0xab);
1303 emit_byte(0xc0+8*b+r);
1304 }
1305 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1306
1307 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1308 {
1309 emit_byte(0x66);
1310 if (isbyte(i)) {
1311 emit_byte(0x83);
1312 emit_byte(0xe8+d);
1313 emit_byte(i);
1314 }
1315 else {
1316 if (optimize_accum && isaccum(d))
1317 emit_byte(0x2d);
1318 else {
1319 emit_byte(0x81);
1320 emit_byte(0xe8+d);
1321 }
1322 emit_word(i);
1323 }
1324 }
1325 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1326
1327
1328 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1329 {
1330 emit_byte(0x8b);
1331 emit_byte(0x05+8*d);
1332 emit_long(s);
1333 }
1334 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1335
1336 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1337 {
1338 emit_byte(0xc7);
1339 emit_byte(0x05);
1340 emit_long(d);
1341 emit_long(s);
1342 }
1343 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1344
1345 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1346 {
1347 emit_byte(0x66);
1348 emit_byte(0xc7);
1349 emit_byte(0x05);
1350 emit_long(d);
1351 emit_word(s);
1352 }
1353 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1354
1355 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1356 {
1357 emit_byte(0xc6);
1358 emit_byte(0x05);
1359 emit_long(d);
1360 emit_byte(s);
1361 }
1362 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1363
1364 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1365 {
1366 if (optimize_shift_once && (i == 1)) {
1367 emit_byte(0xd0);
1368 emit_byte(0x05);
1369 emit_long(d);
1370 }
1371 else {
1372 emit_byte(0xc0);
1373 emit_byte(0x05);
1374 emit_long(d);
1375 emit_byte(i);
1376 }
1377 }
1378 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1379
1380 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1381 {
1382 if (optimize_shift_once && (i == 1)) {
1383 emit_byte(0xd0);
1384 emit_byte(0xc0+r);
1385 }
1386 else {
1387 emit_byte(0xc0);
1388 emit_byte(0xc0+r);
1389 emit_byte(i);
1390 }
1391 }
1392 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1393
1394 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1395 {
1396 emit_byte(0x66);
1397 emit_byte(0xc1);
1398 emit_byte(0xc0+r);
1399 emit_byte(i);
1400 }
1401 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1402
1403 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1404 {
1405 if (optimize_shift_once && (i == 1)) {
1406 emit_byte(0xd1);
1407 emit_byte(0xc0+r);
1408 }
1409 else {
1410 emit_byte(0xc1);
1411 emit_byte(0xc0+r);
1412 emit_byte(i);
1413 }
1414 }
1415 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1416
1417 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1418 {
1419 emit_byte(0xd3);
1420 emit_byte(0xc0+d);
1421 }
1422 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1423
1424 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1425 {
1426 emit_byte(0x66);
1427 emit_byte(0xd3);
1428 emit_byte(0xc0+d);
1429 }
1430 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1431
1432 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1433 {
1434 emit_byte(0xd2);
1435 emit_byte(0xc0+d);
1436 }
1437 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1438
1439 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1440 {
1441 emit_byte(0xd3);
1442 emit_byte(0xe0+d);
1443 }
1444 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1445
1446 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1447 {
1448 emit_byte(0x66);
1449 emit_byte(0xd3);
1450 emit_byte(0xe0+d);
1451 }
1452 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1453
1454 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1455 {
1456 emit_byte(0xd2);
1457 emit_byte(0xe0+d);
1458 }
1459 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1460
1461 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1462 {
1463 if (optimize_shift_once && (i == 1)) {
1464 emit_byte(0xd0);
1465 emit_byte(0xc8+r);
1466 }
1467 else {
1468 emit_byte(0xc0);
1469 emit_byte(0xc8+r);
1470 emit_byte(i);
1471 }
1472 }
1473 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1474
1475 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1476 {
1477 emit_byte(0x66);
1478 emit_byte(0xc1);
1479 emit_byte(0xc8+r);
1480 emit_byte(i);
1481 }
1482 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1483
1484 // gb-- used for making an fpcr value in compemu_fpp.cpp
1485 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1486 {
1487 emit_byte(0x0b);
1488 emit_byte(0x05+8*d);
1489 emit_long(s);
1490 }
1491 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1492
1493 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1494 {
1495 if (optimize_shift_once && (i == 1)) {
1496 emit_byte(0xd1);
1497 emit_byte(0xc8+r);
1498 }
1499 else {
1500 emit_byte(0xc1);
1501 emit_byte(0xc8+r);
1502 emit_byte(i);
1503 }
1504 }
1505 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1506
1507 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1508 {
1509 emit_byte(0xd3);
1510 emit_byte(0xc8+d);
1511 }
1512 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1513
1514 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1515 {
1516 emit_byte(0x66);
1517 emit_byte(0xd3);
1518 emit_byte(0xc8+d);
1519 }
1520 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1521
1522 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1523 {
1524 emit_byte(0xd2);
1525 emit_byte(0xc8+d);
1526 }
1527 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1528
1529 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1530 {
1531 emit_byte(0xd3);
1532 emit_byte(0xe8+d);
1533 }
1534 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1535
1536 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1537 {
1538 emit_byte(0x66);
1539 emit_byte(0xd3);
1540 emit_byte(0xe8+d);
1541 }
1542 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1543
1544 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1545 {
1546 emit_byte(0xd2);
1547 emit_byte(0xe8+d);
1548 }
1549 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1550
1551 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1552 {
1553 emit_byte(0xd3);
1554 emit_byte(0xf8+d);
1555 }
1556 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1557
1558 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1559 {
1560 emit_byte(0x66);
1561 emit_byte(0xd3);
1562 emit_byte(0xf8+d);
1563 }
1564 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1565
1566 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1567 {
1568 emit_byte(0xd2);
1569 emit_byte(0xf8+d);
1570 }
1571 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1572
1573 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1574 {
1575 if (optimize_shift_once && (i == 1)) {
1576 emit_byte(0xd1);
1577 emit_byte(0xe0+r);
1578 }
1579 else {
1580 emit_byte(0xc1);
1581 emit_byte(0xe0+r);
1582 emit_byte(i);
1583 }
1584 }
1585 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1586
1587 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1588 {
1589 emit_byte(0x66);
1590 emit_byte(0xc1);
1591 emit_byte(0xe0+r);
1592 emit_byte(i);
1593 }
1594 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1595
1596 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1597 {
1598 if (optimize_shift_once && (i == 1)) {
1599 emit_byte(0xd0);
1600 emit_byte(0xe0+r);
1601 }
1602 else {
1603 emit_byte(0xc0);
1604 emit_byte(0xe0+r);
1605 emit_byte(i);
1606 }
1607 }
1608 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1609
1610 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1611 {
1612 if (optimize_shift_once && (i == 1)) {
1613 emit_byte(0xd1);
1614 emit_byte(0xe8+r);
1615 }
1616 else {
1617 emit_byte(0xc1);
1618 emit_byte(0xe8+r);
1619 emit_byte(i);
1620 }
1621 }
1622 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1623
1624 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1625 {
1626 emit_byte(0x66);
1627 emit_byte(0xc1);
1628 emit_byte(0xe8+r);
1629 emit_byte(i);
1630 }
1631 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1632
1633 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1634 {
1635 if (optimize_shift_once && (i == 1)) {
1636 emit_byte(0xd0);
1637 emit_byte(0xe8+r);
1638 }
1639 else {
1640 emit_byte(0xc0);
1641 emit_byte(0xe8+r);
1642 emit_byte(i);
1643 }
1644 }
1645 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1646
1647 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1648 {
1649 if (optimize_shift_once && (i == 1)) {
1650 emit_byte(0xd1);
1651 emit_byte(0xf8+r);
1652 }
1653 else {
1654 emit_byte(0xc1);
1655 emit_byte(0xf8+r);
1656 emit_byte(i);
1657 }
1658 }
1659 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1660
1661 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1662 {
1663 emit_byte(0x66);
1664 emit_byte(0xc1);
1665 emit_byte(0xf8+r);
1666 emit_byte(i);
1667 }
1668 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1669
1670 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1671 {
1672 if (optimize_shift_once && (i == 1)) {
1673 emit_byte(0xd0);
1674 emit_byte(0xf8+r);
1675 }
1676 else {
1677 emit_byte(0xc0);
1678 emit_byte(0xf8+r);
1679 emit_byte(i);
1680 }
1681 }
1682 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1683
1684 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1685 {
1686 emit_byte(0x9e);
1687 }
1688 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1689
1690 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1691 {
1692 emit_byte(0x0f);
1693 emit_byte(0xa2);
1694 }
1695 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1696
1697 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1698 {
1699 emit_byte(0x9f);
1700 }
1701 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1702
1703 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1704 {
1705 emit_byte(0x0f);
1706 emit_byte(0x90+cc);
1707 emit_byte(0xc0+d);
1708 }
1709 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1710
1711 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1712 {
1713 emit_byte(0x0f);
1714 emit_byte(0x90+cc);
1715 emit_byte(0x05);
1716 emit_long(d);
1717 }
1718 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1719
1720 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1721 {
1722 if (have_cmov) {
1723 emit_byte(0x0f);
1724 emit_byte(0x40+cc);
1725 emit_byte(0xc0+8*d+s);
1726 }
1727 else { /* replacement using branch and mov */
1728 int uncc=(cc^1);
1729 emit_byte(0x70+uncc);
1730 emit_byte(2); /* skip next 2 bytes if not cc=true */
1731 emit_byte(0x89);
1732 emit_byte(0xc0+8*s+d);
1733 }
1734 }
1735 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1736
1737 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1738 {
1739 emit_byte(0x0f);
1740 emit_byte(0xbc);
1741 emit_byte(0xc0+8*d+s);
1742 }
1743 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1744
1745 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1746 {
1747 emit_byte(0x0f);
1748 emit_byte(0xbf);
1749 emit_byte(0xc0+8*d+s);
1750 }
1751 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1752
1753 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1754 {
1755 emit_byte(0x0f);
1756 emit_byte(0xbe);
1757 emit_byte(0xc0+8*d+s);
1758 }
1759 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1760
1761 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1762 {
1763 emit_byte(0x0f);
1764 emit_byte(0xb7);
1765 emit_byte(0xc0+8*d+s);
1766 }
1767 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1768
1769 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1770 {
1771 emit_byte(0x0f);
1772 emit_byte(0xb6);
1773 emit_byte(0xc0+8*d+s);
1774 }
1775 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1776
1777 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1778 {
1779 emit_byte(0x0f);
1780 emit_byte(0xaf);
1781 emit_byte(0xc0+8*d+s);
1782 }
1783 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1784
1785 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1786 {
1787 if (d!=MUL_NREG1 || s!=MUL_NREG2)
1788 abort();
1789 emit_byte(0xf7);
1790 emit_byte(0xea);
1791 }
1792 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1793
1794 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1795 {
1796 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1797 printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1798 abort();
1799 }
1800 emit_byte(0xf7);
1801 emit_byte(0xe2);
1802 }
1803 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1804
1805 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1806 {
1807 abort(); /* %^$&%^$%#^ x86! */
1808 emit_byte(0x0f);
1809 emit_byte(0xaf);
1810 emit_byte(0xc0+8*d+s);
1811 }
1812 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1813
1814 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1815 {
1816 emit_byte(0x88);
1817 emit_byte(0xc0+8*s+d);
1818 }
1819 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1820
1821 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1822 {
1823 emit_byte(0x66);
1824 emit_byte(0x89);
1825 emit_byte(0xc0+8*s+d);
1826 }
1827 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1828
1829 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1830 {
1831 int isebp=(baser==5)?0x40:0;
1832 int fi;
1833
1834 switch(factor) {
1835 case 1: fi=0; break;
1836 case 2: fi=1; break;
1837 case 4: fi=2; break;
1838 case 8: fi=3; break;
1839 default: abort();
1840 }
1841
1842
1843 emit_byte(0x8b);
1844 emit_byte(0x04+8*d+isebp);
1845 emit_byte(baser+8*index+0x40*fi);
1846 if (isebp)
1847 emit_byte(0x00);
1848 }
1849 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1850
1851 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1852 {
1853 int fi;
1854 int isebp;
1855
1856 switch(factor) {
1857 case 1: fi=0; break;
1858 case 2: fi=1; break;
1859 case 4: fi=2; break;
1860 case 8: fi=3; break;
1861 default: abort();
1862 }
1863 isebp=(baser==5)?0x40:0;
1864
1865 emit_byte(0x66);
1866 emit_byte(0x8b);
1867 emit_byte(0x04+8*d+isebp);
1868 emit_byte(baser+8*index+0x40*fi);
1869 if (isebp)
1870 emit_byte(0x00);
1871 }
1872 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1873
1874 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1875 {
1876 int fi;
1877 int isebp;
1878
1879 switch(factor) {
1880 case 1: fi=0; break;
1881 case 2: fi=1; break;
1882 case 4: fi=2; break;
1883 case 8: fi=3; break;
1884 default: abort();
1885 }
1886 isebp=(baser==5)?0x40:0;
1887
1888 emit_byte(0x8a);
1889 emit_byte(0x04+8*d+isebp);
1890 emit_byte(baser+8*index+0x40*fi);
1891 if (isebp)
1892 emit_byte(0x00);
1893 }
1894 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1895
1896 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1897 {
1898 int fi;
1899 int isebp;
1900
1901 switch(factor) {
1902 case 1: fi=0; break;
1903 case 2: fi=1; break;
1904 case 4: fi=2; break;
1905 case 8: fi=3; break;
1906 default: abort();
1907 }
1908
1909
1910 isebp=(baser==5)?0x40:0;
1911
1912 emit_byte(0x89);
1913 emit_byte(0x04+8*s+isebp);
1914 emit_byte(baser+8*index+0x40*fi);
1915 if (isebp)
1916 emit_byte(0x00);
1917 }
1918 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1919
1920 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1921 {
1922 int fi;
1923 int isebp;
1924
1925 switch(factor) {
1926 case 1: fi=0; break;
1927 case 2: fi=1; break;
1928 case 4: fi=2; break;
1929 case 8: fi=3; break;
1930 default: abort();
1931 }
1932 isebp=(baser==5)?0x40:0;
1933
1934 emit_byte(0x66);
1935 emit_byte(0x89);
1936 emit_byte(0x04+8*s+isebp);
1937 emit_byte(baser+8*index+0x40*fi);
1938 if (isebp)
1939 emit_byte(0x00);
1940 }
1941 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1942
1943 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1944 {
1945 int fi;
1946 int isebp;
1947
1948 switch(factor) {
1949 case 1: fi=0; break;
1950 case 2: fi=1; break;
1951 case 4: fi=2; break;
1952 case 8: fi=3; break;
1953 default: abort();
1954 }
1955 isebp=(baser==5)?0x40:0;
1956
1957 emit_byte(0x88);
1958 emit_byte(0x04+8*s+isebp);
1959 emit_byte(baser+8*index+0x40*fi);
1960 if (isebp)
1961 emit_byte(0x00);
1962 }
1963 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1964
1965 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1966 {
1967 int fi;
1968
1969 switch(factor) {
1970 case 1: fi=0; break;
1971 case 2: fi=1; break;
1972 case 4: fi=2; break;
1973 case 8: fi=3; break;
1974 default: abort();
1975 }
1976
1977 emit_byte(0x89);
1978 emit_byte(0x84+8*s);
1979 emit_byte(baser+8*index+0x40*fi);
1980 emit_long(base);
1981 }
1982 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1983
1984 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1985 {
1986 int fi;
1987
1988 switch(factor) {
1989 case 1: fi=0; break;
1990 case 2: fi=1; break;
1991 case 4: fi=2; break;
1992 case 8: fi=3; break;
1993 default: abort();
1994 }
1995
1996 emit_byte(0x66);
1997 emit_byte(0x89);
1998 emit_byte(0x84+8*s);
1999 emit_byte(baser+8*index+0x40*fi);
2000 emit_long(base);
2001 }
2002 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2003
2004 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2005 {
2006 int fi;
2007
2008 switch(factor) {
2009 case 1: fi=0; break;
2010 case 2: fi=1; break;
2011 case 4: fi=2; break;
2012 case 8: fi=3; break;
2013 default: abort();
2014 }
2015
2016 emit_byte(0x88);
2017 emit_byte(0x84+8*s);
2018 emit_byte(baser+8*index+0x40*fi);
2019 emit_long(base);
2020 }
2021 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2022
2023 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2024 {
2025 int fi;
2026
2027 switch(factor) {
2028 case 1: fi=0; break;
2029 case 2: fi=1; break;
2030 case 4: fi=2; break;
2031 case 8: fi=3; break;
2032 default: abort();
2033 }
2034
2035 emit_byte(0x8b);
2036 emit_byte(0x84+8*d);
2037 emit_byte(baser+8*index+0x40*fi);
2038 emit_long(base);
2039 }
2040 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2041
2042 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2043 {
2044 int fi;
2045
2046 switch(factor) {
2047 case 1: fi=0; break;
2048 case 2: fi=1; break;
2049 case 4: fi=2; break;
2050 case 8: fi=3; break;
2051 default: abort();
2052 }
2053
2054 emit_byte(0x66);
2055 emit_byte(0x8b);
2056 emit_byte(0x84+8*d);
2057 emit_byte(baser+8*index+0x40*fi);
2058 emit_long(base);
2059 }
2060 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2061
2062 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2063 {
2064 int fi;
2065
2066 switch(factor) {
2067 case 1: fi=0; break;
2068 case 2: fi=1; break;
2069 case 4: fi=2; break;
2070 case 8: fi=3; break;
2071 default: abort();
2072 }
2073
2074 emit_byte(0x8a);
2075 emit_byte(0x84+8*d);
2076 emit_byte(baser+8*index+0x40*fi);
2077 emit_long(base);
2078 }
2079 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2080
2081 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2082 {
2083 int fi;
2084 switch(factor) {
2085 case 1: fi=0; break;
2086 case 2: fi=1; break;
2087 case 4: fi=2; break;
2088 case 8: fi=3; break;
2089 default:
2090 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2091 abort();
2092 }
2093 emit_byte(0x8b);
2094 emit_byte(0x04+8*d);
2095 emit_byte(0x05+8*index+64*fi);
2096 emit_long(base);
2097 }
2098 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2099
2100 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2101 {
2102 int fi;
2103 switch(factor) {
2104 case 1: fi=0; break;
2105 case 2: fi=1; break;
2106 case 4: fi=2; break;
2107 case 8: fi=3; break;
2108 default:
2109 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2110 abort();
2111 }
2112 if (have_cmov) {
2113 emit_byte(0x0f);
2114 emit_byte(0x40+cond);
2115 emit_byte(0x04+8*d);
2116 emit_byte(0x05+8*index+64*fi);
2117 emit_long(base);
2118 }
2119 else { /* replacement using branch and mov */
2120 int uncc=(cond^1);
2121 emit_byte(0x70+uncc);
2122 emit_byte(7); /* skip next 7 bytes if not cc=true */
2123 emit_byte(0x8b);
2124 emit_byte(0x04+8*d);
2125 emit_byte(0x05+8*index+64*fi);
2126 emit_long(base);
2127 }
2128 }
2129 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2130
2131 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2132 {
2133 if (have_cmov) {
2134 emit_byte(0x0f);
2135 emit_byte(0x40+cond);
2136 emit_byte(0x05+8*d);
2137 emit_long(mem);
2138 }
2139 else { /* replacement using branch and mov */
2140 int uncc=(cond^1);
2141 emit_byte(0x70+uncc);
2142 emit_byte(6); /* skip next 6 bytes if not cc=true */
2143 emit_byte(0x8b);
2144 emit_byte(0x05+8*d);
2145 emit_long(mem);
2146 }
2147 }
2148 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2149
2150 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2151 {
2152 Dif(!isbyte(offset)) abort();
2153 emit_byte(0x8b);
2154 emit_byte(0x40+8*d+s);
2155 emit_byte(offset);
2156 }
2157 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2158
2159 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2160 {
2161 Dif(!isbyte(offset)) abort();
2162 emit_byte(0x66);
2163 emit_byte(0x8b);
2164 emit_byte(0x40+8*d+s);
2165 emit_byte(offset);
2166 }
2167 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2168
2169 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2170 {
2171 Dif(!isbyte(offset)) abort();
2172 emit_byte(0x8a);
2173 emit_byte(0x40+8*d+s);
2174 emit_byte(offset);
2175 }
2176 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2177
2178 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2179 {
2180 emit_byte(0x8b);
2181 emit_byte(0x80+8*d+s);
2182 emit_long(offset);
2183 }
2184 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2185
2186 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2187 {
2188 emit_byte(0x66);
2189 emit_byte(0x8b);
2190 emit_byte(0x80+8*d+s);
2191 emit_long(offset);
2192 }
2193 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2194
2195 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2196 {
2197 emit_byte(0x8a);
2198 emit_byte(0x80+8*d+s);
2199 emit_long(offset);
2200 }
2201 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2202
2203 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2204 {
2205 Dif(!isbyte(offset)) abort();
2206 emit_byte(0xc7);
2207 emit_byte(0x40+d);
2208 emit_byte(offset);
2209 emit_long(i);
2210 }
2211 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2212
2213 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2214 {
2215 Dif(!isbyte(offset)) abort();
2216 emit_byte(0x66);
2217 emit_byte(0xc7);
2218 emit_byte(0x40+d);
2219 emit_byte(offset);
2220 emit_word(i);
2221 }
2222 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2223
2224 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2225 {
2226 Dif(!isbyte(offset)) abort();
2227 emit_byte(0xc6);
2228 emit_byte(0x40+d);
2229 emit_byte(offset);
2230 emit_byte(i);
2231 }
2232 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2233
2234 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2235 {
2236 Dif(!isbyte(offset)) abort();
2237 emit_byte(0x89);
2238 emit_byte(0x40+8*s+d);
2239 emit_byte(offset);
2240 }
2241 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2242
2243 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2244 {
2245 Dif(!isbyte(offset)) abort();
2246 emit_byte(0x66);
2247 emit_byte(0x89);
2248 emit_byte(0x40+8*s+d);
2249 emit_byte(offset);
2250 }
2251 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2252
2253 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2254 {
2255 Dif(!isbyte(offset)) abort();
2256 emit_byte(0x88);
2257 emit_byte(0x40+8*s+d);
2258 emit_byte(offset);
2259 }
2260 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2261
2262 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2263 {
2264 if (optimize_imm8 && isbyte(offset)) {
2265 emit_byte(0x8d);
2266 emit_byte(0x40+8*d+s);
2267 emit_byte(offset);
2268 }
2269 else {
2270 emit_byte(0x8d);
2271 emit_byte(0x80+8*d+s);
2272 emit_long(offset);
2273 }
2274 }
2275 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2276
2277 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2278 {
2279 int fi;
2280
2281 switch(factor) {
2282 case 1: fi=0; break;
2283 case 2: fi=1; break;
2284 case 4: fi=2; break;
2285 case 8: fi=3; break;
2286 default: abort();
2287 }
2288
2289 if (optimize_imm8 && isbyte(offset)) {
2290 emit_byte(0x8d);
2291 emit_byte(0x44+8*d);
2292 emit_byte(0x40*fi+8*index+s);
2293 emit_byte(offset);
2294 }
2295 else {
2296 emit_byte(0x8d);
2297 emit_byte(0x84+8*d);
2298 emit_byte(0x40*fi+8*index+s);
2299 emit_long(offset);
2300 }
2301 }
2302 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2303
2304 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2305 {
2306 int isebp=(s==5)?0x40:0;
2307 int fi;
2308
2309 switch(factor) {
2310 case 1: fi=0; break;
2311 case 2: fi=1; break;
2312 case 4: fi=2; break;
2313 case 8: fi=3; break;
2314 default: abort();
2315 }
2316
2317 emit_byte(0x8d);
2318 emit_byte(0x04+8*d+isebp);
2319 emit_byte(0x40*fi+8*index+s);
2320 if (isebp)
2321 emit_byte(0);
2322 }
2323 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2324
2325 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2326 {
2327 if (optimize_imm8 && isbyte(offset)) {
2328 emit_byte(0x89);
2329 emit_byte(0x40+8*s+d);
2330 emit_byte(offset);
2331 }
2332 else {
2333 emit_byte(0x89);
2334 emit_byte(0x80+8*s+d);
2335 emit_long(offset);
2336 }
2337 }
2338 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2339
2340 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2341 {
2342 emit_byte(0x66);
2343 emit_byte(0x89);
2344 emit_byte(0x80+8*s+d);
2345 emit_long(offset);
2346 }
2347 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2348
2349 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2350 {
2351 if (optimize_imm8 && isbyte(offset)) {
2352 emit_byte(0x88);
2353 emit_byte(0x40+8*s+d);
2354 emit_byte(offset);
2355 }
2356 else {
2357 emit_byte(0x88);
2358 emit_byte(0x80+8*s+d);
2359 emit_long(offset);
2360 }
2361 }
2362 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2363
2364 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2365 {
2366 emit_byte(0x0f);
2367 emit_byte(0xc8+r);
2368 }
2369 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2370
2371 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2372 {
2373 emit_byte(0x66);
2374 emit_byte(0xc1);
2375 emit_byte(0xc0+r);
2376 emit_byte(0x08);
2377 }
2378 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2379
2380 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2381 {
2382 emit_byte(0x89);
2383 emit_byte(0xc0+8*s+d);
2384 }
2385 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2386
2387 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2388 {
2389 emit_byte(0x89);
2390 emit_byte(0x05+8*s);
2391 emit_long(d);
2392 }
2393 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2394
2395 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2396 {
2397 emit_byte(0x66);
2398 emit_byte(0x89);
2399 emit_byte(0x05+8*s);
2400 emit_long(d);
2401 }
2402 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2403
2404 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2405 {
2406 emit_byte(0x66);
2407 emit_byte(0x8b);
2408 emit_byte(0x05+8*d);
2409 emit_long(s);
2410 }
2411 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2412
2413 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2414 {
2415 emit_byte(0x88);
2416 emit_byte(0x05+8*s);
2417 emit_long(d);
2418 }
2419 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2420
2421 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2422 {
2423 emit_byte(0x8a);
2424 emit_byte(0x05+8*d);
2425 emit_long(s);
2426 }
2427 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2428
2429 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2430 {
2431 emit_byte(0xb8+d);
2432 emit_long(s);
2433 }
2434 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2435
2436 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2437 {
2438 emit_byte(0x66);
2439 emit_byte(0xb8+d);
2440 emit_word(s);
2441 }
2442 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2443
2444 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2445 {
2446 emit_byte(0xb0+d);
2447 emit_byte(s);
2448 }
2449 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2450
2451 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2452 {
2453 emit_byte(0x81);
2454 emit_byte(0x15);
2455 emit_long(d);
2456 emit_long(s);
2457 }
2458 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2459
2460 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2461 {
2462 if (optimize_imm8 && isbyte(s)) {
2463 emit_byte(0x83);
2464 emit_byte(0x05);
2465 emit_long(d);
2466 emit_byte(s);
2467 }
2468 else {
2469 emit_byte(0x81);
2470 emit_byte(0x05);
2471 emit_long(d);
2472 emit_long(s);
2473 }
2474 }
2475 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2476
2477 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2478 {
2479 emit_byte(0x66);
2480 emit_byte(0x81);
2481 emit_byte(0x05);
2482 emit_long(d);
2483 emit_word(s);
2484 }
2485 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2486
2487 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2488 {
2489 emit_byte(0x80);
2490 emit_byte(0x05);
2491 emit_long(d);
2492 emit_byte(s);
2493 }
2494 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2495
2496 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2497 {
2498 if (optimize_accum && isaccum(d))
2499 emit_byte(0xa9);
2500 else {
2501 emit_byte(0xf7);
2502 emit_byte(0xc0+d);
2503 }
2504 emit_long(i);
2505 }
2506 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2507
2508 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2509 {
2510 emit_byte(0x85);
2511 emit_byte(0xc0+8*s+d);
2512 }
2513 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2514
2515 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2516 {
2517 emit_byte(0x66);
2518 emit_byte(0x85);
2519 emit_byte(0xc0+8*s+d);
2520 }
2521 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2522
2523 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2524 {
2525 emit_byte(0x84);
2526 emit_byte(0xc0+8*s+d);
2527 }
2528 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2529
2530 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2531 {
2532 emit_byte(0x81);
2533 emit_byte(0xf0+d);
2534 emit_long(i);
2535 }
2536 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2537
2538 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2539 {
2540 if (optimize_imm8 && isbyte(i)) {
2541 emit_byte(0x83);
2542 emit_byte(0xe0+d);
2543 emit_byte(i);
2544 }
2545 else {
2546 if (optimize_accum && isaccum(d))
2547 emit_byte(0x25);
2548 else {
2549 emit_byte(0x81);
2550 emit_byte(0xe0+d);
2551 }
2552 emit_long(i);
2553 }
2554 }
2555 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2556
2557 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2558 {
2559 emit_byte(0x66);
2560 if (optimize_imm8 && isbyte(i)) {
2561 emit_byte(0x83);
2562 emit_byte(0xe0+d);
2563 emit_byte(i);
2564 }
2565 else {
2566 if (optimize_accum && isaccum(d))
2567 emit_byte(0x25);
2568 else {
2569 emit_byte(0x81);
2570 emit_byte(0xe0+d);
2571 }
2572 emit_word(i);
2573 }
2574 }
2575 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2576
2577 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2578 {
2579 emit_byte(0x21);
2580 emit_byte(0xc0+8*s+d);
2581 }
2582 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2583
2584 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2585 {
2586 emit_byte(0x66);
2587 emit_byte(0x21);
2588 emit_byte(0xc0+8*s+d);
2589 }
2590 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2591
2592 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2593 {
2594 emit_byte(0x20);
2595 emit_byte(0xc0+8*s+d);
2596 }
2597 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2598
2599 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2600 {
2601 if (optimize_imm8 && isbyte(i)) {
2602 emit_byte(0x83);
2603 emit_byte(0xc8+d);
2604 emit_byte(i);
2605 }
2606 else {
2607 if (optimize_accum && isaccum(d))
2608 emit_byte(0x0d);
2609 else {
2610 emit_byte(0x81);
2611 emit_byte(0xc8+d);
2612 }
2613 emit_long(i);
2614 }
2615 }
2616 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2617
2618 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2619 {
2620 emit_byte(0x09);
2621 emit_byte(0xc0+8*s+d);
2622 }
2623 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2624
2625 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2626 {
2627 emit_byte(0x66);
2628 emit_byte(0x09);
2629 emit_byte(0xc0+8*s+d);
2630 }
2631 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2632
2633 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2634 {
2635 emit_byte(0x08);
2636 emit_byte(0xc0+8*s+d);
2637 }
2638 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2639
2640 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2641 {
2642 emit_byte(0x11);
2643 emit_byte(0xc0+8*s+d);
2644 }
2645 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2646
2647 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2648 {
2649 emit_byte(0x66);
2650 emit_byte(0x11);
2651 emit_byte(0xc0+8*s+d);
2652 }
2653 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2654
2655 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2656 {
2657 emit_byte(0x10);
2658 emit_byte(0xc0+8*s+d);
2659 }
2660 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2661
2662 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2663 {
2664 emit_byte(0x01);
2665 emit_byte(0xc0+8*s+d);
2666 }
2667 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2668
2669 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2670 {
2671 emit_byte(0x66);
2672 emit_byte(0x01);
2673 emit_byte(0xc0+8*s+d);
2674 }
2675 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2676
2677 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2678 {
2679 emit_byte(0x00);
2680 emit_byte(0xc0+8*s+d);
2681 }
2682 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2683
2684 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2685 {
2686 if (isbyte(i)) {
2687 emit_byte(0x83);
2688 emit_byte(0xe8+d);
2689 emit_byte(i);
2690 }
2691 else {
2692 if (optimize_accum && isaccum(d))
2693 emit_byte(0x2d);
2694 else {
2695 emit_byte(0x81);
2696 emit_byte(0xe8+d);
2697 }
2698 emit_long(i);
2699 }
2700 }
2701 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2702
2703 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2704 {
2705 if (optimize_accum && isaccum(d))
2706 emit_byte(0x2c);
2707 else {
2708 emit_byte(0x80);
2709 emit_byte(0xe8+d);
2710 }
2711 emit_byte(i);
2712 }
2713 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2714
2715 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2716 {
2717 if (isbyte(i)) {
2718 emit_byte(0x83);
2719 emit_byte(0xc0+d);
2720 emit_byte(i);
2721 }
2722 else {
2723 if (optimize_accum && isaccum(d))
2724 emit_byte(0x05);
2725 else {
2726 emit_byte(0x81);
2727 emit_byte(0xc0+d);
2728 }
2729 emit_long(i);
2730 }
2731 }
2732 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2733
2734 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2735 {
2736 emit_byte(0x66);
2737 if (isbyte(i)) {
2738 emit_byte(0x83);
2739 emit_byte(0xc0+d);
2740 emit_byte(i);
2741 }
2742 else {
2743 if (optimize_accum && isaccum(d))
2744 emit_byte(0x05);
2745 else {
2746 emit_byte(0x81);
2747 emit_byte(0xc0+d);
2748 }
2749 emit_word(i);
2750 }
2751 }
2752 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2753
2754 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2755 {
2756 if (optimize_accum && isaccum(d))
2757 emit_byte(0x04);
2758 else {
2759 emit_byte(0x80);
2760 emit_byte(0xc0+d);
2761 }
2762 emit_byte(i);
2763 }
2764 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2765
2766 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2767 {
2768 emit_byte(0x19);
2769 emit_byte(0xc0+8*s+d);
2770 }
2771 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2772
2773 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2774 {
2775 emit_byte(0x66);
2776 emit_byte(0x19);
2777 emit_byte(0xc0+8*s+d);
2778 }
2779 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2780
2781 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2782 {
2783 emit_byte(0x18);
2784 emit_byte(0xc0+8*s+d);
2785 }
2786 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2787
2788 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2789 {
2790 emit_byte(0x29);
2791 emit_byte(0xc0+8*s+d);
2792 }
2793 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2794
2795 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2796 {
2797 emit_byte(0x66);
2798 emit_byte(0x29);
2799 emit_byte(0xc0+8*s+d);
2800 }
2801 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2802
2803 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2804 {
2805 emit_byte(0x28);
2806 emit_byte(0xc0+8*s+d);
2807 }
2808 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2809
2810 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2811 {
2812 emit_byte(0x39);
2813 emit_byte(0xc0+8*s+d);
2814 }
2815 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2816
2817 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2818 {
2819 if (optimize_imm8 && isbyte(i)) {
2820 emit_byte(0x83);
2821 emit_byte(0xf8+r);
2822 emit_byte(i);
2823 }
2824 else {
2825 if (optimize_accum && isaccum(r))
2826 emit_byte(0x3d);
2827 else {
2828 emit_byte(0x81);
2829 emit_byte(0xf8+r);
2830 }
2831 emit_long(i);
2832 }
2833 }
2834 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2835
2836 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2837 {
2838 emit_byte(0x66);
2839 emit_byte(0x39);
2840 emit_byte(0xc0+8*s+d);
2841 }
2842 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2843
2844 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2845 {
2846 emit_byte(0x80);
2847 emit_byte(0x3d);
2848 emit_long(d);
2849 emit_byte(s);
2850 }
2851 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2852
2853 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2854 {
2855 if (optimize_accum && isaccum(d))
2856 emit_byte(0x3c);
2857 else {
2858 emit_byte(0x80);
2859 emit_byte(0xf8+d);
2860 }
2861 emit_byte(i);
2862 }
2863 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2864
2865 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2866 {
2867 emit_byte(0x38);
2868 emit_byte(0xc0+8*s+d);
2869 }
2870 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2871
2872 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2873 {
2874 int fi;
2875
2876 switch(factor) {
2877 case 1: fi=0; break;
2878 case 2: fi=1; break;
2879 case 4: fi=2; break;
2880 case 8: fi=3; break;
2881 default: abort();
2882 }
2883 emit_byte(0x39);
2884 emit_byte(0x04+8*d);
2885 emit_byte(5+8*index+0x40*fi);
2886 emit_long(offset);
2887 }
2888 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2889
2890 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2891 {
2892 emit_byte(0x31);
2893 emit_byte(0xc0+8*s+d);
2894 }
2895 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2896
2897 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2898 {
2899 emit_byte(0x66);
2900 emit_byte(0x31);
2901 emit_byte(0xc0+8*s+d);
2902 }
2903 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2904
2905 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2906 {
2907 emit_byte(0x30);
2908 emit_byte(0xc0+8*s+d);
2909 }
2910 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2911
2912 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2913 {
2914 if (optimize_imm8 && isbyte(s)) {
2915 emit_byte(0x83);
2916 emit_byte(0x2d);
2917 emit_long(d);
2918 emit_byte(s);
2919 }
2920 else {
2921 emit_byte(0x81);
2922 emit_byte(0x2d);
2923 emit_long(d);
2924 emit_long(s);
2925 }
2926 }
2927 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2928
2929 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2930 {
2931 if (optimize_imm8 && isbyte(s)) {
2932 emit_byte(0x83);
2933 emit_byte(0x3d);
2934 emit_long(d);
2935 emit_byte(s);
2936 }
2937 else {
2938 emit_byte(0x81);
2939 emit_byte(0x3d);
2940 emit_long(d);
2941 emit_long(s);
2942 }
2943 }
2944 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2945
2946 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2947 {
2948 emit_byte(0x87);
2949 emit_byte(0xc0+8*r1+r2);
2950 }
2951 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2952
2953 /*************************************************************************
2954 * FIXME: mem access modes probably wrong *
2955 *************************************************************************/
2956
2957 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2958 {
2959 emit_byte(0x9c);
2960 }
2961 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2962
2963 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2964 {
2965 emit_byte(0x9d);
2966 }
2967 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2968
2969 #endif
2970
2971 /*************************************************************************
2972 * Unoptimizable stuff --- jump *
2973 *************************************************************************/
2974
2975 static __inline__ void raw_call_r(R4 r)
2976 {
2977 #if USE_NEW_RTASM
2978 CALLsr(r);
2979 #else
2980 emit_byte(0xff);
2981 emit_byte(0xd0+r);
2982 #endif
2983 }
2984
2985 static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2986 {
2987 #if USE_NEW_RTASM
2988 CALLsm(base, X86_NOREG, r, m);
2989 #else
2990 int mu;
2991 switch(m) {
2992 case 1: mu=0; break;
2993 case 2: mu=1; break;
2994 case 4: mu=2; break;
2995 case 8: mu=3; break;
2996 default: abort();
2997 }
2998 emit_byte(0xff);
2999 emit_byte(0x14);
3000 emit_byte(0x05+8*r+0x40*mu);
3001 emit_long(base);
3002 #endif
3003 }
3004
3005 static __inline__ void raw_jmp_r(R4 r)
3006 {
3007 #if USE_NEW_RTASM
3008 JMPsr(r);
3009 #else
3010 emit_byte(0xff);
3011 emit_byte(0xe0+r);
3012 #endif
3013 }
3014
3015 static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3016 {
3017 #if USE_NEW_RTASM
3018 JMPsm(base, X86_NOREG, r, m);
3019 #else
3020 int mu;
3021 switch(m) {
3022 case 1: mu=0; break;
3023 case 2: mu=1; break;
3024 case 4: mu=2; break;
3025 case 8: mu=3; break;
3026 default: abort();
3027 }
3028 emit_byte(0xff);
3029 emit_byte(0x24);
3030 emit_byte(0x05+8*r+0x40*mu);
3031 emit_long(base);
3032 #endif
3033 }
3034
3035 static __inline__ void raw_jmp_m(uae_u32 base)
3036 {
3037 emit_byte(0xff);
3038 emit_byte(0x25);
3039 emit_long(base);
3040 }
3041
3042
3043 static __inline__ void raw_call(uae_u32 t)
3044 {
3045 #if USE_NEW_RTASM
3046 CALLm(t);
3047 #else
3048 emit_byte(0xe8);
3049 emit_long(t-(uae_u32)target-4);
3050 #endif
3051 }
3052
3053 static __inline__ void raw_jmp(uae_u32 t)
3054 {
3055 #if USE_NEW_RTASM
3056 JMPm(t);
3057 #else
3058 emit_byte(0xe9);
3059 emit_long(t-(uae_u32)target-4);
3060 #endif
3061 }
3062
3063 static __inline__ void raw_jl(uae_u32 t)
3064 {
3065 emit_byte(0x0f);
3066 emit_byte(0x8c);
3067 emit_long(t-(uintptr)target-4);
3068 }
3069
3070 static __inline__ void raw_jz(uae_u32 t)
3071 {
3072 emit_byte(0x0f);
3073 emit_byte(0x84);
3074 emit_long(t-(uintptr)target-4);
3075 }
3076
3077 static __inline__ void raw_jnz(uae_u32 t)
3078 {
3079 emit_byte(0x0f);
3080 emit_byte(0x85);
3081 emit_long(t-(uintptr)target-4);
3082 }
3083
3084 static __inline__ void raw_jnz_l_oponly(void)
3085 {
3086 emit_byte(0x0f);
3087 emit_byte(0x85);
3088 }
3089
3090 static __inline__ void raw_jcc_l_oponly(int cc)
3091 {
3092 emit_byte(0x0f);
3093 emit_byte(0x80+cc);
3094 }
3095
3096 static __inline__ void raw_jnz_b_oponly(void)
3097 {
3098 emit_byte(0x75);
3099 }
3100
3101 static __inline__ void raw_jz_b_oponly(void)
3102 {
3103 emit_byte(0x74);
3104 }
3105
3106 static __inline__ void raw_jcc_b_oponly(int cc)
3107 {
3108 emit_byte(0x70+cc);
3109 }
3110
3111 static __inline__ void raw_jmp_l_oponly(void)
3112 {
3113 emit_byte(0xe9);
3114 }
3115
3116 static __inline__ void raw_jmp_b_oponly(void)
3117 {
3118 emit_byte(0xeb);
3119 }
3120
3121 static __inline__ void raw_ret(void)
3122 {
3123 emit_byte(0xc3);
3124 }
3125
3126 static __inline__ void raw_nop(void)
3127 {
3128 emit_byte(0x90);
3129 }
3130
3131 static __inline__ void raw_emit_nop_filler(int nbytes)
3132 {
3133 /* Source: GNU Binutils 2.12.90.0.15 */
3134 /* Various efficient no-op patterns for aligning code labels.
3135 Note: Don't try to assemble the instructions in the comments.
3136 0L and 0w are not legal. */
3137 static const uae_u8 f32_1[] =
3138 {0x90}; /* nop */
3139 static const uae_u8 f32_2[] =
3140 {0x89,0xf6}; /* movl %esi,%esi */
3141 static const uae_u8 f32_3[] =
3142 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3143 static const uae_u8 f32_4[] =
3144 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3145 static const uae_u8 f32_5[] =
3146 {0x90, /* nop */
3147 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3148 static const uae_u8 f32_6[] =
3149 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3150 static const uae_u8 f32_7[] =
3151 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3152 static const uae_u8 f32_8[] =
3153 {0x90, /* nop */
3154 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3155 static const uae_u8 f32_9[] =
3156 {0x89,0xf6, /* movl %esi,%esi */
3157 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3158 static const uae_u8 f32_10[] =
3159 {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3160 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3161 static const uae_u8 f32_11[] =
3162 {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3163 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3164 static const uae_u8 f32_12[] =
3165 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3166 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3167 static const uae_u8 f32_13[] =
3168 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3169 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170 static const uae_u8 f32_14[] =
3171 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3172 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3173 static const uae_u8 f32_15[] =
3174 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3175 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3176 static const uae_u8 f32_16[] =
3177 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3178 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3179 static const uae_u8 *const f32_patt[] = {
3180 f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3181 f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3182 };
3183 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3184
3185 #if defined(__x86_64__)
3186 /* The recommended way to pad 64bit code is to use NOPs preceded by
3187 maximally four 0x66 prefixes. Balance the size of nops. */
3188 if (nbytes == 0)
3189 return;
3190
3191 int i;
3192 int nnops = (nbytes + 3) / 4;
3193 int len = nbytes / nnops;
3194 int remains = nbytes - nnops * len;
3195
3196 for (i = 0; i < remains; i++) {
3197 emit_block(prefixes, len);
3198 raw_nop();
3199 }
3200 for (; i < nnops; i++) {
3201 emit_block(prefixes, len - 1);
3202 raw_nop();
3203 }
3204 #else
3205 int nloops = nbytes / 16;
3206 while (nloops-- > 0)
3207 emit_block(f32_16, sizeof(f32_16));
3208
3209 nbytes %= 16;
3210 if (nbytes)
3211 emit_block(f32_patt[nbytes - 1], nbytes);
3212 #endif
3213 }
3214
3215
3216 /*************************************************************************
3217 * Flag handling, to and fro UAE flag register *
3218 *************************************************************************/
3219
3220 #ifdef SAHF_SETO_PROFITABLE
3221
3222 #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3223
3224 static __inline__ void raw_flags_to_reg(int r)
3225 {
3226 raw_lahf(0); /* Most flags in AH */
3227 //raw_setcc(r,0); /* V flag in AL */
3228 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3229
3230 #if 1 /* Let's avoid those nasty partial register stalls */
3231 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3232 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3233 //live.state[FLAGTMP].status=CLEAN;
3234 live.state[FLAGTMP].status=INMEM;
3235 live.state[FLAGTMP].realreg=-1;
3236 /* We just "evicted" FLAGTMP. */
3237 if (live.nat[r].nholds!=1) {
3238 /* Huh? */
3239 abort();
3240 }
3241 live.nat[r].nholds=0;
3242 #endif
3243 }
3244
3245 #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3246 static __inline__ void raw_reg_to_flags(int r)
3247 {
3248 raw_cmp_b_ri(r,-127); /* set V */
3249 raw_sahf(0);
3250 }
3251
3252 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3253 static __inline__ void raw_flags_set_zero(int s, int tmp)
3254 {
3255 raw_mov_l_rr(tmp,s);
3256 raw_lahf(s); /* flags into ah */
3257 raw_and_l_ri(s,0xffffbfff);
3258 raw_and_l_ri(tmp,0x00004000);
3259 raw_xor_l_ri(tmp,0x00004000);
3260 raw_or_l(s,tmp);
3261 raw_sahf(s);
3262 }
3263
3264 #else
3265
3266 #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3267 static __inline__ void raw_flags_to_reg(int r)
3268 {
3269 raw_pushfl();
3270 raw_pop_l_r(r);
3271 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3272 // live.state[FLAGTMP].status=CLEAN;
3273 live.state[FLAGTMP].status=INMEM;
3274 live.state[FLAGTMP].realreg=-1;
3275 /* We just "evicted" FLAGTMP. */
3276 if (live.nat[r].nholds!=1) {
3277 /* Huh? */
3278 abort();
3279 }
3280 live.nat[r].nholds=0;
3281 }
3282
3283 #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3284 static __inline__ void raw_reg_to_flags(int r)
3285 {
3286 raw_push_l_r(r);
3287 raw_popfl();
3288 }
3289
3290 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3291 static __inline__ void raw_flags_set_zero(int s, int tmp)
3292 {
3293 raw_mov_l_rr(tmp,s);
3294 raw_pushfl();
3295 raw_pop_l_r(s);
3296 raw_and_l_ri(s,0xffffffbf);
3297 raw_and_l_ri(tmp,0x00000040);
3298 raw_xor_l_ri(tmp,0x00000040);
3299 raw_or_l(s,tmp);
3300 raw_push_l_r(s);
3301 raw_popfl();
3302 }
3303 #endif
3304
3305 /* Apparently, there are enough instructions between flag store and
3306 flag reload to avoid the partial memory stall */
3307 static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3308 {
3309 #if 1
3310 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3311 #else
3312 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3313 raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3314 #endif
3315 }
3316
3317 /* FLAGX is byte sized, and we *do* write it at that size */
3318 static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3319 {
3320 if (live.nat[target].canbyte)
3321 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3322 else if (live.nat[target].canword)
3323 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3324 else
3325 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3326 }
3327
3328 static __inline__ void raw_dec_sp(int off)
3329 {
3330 if (off) raw_sub_l_ri(ESP_INDEX,off);
3331 }
3332
3333 static __inline__ void raw_inc_sp(int off)
3334 {
3335 if (off) raw_add_l_ri(ESP_INDEX,off);
3336 }
3337
3338 /*************************************************************************
3339 * Handling mistaken direct memory access *
3340 *************************************************************************/
3341
3342 // gb-- I don't need that part for JIT Basilisk II
3343 #if defined(NATMEM_OFFSET) && 0
3344 #include <asm/sigcontext.h>
3345 #include <signal.h>
3346
3347 #define SIG_READ 1
3348 #define SIG_WRITE 2
3349
3350 static int in_handler=0;
3351 static uae_u8 veccode[256];
3352
3353 static void vec(int x, struct sigcontext sc)
3354 {
3355 uae_u8* i=(uae_u8*)sc.eip;
3356 uae_u32 addr=sc.cr2;
3357 int r=-1;
3358 int size=4;
3359 int dir=-1;
3360 int len=0;
3361 int j;
3362
3363 write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3364 if (!canbang)
3365 write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3366 if (in_handler)
3367 write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3368
3369 if (canbang && i>=compiled_code && i<=current_compile_p) {
3370 if (*i==0x66) {
3371 i++;
3372 size=2;
3373 len++;
3374 }
3375
3376 switch(i[0]) {
3377 case 0x8a:
3378 if ((i[1]&0xc0)==0x80) {
3379 r=(i[1]>>3)&7;
3380 dir=SIG_READ;
3381 size=1;
3382 len+=6;
3383 break;
3384 }
3385 break;
3386 case 0x88:
3387 if ((i[1]&0xc0)==0x80) {
3388 r=(i[1]>>3)&7;
3389 dir=SIG_WRITE;
3390 size=1;
3391 len+=6;
3392 break;
3393 }
3394 break;
3395 case 0x8b:
3396 if ((i[1]&0xc0)==0x80) {
3397 r=(i[1]>>3)&7;
3398 dir=SIG_READ;
3399 len+=6;
3400 break;
3401 }
3402 if ((i[1]&0xc0)==0x40) {
3403 r=(i[1]>>3)&7;
3404 dir=SIG_READ;
3405 len+=3;
3406 break;
3407 }
3408 break;
3409 case 0x89:
3410 if ((i[1]&0xc0)==0x80) {
3411 r=(i[1]>>3)&7;
3412 dir=SIG_WRITE;
3413 len+=6;
3414 break;
3415 }
3416 if ((i[1]&0xc0)==0x40) {
3417 r=(i[1]>>3)&7;
3418 dir=SIG_WRITE;
3419 len+=3;
3420 break;
3421 }
3422 break;
3423 }
3424 }
3425
3426 if (r!=-1) {
3427 void* pr=NULL;
3428 write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3429
3430 switch(r) {
3431 case 0: pr=&(sc.eax); break;
3432 case 1: pr=&(sc.ecx); break;
3433 case 2: pr=&(sc.edx); break;
3434 case 3: pr=&(sc.ebx); break;
3435 case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3436 case 5: pr=(size>1)?
3437 (void*)(&(sc.ebp)):
3438 (void*)(((uae_u8*)&(sc.ecx))+1); break;
3439 case 6: pr=(size>1)?
3440 (void*)(&(sc.esi)):
3441 (void*)(((uae_u8*)&(sc.edx))+1); break;
3442 case 7: pr=(size>1)?
3443 (void*)(&(sc.edi)):
3444 (void*)(((uae_u8*)&(sc.ebx))+1); break;
3445 default: abort();
3446 }
3447 if (pr) {
3448 blockinfo* bi;
3449
3450 if (currprefs.comp_oldsegv) {
3451 addr-=NATMEM_OFFSET;
3452
3453 if ((addr>=0x10000000 && addr<0x40000000) ||
3454 (addr>=0x50000000)) {
3455 write_log("Suspicious address in %x SEGV handler.\n",addr);
3456 }
3457 if (dir==SIG_READ) {
3458 switch(size) {
3459 case 1: *((uae_u8*)pr)=get_byte(addr); break;
3460 case 2: *((uae_u16*)pr)=get_word(addr); break;
3461 case 4: *((uae_u32*)pr)=get_long(addr); break;
3462 default: abort();
3463 }
3464 }
3465 else { /* write */
3466 switch(size) {
3467 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3468 case 2: put_word(addr,*((uae_u16*)pr)); break;
3469 case 4: put_long(addr,*((uae_u32*)pr)); break;
3470 default: abort();
3471 }
3472 }
3473 write_log("Handled one access!\n");
3474 fflush(stdout);
3475 segvcount++;
3476 sc.eip+=len;
3477 }
3478 else {
3479 void* tmp=target;
3480 int i;
3481 uae_u8 vecbuf[5];
3482
3483 addr-=NATMEM_OFFSET;
3484
3485 if ((addr>=0x10000000 && addr<0x40000000) ||
3486 (addr>=0x50000000)) {
3487 write_log("Suspicious address in %x SEGV handler.\n",addr);
3488 }
3489
3490 target=(uae_u8*)sc.eip;
3491 for (i=0;i<5;i++)
3492 vecbuf[i]=target[i];
3493 emit_byte(0xe9);
3494 emit_long((uintptr)veccode-(uintptr)target-4);
3495 write_log("Create jump to %p\n",veccode);
3496
3497 write_log("Handled one access!\n");
3498 fflush(stdout);
3499 segvcount++;
3500
3501 target=veccode;
3502
3503 if (dir==SIG_READ) {
3504 switch(size) {
3505 case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3506 case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3507 case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3508 default: abort();
3509 }
3510 }
3511 else { /* write */
3512 switch(size) {
3513 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3514 case 2: put_word(addr,*((uae_u16*)pr)); break;
3515 case 4: put_long(addr,*((uae_u32*)pr)); break;
3516 default: abort();
3517 }
3518 }
3519 for (i=0;i<5;i++)
3520 raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3521 raw_mov_l_mi((uintptr)&in_handler,0);
3522 emit_byte(0xe9);
3523 emit_long(sc.eip+len-(uintptr)target-4);
3524 in_handler=1;
3525 target=tmp;
3526 }
3527 bi=active;
3528 while (bi) {
3529 if (bi->handler &&
3530 (uae_u8*)bi->direct_handler<=i &&
3531 (uae_u8*)bi->nexthandler>i) {
3532 write_log("deleted trigger (%p<%p<%p) %p\n",
3533 bi->handler,
3534 i,
3535 bi->nexthandler,
3536 bi->pc_p);
3537 invalidate_block(bi);
3538 raise_in_cl_list(bi);
3539 set_special(0);
3540 return;
3541 }
3542 bi=bi->next;
3543 }
3544 /* Not found in the active list. Might be a rom routine that
3545 is in the dormant list */
3546 bi=dormant;
3547 while (bi) {
3548 if (bi->handler &&
3549 (uae_u8*)bi->direct_handler<=i &&
3550 (uae_u8*)bi->nexthandler>i) {
3551 write_log("deleted trigger (%p<%p<%p) %p\n",
3552 bi->handler,
3553 i,
3554 bi->nexthandler,
3555 bi->pc_p);
3556 invalidate_block(bi);
3557 raise_in_cl_list(bi);
3558 set_special(0);
3559 return;
3560 }
3561 bi=bi->next;
3562 }
3563 write_log("Huh? Could not find trigger!\n");
3564 return;
3565 }
3566 }
3567 write_log("Can't handle access!\n");
3568 for (j=0;j<10;j++) {
3569 write_log("instruction byte %2d is %02x\n",j,i[j]);
3570 }
3571 write_log("Please send the above info (starting at \"fault address\") to\n"
3572 "bmeyer@csse.monash.edu.au\n"
3573 "This shouldn't happen ;-)\n");
3574 fflush(stdout);
3575 signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3576 }
3577 #endif
3578
3579
3580 /*************************************************************************
3581 * Checking for CPU features *
3582 *************************************************************************/
3583
/* Identification record for the host CPU.  The single global instance,
   cpuinfo, is filled in by raw_init_cpu(). */
struct cpuinfo_x86 {
    uae_u8 x86; // CPU family
    uae_u8 x86_vendor; // CPU vendor (one of the X86_VENDOR_* values)
    uae_u8 x86_processor; // CPU canonical processor type (one of the X86_PROCESSOR_* values)
    uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
    uae_u32 x86_hwcap; // presumably the CPUID feature bits -- TODO confirm against raw_init_cpu()
    uae_u8 x86_model; // NOTE(review): looks like the CPUID model number -- confirm
    uae_u8 x86_mask; // NOTE(review): looks like the CPUID stepping -- confirm
    int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
    char x86_vendor_id[16]; // 12-character vendor string, NUL-terminated at index 12
};
struct cpuinfo_x86 cpuinfo;
3596
/* CPU vendor codes stored in cpuinfo_x86::x86_vendor by
   x86_get_cpu_vendor(); X86_VENDOR_UNKNOWN marks an unrecognised vendor
   string. */
enum {
    X86_VENDOR_INTEL = 0,
    X86_VENDOR_CYRIX = 1,
    X86_VENDOR_AMD = 2,
    X86_VENDOR_UMC = 3,
    X86_VENDOR_NEXGEN = 4,
    X86_VENDOR_CENTAUR = 5,
    X86_VENDOR_RISE = 6,
    X86_VENDOR_TRANSMETA = 7,
    X86_VENDOR_NSC = 8,
    X86_VENDOR_UNKNOWN = 0xff
};
3609
/* Canonical processor types.  The values size x86_processor_string_table
   and x86_alignments, so the order here must match the order of the rows
   in both tables.  X86_PROCESSOR_max is the number of real entries;
   raw_init_cpu() also uses it as the initial "not yet determined" value. */
enum {
    X86_PROCESSOR_I386, /* 80386 */
    X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
    X86_PROCESSOR_PENTIUM,
    X86_PROCESSOR_PENTIUMPRO,
    X86_PROCESSOR_K6,
    X86_PROCESSOR_ATHLON,
    X86_PROCESSOR_PENTIUM4,
    X86_PROCESSOR_X86_64,
    X86_PROCESSOR_max
};
3621
/* Printable name for each X86_PROCESSOR_* value, in enum order. */
static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
    "80386",
    "80486",
    "Pentium",
    "PentiumPro",
    "K6",
    "Athlon",
    "Pentium4",
    "x86-64"
};
3632
/* Per-processor code alignment parameters, one row per X86_PROCESSOR_*
   value in enum order (80386 first, x86-64 last).
   NOTE(review): the values are presumably byte alignments, and the
   *_max_skip fields the largest padding worth inserting to reach them --
   confirm against the code that consumes this table. */
static struct ptt {
    const int align_loop;
    const int align_loop_max_skip;
    const int align_jump;
    const int align_jump_max_skip;
    const int align_func;
}
x86_alignments[X86_PROCESSOR_max] = {
    { 4, 3, 4, 3, 4 },          /* 80386 */
    { 16, 15, 16, 15, 16 },     /* 80486 */
    { 16, 7, 16, 7, 16 },       /* Pentium */
    { 16, 15, 16, 7, 16 },      /* PentiumPro */
    { 32, 7, 32, 7, 32 },       /* K6 */
    { 16, 7, 16, 7, 16 },       /* Athlon */
    { 0, 0, 0, 0, 0 },          /* Pentium4: all entries are zero */
    { 16, 7, 16, 7, 16 }        /* x86-64 */
};
3650
3651 static void
3652 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3653 {
3654 char *v = c->x86_vendor_id;
3655
3656 if (!strcmp(v, "GenuineIntel"))
3657 c->x86_vendor = X86_VENDOR_INTEL;
3658 else if (!strcmp(v, "AuthenticAMD"))
3659 c->x86_vendor = X86_VENDOR_AMD;
3660 else if (!strcmp(v, "CyrixInstead"))
3661 c->x86_vendor = X86_VENDOR_CYRIX;
3662 else if (!strcmp(v, "Geode by NSC"))
3663 c->x86_vendor = X86_VENDOR_NSC;
3664 else if (!strcmp(v, "UMC UMC UMC "))
3665 c->x86_vendor = X86_VENDOR_UMC;
3666 else if (!strcmp(v, "CentaurHauls"))
3667 c->x86_vendor = X86_VENDOR_CENTAUR;
3668 else if (!strcmp(v, "NexGenDriven"))
3669 c->x86_vendor = X86_VENDOR_NEXGEN;
3670 else if (!strcmp(v, "RiseRiseRise"))
3671 c->x86_vendor = X86_VENDOR_RISE;
3672 else if (!strcmp(v, "GenuineTMx86") ||
3673 !strcmp(v, "TransmetaCPU"))
3674 c->x86_vendor = X86_VENDOR_TRANSMETA;
3675 else
3676 c->x86_vendor = X86_VENDOR_UNKNOWN;
3677 }
3678
3679 static void
3680 cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3681 {
3682 const int CPUID_SPACE = 4096;
3683 uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3684 if (cpuid_space == VM_MAP_FAILED)
3685 abort();
3686 vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3687
3688 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3689 uae_u8* tmp=get_target();
3690
3691 s_op = op;
3692 set_target(cpuid_space);
3693 raw_push_l_r(0); /* eax */
3694 raw_push_l_r(1); /* ecx */
3695 raw_push_l_r(2); /* edx */
3696 raw_push_l_r(3); /* ebx */
3697 raw_mov_l_rm(0,(uintptr)&s_op);
3698 raw_cpuid(0);
3699 raw_mov_l_mr((uintptr)&s_eax,0);
3700 raw_mov_l_mr((uintptr)&s_ebx,3);
3701 raw_mov_l_mr((uintptr)&s_ecx,1);
3702 raw_mov_l_mr((uintptr)&s_edx,2);
3703 raw_pop_l_r(3);
3704 raw_pop_l_r(2);
3705 raw_pop_l_r(1);
3706 raw_pop_l_r(0);
3707 raw_ret();
3708 set_target(tmp);
3709
3710 ((cpuop_func*)cpuid_space)(0);
3711 if (eax != NULL) *eax = s_eax;
3712 if (ebx != NULL) *ebx = s_ebx;
3713 if (ecx != NULL) *ecx = s_ecx;
3714 if (edx != NULL) *edx = s_edx;
3715
3716 vm_release(cpuid_space, CPUID_SPACE);
3717 }
3718
3719 static void
3720 raw_init_cpu(void)
3721 {
3722 struct cpuinfo_x86 *c = &cpuinfo;
3723
3724 /* Defaults */
3725 c->x86_processor = X86_PROCESSOR_max;
3726 c->x86_vendor = X86_VENDOR_UNKNOWN;
3727 c->cpuid_level = -1; /* CPUID not detected */
3728 c->x86_model = c->x86_mask = 0; /* So far unknown... */
3729 c->x86_vendor_id[0] = '\0'; /* Unset */
3730 c->x86_hwcap = 0;
3731
3732 /* Get vendor name */
3733 c->x86_vendor_id[12] = '\0';
3734 cpuid(0x00000000,
3735 (uae_u32 *)&c->cpuid_level,
3736 (uae_u32 *)&c->x86_vendor_id[0],
3737 (uae_u32 *)&c->x86_vendor_id[8],
3738 (uae_u32 *)&c->x86_vendor_id[4]);
3739 x86_get_cpu_vendor(c);
3740
3741 /* Intel-defined flags: level 0x00000001 */
3742 c->x86_brand_id = 0;
3743 if ( c->cpuid_level >= 0x00000001 ) {
3744 uae_u32 tfms, brand_id;
3745 cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3746 c->x86 = (tfms >> 8) & 15;
3747 if (c->x86 == 0xf)
3748 c->x86 += (tfms >> 20) & 0xff; /* extended family */
3749 c->x86_model = (tfms >> 4) & 15;
3750 if (c->x86_model == 0xf)
3751 c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
3752 c->x86_brand_id = brand_id & 0xff;
3753 c->x86_mask = tfms & 15;
3754 } else {
3755 /* Have CPUID level 0 only - unheard of */
3756 c->x86 = 4;
3757 }
3758
3759 /* AMD-defined flags: level 0x80000001 */
3760 uae_u32 xlvl;
3761 cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3762 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3763 if ( xlvl >= 0x80000001 ) {
3764 uae_u32 features, extra_features;
3765 cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3766 if (features & (1 << 29)) {
3767 /* Assume x86-64 if long mode is supported */
3768 c->x86_processor = X86_PROCESSOR_X86_64;
3769 }
3770 if (extra_features & (1 << 0))
3771 have_lahf_lm = true;
3772 }
3773 }
3774
3775 /* Canonicalize processor ID */
3776 switch (c->x86) {
3777 case 3:
3778 c->x86_processor = X86_PROCESSOR_I386;
3779 break;
3780 case 4:
3781 c->x86_processor = X86_PROCESSOR_I486;
3782 break;
3783 case 5:
3784 if (c->x86_vendor == X86_VENDOR_AMD)
3785 c->x86_processor = X86_PROCESSOR_K6;
3786 else
3787 c->x86_processor = X86_PROCESSOR_PENTIUM;
3788 break;
3789 case 6:
3790 if (c->x86_vendor == X86_VENDOR_AMD)
3791 c->x86_processor = X86_PROCESSOR_ATHLON;
3792 else
3793 c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3794 break;
3795 case 15:
3796 if (c->x86_processor == X86_PROCESSOR_max) {
3797 switch (c->x86_vendor) {
3798 case X86_VENDOR_INTEL:
3799 c->x86_processor = X86_PROCESSOR_PENTIUM4;
3800 break;
3801 case X86_VENDOR_AMD:
3802 /* Assume a 32-bit Athlon processor if not in long mode */
3803 c->x86_processor = X86_PROCESSOR_ATHLON;
3804 break;
3805 }
3806 }
3807 break;
3808 }
3809 if (c->x86_processor == X86_PROCESSOR_max) {
3810 c->x86_processor = X86_PROCESSOR_I386;
3811 fprintf(stderr, "Error: unknown processor type, assuming i386\n");
3812 fprintf(stderr, " Family : %d\n", c->x86);
3813 fprintf(stderr, " Model : %d\n", c->x86_model);
3814 fprintf(stderr, " Mask : %d\n", c->x86_mask);
3815 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3816 if (c->x86_brand_id)
3817 fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3818 }
3819
3820 /* Have CMOV support? */
3821 have_cmov = c->x86_hwcap & (1 << 15);
3822
3823 /* Can the host CPU suffer from partial register stalls? */
3824 have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3825 #if 1
3826 /* It appears that partial register writes are a bad idea even on
3827 AMD K7 cores, even though they are not supposed to have the
3828 dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3829 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3830 have_rat_stall = true;
3831 #endif
3832
3833 /* Alignments */
3834 if (tune_alignment) {
3835 align_loops = x86_alignments[c->x86_processor].align_loop;
3836 align_jumps = x86_alignments[c->x86_processor].align_jump;
3837 }
3838
3839 write_log("Max CPUID level=%d Processor is %s [%s]\n",
3840 c->cpuid_level, c->x86_vendor_id,
3841 x86_processor_string_table[c->x86_processor]);
3842 }
3843
3844 static bool target_check_bsf(void)
3845 {
3846 bool mismatch = false;
3847 for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3848 for (int g_CF = 0; g_CF <= 1; g_CF++) {
3849 for (int g_OF = 0; g_OF <= 1; g_OF++) {
3850 for (int g_SF = 0; g_SF <= 1; g_SF++) {
3851 for (int value = -1; value <= 1; value++) {
3852 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3853 unsigned long tmp = value;
3854 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3855 : "+r" (flags), "+r" (tmp) : : "cc");
3856 int OF = (flags >> 11) & 1;
3857 int SF = (flags >> 7) & 1;
3858 int ZF = (flags >> 6) & 1;
3859 int CF = flags & 1;
3860 tmp = (value == 0);
3861 if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3862 mismatch = true;
3863 }
3864 }}}}
3865 if (mismatch)
3866 write_log("Target CPU defines all flags on BSF instruction\n");
3867 return !mismatch;
3868 }
3869
3870
3871 /*************************************************************************
3872 * FPU stuff *
3873 *************************************************************************/
3874
3875
3876 static __inline__ void raw_fp_init(void)
3877 {
3878 int i;
3879
3880 for (i=0;i<N_FREGS;i++)
3881 live.spos[i]=-2;
3882 live.tos=-1; /* Stack is empty */
3883 }
3884
3885 static __inline__ void raw_fp_cleanup_drop(void)
3886 {
3887 #if 0
3888 /* using FINIT instead of popping all the entries.
3889 Seems to have side effects --- there is display corruption in
3890 Quake when this is used */
3891 if (live.tos>1) {
3892 emit_byte(0x9b);
3893 emit_byte(0xdb);
3894 emit_byte(0xe3);
3895 live.tos=-1;
3896 }
3897 #endif
3898 while (live.tos>=1) {
3899 emit_byte(0xde);
3900 emit_byte(0xd9);
3901 live.tos-=2;
3902 }
3903 while (live.tos>=0) {
3904 emit_byte(0xdd);
3905 emit_byte(0xd8);
3906 live.tos--;
3907 }
3908 raw_fp_init();
3909 }
3910
3911 static __inline__ void make_tos(int r)
3912 {
3913 int p,q;
3914
3915 if (live.spos[r]<0) { /* Register not yet on stack */
3916 emit_byte(0xd9);
3917 emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3918 live.tos++;
3919 live.spos[r]=live.tos;
3920 live.onstack[live.tos]=r;
3921 return;
3922 }
3923 /* Register is on stack */
3924 if (live.tos==live.spos[r])
3925 return;
3926 p=live.spos[r];
3927 q=live.onstack[live.tos];
3928
3929 emit_byte(0xd9);
3930 emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3931 live.onstack[live.tos]=r;
3932 live.spos[r]=live.tos;
3933 live.onstack[p]=q;
3934 live.spos[q]=p;
3935 }
3936
3937 static __inline__ void make_tos2(int r, int r2)
3938 {
3939 int q;
3940
3941 make_tos(r2); /* Put the reg that's supposed to end up in position2
3942 on top */
3943
3944 if (live.spos[r]<0) { /* Register not yet on stack */
3945 make_tos(r); /* This will extend the stack */
3946 return;
3947 }
3948 /* Register is on stack */
3949 emit_byte(0xd9);
3950 emit_byte(0xc9); /* Move r2 into position 2 */
3951
3952 q=live.onstack[live.tos-1];
3953 live.onstack[live.tos]=q;
3954 live.spos[q]=live.tos;
3955 live.onstack[live.tos-1]=r2;
3956 live.spos[r2]=live.tos-1;
3957
3958 make_tos(r); /* And r into 1 */
3959 }
3960
3961 static __inline__ int stackpos(int r)
3962 {
3963 if (live.spos[r]<0)
3964 abort();
3965 if (live.tos<live.spos[r]) {
3966 printf("Looking for spos for fnreg %d\n",r);
3967 abort();
3968 }
3969 return live.tos-live.spos[r];
3970 }
3971
3972 static __inline__ void usereg(int r)
3973 {
3974 if (live.spos[r]<0)
3975 make_tos(r);
3976 }
3977
3978 /* This is called with one FP value in a reg *above* tos, which it will
3979 pop off the stack if necessary */
3980 static __inline__ void tos_make(int r)
3981 {
3982 if (live.spos[r]<0) {
3983 live.tos++;
3984 live.spos[r]=live.tos;
3985 live.onstack[live.tos]=r;
3986 return;
3987 }
3988 emit_byte(0xdd);
3989 emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3990 and pop it*/
3991 }
3992
3993 /* FP helper functions */
3994 #if USE_NEW_RTASM
3995 #define DEFINE_OP(NAME, GEN) \
3996 static inline void raw_##NAME(uint32 m) \
3997 { \
3998 GEN(m, X86_NOREG, X86_NOREG, 1); \
3999 }
4000 DEFINE_OP(fstl, FSTLm);
4001 DEFINE_OP(fstpl, FSTPLm);
4002 DEFINE_OP(fldl, FLDLm);
4003 DEFINE_OP(fildl, FILDLm);
4004 DEFINE_OP(fistl, FISTLm);
4005 DEFINE_OP(flds, FLDSm);
4006 DEFINE_OP(fsts, FSTSm);
4007 DEFINE_OP(fstpt, FSTPTm);
4008 DEFINE_OP(fldt, FLDTm);
4009 #else
4010 #define DEFINE_OP(NAME, OP1, OP2) \
4011 static inline void raw_##NAME(uint32 m) \
4012 { \
4013 emit_byte(OP1); \
4014 emit_byte(OP2); \
4015 emit_long(m); \
4016 }
4017 DEFINE_OP(fstl, 0xdd, 0x15);
4018 DEFINE_OP(fstpl, 0xdd, 0x1d);
4019 DEFINE_OP(fldl, 0xdd, 0x05);
4020 DEFINE_OP(fildl, 0xdb, 0x05);
4021 DEFINE_OP(fistl, 0xdb, 0x15);
4022 DEFINE_OP(flds, 0xd9, 0x05);
4023 DEFINE_OP(fsts, 0xd9, 0x15);
4024 DEFINE_OP(fstpt, 0xdb, 0x3d);
4025 DEFINE_OP(fldt, 0xdb, 0x2d);
4026 #endif
4027 #undef DEFINE_OP
4028
4029 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4030 {
4031 make_tos(r);
4032 raw_fstl(m);
4033 }
4034 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4035
4036 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4037 {
4038 make_tos(r);
4039 raw_fstpl(m);
4040 live.onstack[live.tos]=-1;
4041 live.tos--;
4042 live.spos[r]=-2;
4043 }
4044 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4045
4046 LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4047 {
4048 raw_fldl(m);
4049 tos_make(r);
4050 }
4051 LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4052
4053 LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4054 {
4055 raw_fildl(m);
4056 tos_make(r);
4057 }
4058 LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4059
4060 LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4061 {
4062 make_tos(r);
4063 raw_fistl(m);
4064 }
4065 LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4066
4067 LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4068 {
4069 raw_flds(m);
4070 tos_make(r);
4071 }
4072 LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4073
4074 LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4075 {
4076 make_tos(r);
4077 raw_fsts(m);
4078 }
4079 LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4080
4081 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4082 {
4083 int rs;
4084
4085 /* Stupid x87 can't write a long double to mem without popping the
4086 stack! */
4087 usereg(r);
4088 rs=stackpos(r);
4089 emit_byte(0xd9); /* Get a copy to the top of stack */
4090 emit_byte(0xc0+rs);
4091
4092 raw_fstpt(m); /* store and pop it */
4093 }
4094 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4095
4096 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4097 {
4098 int rs;
4099
4100 make_tos(r);
4101 raw_fstpt(m); /* store and pop it */
4102 live.onstack[live.tos]=-1;
4103 live.tos--;
4104 live.spos[r]=-2;
4105 }
4106 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4107
4108 LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4109 {
4110 raw_fldt(m);
4111 tos_make(r);
4112 }
4113 LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4114
4115 LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4116 {
4117 emit_byte(0xd9);
4118 emit_byte(0xeb);
4119 tos_make(r);
4120 }
4121 LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4122
4123 LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4124 {
4125 emit_byte(0xd9);
4126 emit_byte(0xec);
4127 tos_make(r);
4128 }
4129 LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4130
4131 LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4132 {
4133 emit_byte(0xd9);
4134 emit_byte(0xea);
4135 tos_make(r);
4136 }
4137 LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4138
4139 LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4140 {
4141 emit_byte(0xd9);
4142 emit_byte(0xed);
4143 tos_make(r);
4144 }
4145 LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4146
4147 LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4148 {
4149 emit_byte(0xd9);
4150 emit_byte(0xe8);
4151 tos_make(r);
4152 }
4153 LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4154
4155 LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4156 {
4157 emit_byte(0xd9);
4158 emit_byte(0xee);
4159 tos_make(r);
4160 }
4161 LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4162
4163 LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4164 {
4165 int ds;
4166
4167 usereg(s);
4168 ds=stackpos(s);
4169 if (ds==0 && live.spos[d]>=0) {
4170 /* source is on top of stack, and we already have the dest */
4171 int dd=stackpos(d);
4172 emit_byte(0xdd);
4173 emit_byte(0xd0+dd);
4174 }
4175 else {
4176 emit_byte(0xd9);
4177 emit_byte(0xc0+ds); /* duplicate source on tos */
4178 tos_make(d); /* store to destination, pop if necessary */
4179 }
4180 }
4181 LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4182
4183 LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4184 {
4185 emit_byte(0xd9);
4186 emit_byte(0xa8+index);
4187 emit_long(base);
4188 }
4189 LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4190
4191
4192 LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4193 {
4194 int ds;
4195
4196 if (d!=s) {
4197 usereg(s);
4198 ds=stackpos(s);
4199 emit_byte(0xd9);
4200 emit_byte(0xc0+ds); /* duplicate source */
4201 emit_byte(0xd9);
4202 emit_byte(0xfa); /* take square root */
4203 tos_make(d); /* store to destination */
4204 }
4205 else {
4206 make_tos(d);
4207 emit_byte(0xd9);
4208 emit_byte(0xfa); /* take square root */
4209 }
4210 }
4211 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4212
4213 LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4214 {
4215 int ds;
4216
4217 if (d!=s) {
4218 usereg(s);
4219 ds=stackpos(s);
4220 emit_byte(0xd9);
4221 emit_byte(0xc0+ds); /* duplicate source */
4222 emit_byte(0xd9);
4223 emit_byte(0xe1); /* take fabs */
4224 tos_make(d); /* store to destination */
4225 }
4226 else {
4227 make_tos(d);
4228 emit_byte(0xd9);
4229 emit_byte(0xe1); /* take fabs */
4230 }
4231 }
4232 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4233
4234 LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4235 {
4236 int ds;
4237
4238 if (d!=s) {
4239 usereg(s);
4240 ds=stackpos(s);
4241 emit_byte(0xd9);
4242 emit_byte(0xc0+ds); /* duplicate source */
4243 emit_byte(0xd9);
4244 emit_byte(0xfc); /* take frndint */
4245 tos_make(d); /* store to destination */
4246 }
4247 else {
4248 make_tos(d);
4249 emit_byte(0xd9);
4250 emit_byte(0xfc); /* take frndint */
4251 }
4252 }
4253 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4254
4255 LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4256 {
4257 int ds;
4258
4259 if (d!=s) {
4260 usereg(s);
4261 ds=stackpos(s);
4262 emit_byte(0xd9);
4263 emit_byte(0xc0+ds); /* duplicate source */
4264 emit_byte(0xd9);
4265 emit_byte(0xff); /* take cos */
4266 tos_make(d); /* store to destination */
4267 }
4268 else {
4269 make_tos(d);
4270 emit_byte(0xd9);
4271 emit_byte(0xff); /* take cos */
4272 }
4273 }
4274 LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4275
4276 LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4277 {
4278 int ds;
4279
4280 if (d!=s) {
4281 usereg(s);
4282 ds=stackpos(s);
4283 emit_byte(0xd9);
4284 emit_byte(0xc0+ds); /* duplicate source */
4285 emit_byte(0xd9);
4286 emit_byte(0xfe); /* take sin */
4287 tos_make(d); /* store to destination */
4288 }
4289 else {
4290 make_tos(d);
4291 emit_byte(0xd9);
4292 emit_byte(0xfe); /* take sin */
4293 }
4294 }
4295 LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4296
4297 double one=1;
4298 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4299 {
4300 int ds;
4301
4302 usereg(s);
4303 ds=stackpos(s);
4304 emit_byte(0xd9);
4305 emit_byte(0xc0+ds); /* duplicate source */
4306
4307 emit_byte(0xd9);
4308 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4309 emit_byte(0xd9);
4310 emit_byte(0xfc); /* rndint */
4311 emit_byte(0xd9);
4312 emit_byte(0xc9); /* swap top two elements */
4313 emit_byte(0xd8);
4314 emit_byte(0xe1); /* subtract rounded from original */
4315 emit_byte(0xd9);
4316 emit_byte(0xf0); /* f2xm1 */
4317 emit_byte(0xdc);
4318 emit_byte(0x05);
4319 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4320 emit_byte(0xd9);
4321 emit_byte(0xfd); /* and scale it */
4322 emit_byte(0xdd);
4323 emit_byte(0xd9); /* take he rounded value off */
4324 tos_make(d); /* store to destination */
4325 }
4326 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4327
4328 LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4329 {
4330 int ds;
4331
4332 usereg(s);
4333 ds=stackpos(s);
4334 emit_byte(0xd9);
4335 emit_byte(0xc0+ds); /* duplicate source */
4336 emit_byte(0xd9);
4337 emit_byte(0xea); /* fldl2e */
4338 emit_byte(0xde);
4339 emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4340
4341 emit_byte(0xd9);
4342 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4343 emit_byte(0xd9);
4344 emit_byte(0xfc); /* rndint */
4345 emit_byte(0xd9);
4346 emit_byte(0xc9); /* swap top two elements */
4347 emit_byte(0xd8);
4348 emit_byte(0xe1); /* subtract rounded from original */
4349 emit_byte(0xd9);
4350 emit_byte(0xf0); /* f2xm1 */
4351 emit_byte(0xdc);
4352 emit_byte(0x05);
4353 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4354 emit_byte(0xd9);
4355 emit_byte(0xfd); /* and scale it */
4356 emit_byte(0xdd);
4357 emit_byte(0xd9); /* take he rounded value off */
4358 tos_make(d); /* store to destination */
4359 }
4360 LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4361
4362 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4363 {
4364 int ds;
4365
4366 usereg(s);
4367 ds=stackpos(s);
4368 emit_byte(0xd9);
4369 emit_byte(0xc0+ds); /* duplicate source */
4370 emit_byte(0xd9);
4371 emit_byte(0xe8); /* push '1' */
4372 emit_byte(0xd9);
4373 emit_byte(0xc9); /* swap top two */
4374 emit_byte(0xd9);
4375 emit_byte(0xf1); /* take 1*log2(x) */
4376 tos_make(d); /* store to destination */
4377 }
4378 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4379
4380
4381 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4382 {
4383 int ds;
4384
4385 if (d!=s) {
4386 usereg(s);
4387 ds=stackpos(s);
4388 emit_byte(0xd9);
4389 emit_byte(0xc0+ds); /* duplicate source */
4390 emit_byte(0xd9);
4391 emit_byte(0xe0); /* take fchs */
4392 tos_make(d); /* store to destination */
4393 }
4394 else {
4395 make_tos(d);
4396 emit_byte(0xd9);
4397 emit_byte(0xe0); /* take fchs */
4398 }
4399 }
4400 LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4401
4402 LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4403 {
4404 int ds;
4405
4406 usereg(s);
4407 usereg(d);
4408
4409 if (live.spos[s]==live.tos) {
4410 /* Source is on top of stack */
4411 ds=stackpos(d);
4412 emit_byte(0xdc);
4413 emit_byte(0xc0+ds); /* add source to dest*/
4414 }
4415 else {
4416 make_tos(d);
4417 ds=stackpos(s);
4418
4419 emit_byte(0xd8);
4420 emit_byte(0xc0+ds); /* add source to dest*/
4421 }
4422 }
4423 LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4424
4425 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4426 {
4427 int ds;
4428
4429 usereg(s);
4430 usereg(d);
4431
4432 if (live.spos[s]==live.tos) {
4433 /* Source is on top of stack */
4434 ds=stackpos(d);
4435 emit_byte(0xdc);
4436 emit_byte(0xe8+ds); /* sub source from dest*/
4437 }
4438 else {
4439 make_tos(d);
4440 ds=stackpos(s);
4441
4442 emit_byte(0xd8);
4443 emit_byte(0xe0+ds); /* sub src from dest */
4444 }
4445 }
4446 LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4447
4448 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4449 {
4450 int ds;
4451
4452 usereg(s);
4453 usereg(d);
4454
4455 make_tos(d);
4456 ds=stackpos(s);
4457
4458 emit_byte(0xdd);
4459 emit_byte(0xe0+ds); /* cmp dest with source*/
4460 }
4461 LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4462
4463 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4464 {
4465 int ds;
4466
4467 usereg(s);
4468 usereg(d);
4469
4470 if (live.spos[s]==live.tos) {
4471 /* Source is on top of stack */
4472 ds=stackpos(d);
4473 emit_byte(0xdc);
4474 emit_byte(0xc8+ds); /* mul dest by source*/
4475 }
4476 else {
4477 make_tos(d);
4478 ds=stackpos(s);
4479
4480 emit_byte(0xd8);
4481 emit_byte(0xc8+ds); /* mul dest by source*/
4482 }
4483 }
4484 LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4485
4486 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4487 {
4488 int ds;
4489
4490 usereg(s);
4491 usereg(d);
4492
4493 if (live.spos[s]==live.tos) {
4494 /* Source is on top of stack */
4495 ds=stackpos(d);
4496 emit_byte(0xdc);
4497 emit_byte(0xf8+ds); /* div dest by source */
4498 }
4499 else {
4500 make_tos(d);
4501 ds=stackpos(s);
4502
4503 emit_byte(0xd8);
4504 emit_byte(0xf0+ds); /* div dest by source*/
4505 }
4506 }
4507 LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4508
4509 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4510 {
4511 int ds;
4512
4513 usereg(s);
4514 usereg(d);
4515
4516 make_tos2(d,s);
4517 ds=stackpos(s);
4518
4519 if (ds!=1) {
4520 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4521 abort();
4522 }
4523 emit_byte(0xd9);
4524 emit_byte(0xf8); /* take rem from dest by source */
4525 }
4526 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4527
4528 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4529 {
4530 int ds;
4531
4532 usereg(s);
4533 usereg(d);
4534
4535 make_tos2(d,s);
4536 ds=stackpos(s);
4537
4538 if (ds!=1) {
4539 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4540 abort();
4541 }
4542 emit_byte(0xd9);
4543 emit_byte(0xf5); /* take rem1 from dest by source */
4544 }
4545 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4546
4547
4548 LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4549 {
4550 make_tos(r);
4551 emit_byte(0xd9); /* ftst */
4552 emit_byte(0xe4);
4553 }
4554 LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4555
/* raw_fflags_into_flags() goes through "fstsw ax" when fucomi is not
   used, i.e. when have_cmov is false; in that case the register named
   by FFLAG_NREG (%eax) is clobbered. */
#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
#define FFLAG_NREG EAX_INDEX
4559
4560 static __inline__ void raw_fflags_into_flags(int r)
4561 {
4562 int p;
4563
4564 usereg(r);
4565 p=stackpos(r);
4566
4567 emit_byte(0xd9);
4568 emit_byte(0xee); /* Push 0 */
4569 emit_byte(0xd9);
4570 emit_byte(0xc9+p); /* swap top two around */
4571 if (have_cmov) {
4572 // gb-- fucomi is for P6 cores only, not K6-2 then...
4573 emit_byte(0xdb);
4574 emit_byte(0xe9+p); /* fucomi them */
4575 }
4576 else {
4577 emit_byte(0xdd);
4578 emit_byte(0xe1+p); /* fucom them */
4579 emit_byte(0x9b);
4580 emit_byte(0xdf);
4581 emit_byte(0xe0); /* fstsw ax */
4582 raw_sahf(0); /* sahf */
4583 }
4584 emit_byte(0xdd);
4585 emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4586 }