root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.24
Committed: 2004-11-08T21:10:46Z by gbeauche
Branch: MAIN
Changes since 1.23: +57 -16 lines
Log Message:
Merge BSF simulation on P4 from Amithlon. Use 33-bit memory addressing model.

File Contents

# Content
1 /*
2 * compiler/codegen_x86.cpp - IA-32 code generator
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2004 Christian Bauer
10 *
11 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 /* This should eventually end up in machdep/, but for now, x86 is the
29 only target, and it's easier this way... */
30
31 #include "flags_x86.h"
32
33 /*************************************************************************
34 * Some basic information about the target CPU *
35 *************************************************************************/
36
37 #define EAX_INDEX 0
38 #define ECX_INDEX 1
39 #define EDX_INDEX 2
40 #define EBX_INDEX 3
41 #define ESP_INDEX 4
42 #define EBP_INDEX 5
43 #define ESI_INDEX 6
44 #define EDI_INDEX 7
45 #if defined(__x86_64__)
46 #define R8_INDEX 8
47 #define R9_INDEX 9
48 #define R10_INDEX 10
49 #define R11_INDEX 11
50 #define R12_INDEX 12
51 #define R13_INDEX 13
52 #define R14_INDEX 14
53 #define R15_INDEX 15
54 #endif
55
56 /* The register in which subroutines return an integer return value */
57 #define REG_RESULT EAX_INDEX
58
59 /* The registers subroutines take their first and second argument in */
60 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61 /* Handle the _fastcall parameters of ECX and EDX */
62 #define REG_PAR1 ECX_INDEX
63 #define REG_PAR2 EDX_INDEX
64 #elif defined(__x86_64__)
65 #define REG_PAR1 EDI_INDEX
66 #define REG_PAR2 ESI_INDEX
67 #else
68 #define REG_PAR1 EAX_INDEX
69 #define REG_PAR2 EDX_INDEX
70 #endif
71
72 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
74 #define REG_PC_TMP EAX_INDEX
75 #else
76 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 #endif
78
79 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 -1 if any reg will do */
81 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82 #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83
84 uae_s8 always_used[]={4,-1};
85 #if defined(__x86_64__)
86 uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87 uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88 #else
89 uae_s8 can_byte[]={0,1,2,3,-1};
90 uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 #endif
92
93 #if USE_OPTIMIZED_CALLS
94 /* Make sure interpretive core does not use cpuopti */
95 uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 #error FIXME: code not ready
97 #else
98 /* cpuopti mutates instruction handlers to assume registers are saved
99 by the caller */
100 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 #endif
102
103 /* This *should* be the same as call_saved. But:
104 - We might not really know which registers are saved, and which aren't,
105 so we need to preserve some, but don't want to rely on everyone else
106 also saving those registers
107 - Special registers (such as the stack pointer) should not be "preserved"
108 by pushing, even though they are "saved" across function calls
109 */
110 #if defined(__x86_64__)
111 /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 /* preserve r11 because it's generally used to hold pointers to functions */
113 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
114 #else
115 static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
116 #endif
117
118 /* Whether classes of instructions do or don't clobber the native flags */
119 #define CLOBBER_MOV
120 #define CLOBBER_LEA
121 #define CLOBBER_CMOV
122 #define CLOBBER_POP
123 #define CLOBBER_PUSH
124 #define CLOBBER_SUB clobber_flags()
125 #define CLOBBER_SBB clobber_flags()
126 #define CLOBBER_CMP clobber_flags()
127 #define CLOBBER_ADD clobber_flags()
128 #define CLOBBER_ADC clobber_flags()
129 #define CLOBBER_AND clobber_flags()
130 #define CLOBBER_OR clobber_flags()
131 #define CLOBBER_XOR clobber_flags()
132
133 #define CLOBBER_ROL clobber_flags()
134 #define CLOBBER_ROR clobber_flags()
135 #define CLOBBER_SHLL clobber_flags()
136 #define CLOBBER_SHRL clobber_flags()
137 #define CLOBBER_SHRA clobber_flags()
138 #define CLOBBER_TEST clobber_flags()
139 #define CLOBBER_CL16
140 #define CLOBBER_CL8
141 #define CLOBBER_SE32
142 #define CLOBBER_SE16
143 #define CLOBBER_SE8
144 #define CLOBBER_ZE32
145 #define CLOBBER_ZE16
146 #define CLOBBER_ZE8
147 #define CLOBBER_SW16 clobber_flags()
148 #define CLOBBER_SW32
149 #define CLOBBER_SETCC
150 #define CLOBBER_MUL clobber_flags()
151 #define CLOBBER_BT clobber_flags()
152 #define CLOBBER_BSF clobber_flags()
153
154 /* FIXME: the new RTASM is disabled until it has been proofread (x86-64 requires it). */
155 #if defined(__x86_64__)
156 #define USE_NEW_RTASM 1
157 #endif
158
159 #if USE_NEW_RTASM
160
161 #if defined(__x86_64__)
162 #define X86_TARGET_64BIT 1
163 #endif
164 #define X86_FLAT_REGISTERS 0
165 #define X86_OPTIMIZE_ALU 1
166 #define X86_OPTIMIZE_ROTSHI 1
167 #include "codegen_x86.h"
168
169 #define x86_emit_byte(B) emit_byte(B)
170 #define x86_emit_word(W) emit_word(W)
171 #define x86_emit_long(L) emit_long(L)
172 #define x86_emit_quad(Q) emit_quad(Q)
173 #define x86_get_target() get_target()
174 #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
175
176 static void jit_fail(const char *msg, const char *file, int line, const char *function)
177 {
178 fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
179 function, file, line, msg);
180 abort();
181 }
182
183 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
184 {
185 #if defined(__x86_64__)
186 PUSHQr(r);
187 #else
188 PUSHLr(r);
189 #endif
190 }
191 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
192
193 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
194 {
195 #if defined(__x86_64__)
196 POPQr(r);
197 #else
198 POPLr(r);
199 #endif
200 }
201 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
202
203 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
204 {
205 #if defined(__x86_64__)
206 POPQm(d, X86_NOREG, X86_NOREG, 1);
207 #else
208 POPLm(d, X86_NOREG, X86_NOREG, 1);
209 #endif
210 }
211 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
212
213 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
214 {
215 BTLir(i, r);
216 }
217 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
218
219 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
220 {
221 BTLrr(b, r);
222 }
223 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
224
225 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
226 {
227 BTCLir(i, r);
228 }
229 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
230
231 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
232 {
233 BTCLrr(b, r);
234 }
235 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
236
237 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
238 {
239 BTRLir(i, r);
240 }
241 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
242
243 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
244 {
245 BTRLrr(b, r);
246 }
247 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
248
249 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
250 {
251 BTSLir(i, r);
252 }
253 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
254
255 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
256 {
257 BTSLrr(b, r);
258 }
259 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
260
261 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
262 {
263 SUBWir(i, d);
264 }
265 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
266
267 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
268 {
269 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
270 }
271 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
272
273 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
274 {
275 MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
276 }
277 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
278
279 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
280 {
281 MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
282 }
283 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
284
285 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
286 {
287 MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
288 }
289 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
290
291 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
292 {
293 ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
294 }
295 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
296
297 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
298 {
299 ROLBir(i, r);
300 }
301 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
302
303 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
304 {
305 ROLWir(i, r);
306 }
307 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
308
309 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
310 {
311 ROLLir(i, r);
312 }
313 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
314
315 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
316 {
317 ROLLrr(r, d);
318 }
319 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320
321 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
322 {
323 ROLWrr(r, d);
324 }
325 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326
327 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
328 {
329 ROLBrr(r, d);
330 }
331 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
332
333 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
334 {
335 SHLLrr(r, d);
336 }
337 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
338
339 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
340 {
341 SHLWrr(r, d);
342 }
343 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344
345 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
346 {
347 SHLBrr(r, d);
348 }
349 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350
351 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
352 {
353 RORBir(i, r);
354 }
355 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
356
357 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
358 {
359 RORWir(i, r);
360 }
361 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
362
363 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
364 {
365 ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
366 }
367 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
368
369 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
370 {
371 RORLir(i, r);
372 }
373 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
374
375 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
376 {
377 RORLrr(r, d);
378 }
379 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
380
381 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
382 {
383 RORWrr(r, d);
384 }
385 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
386
387 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
388 {
389 RORBrr(r, d);
390 }
391 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
392
393 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
394 {
395 SHRLrr(r, d);
396 }
397 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
398
399 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
400 {
401 SHRWrr(r, d);
402 }
403 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
404
405 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
406 {
407 SHRBrr(r, d);
408 }
409 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
410
411 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
412 {
413 SARLrr(r, d);
414 }
415 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
416
417 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
418 {
419 SARWrr(r, d);
420 }
421 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
422
423 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
424 {
425 SARBrr(r, d);
426 }
427 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
428
429 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
430 {
431 SHLLir(i, r);
432 }
433 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
434
435 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
436 {
437 SHLWir(i, r);
438 }
439 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
440
441 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
442 {
443 SHLBir(i, r);
444 }
445 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
446
447 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
448 {
449 SHRLir(i, r);
450 }
451 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
452
453 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
454 {
455 SHRWir(i, r);
456 }
457 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
458
459 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
460 {
461 SHRBir(i, r);
462 }
463 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
464
465 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
466 {
467 SARLir(i, r);
468 }
469 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
470
471 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
472 {
473 SARWir(i, r);
474 }
475 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
476
477 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
478 {
479 SARBir(i, r);
480 }
481 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
482
483 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
484 {
485 SAHF();
486 }
487 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
488
489 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
490 {
491 CPUID();
492 }
493 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
494
495 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
496 {
497 LAHF();
498 }
499 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
500
501 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
502 {
503 SETCCir(cc, d);
504 }
505 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
506
507 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
508 {
509 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
510 }
511 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
512
513 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
514 {
515 if (have_cmov)
516 CMOVLrr(cc, s, d);
517 else { /* replacement using branch and mov */
518 #if defined(__x86_64__)
519 write_log("x86-64 implementations are bound to have CMOV!\n");
520 abort();
521 #endif
522 JCCSii(cc^1, 2);
523 MOVLrr(s, d);
524 }
525 }
526 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
527
528 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
529 {
530 BSFLrr(s, d);
531 }
532 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
533
534 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
535 {
536 MOVSLQrr(s, d);
537 }
538 LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
539
540 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
541 {
542 MOVSWLrr(s, d);
543 }
544 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
545
546 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
547 {
548 MOVSBLrr(s, d);
549 }
550 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
551
552 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
553 {
554 MOVZWLrr(s, d);
555 }
556 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
557
558 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
559 {
560 MOVZBLrr(s, d);
561 }
562 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
563
564 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
565 {
566 IMULLrr(s, d);
567 }
568 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
569
570 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
571 {
572 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
573 write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
574 abort();
575 }
576 IMULLr(s);
577 }
578 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
579
580 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
581 {
582 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
583 write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
584 abort();
585 }
586 MULLr(s);
587 }
588 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
589
590 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
591 {
592 abort(); /* %^$&%^$%#^ x86! */
593 }
594 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
595
596 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
597 {
598 MOVBrr(s, d);
599 }
600 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
601
602 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
603 {
604 MOVWrr(s, d);
605 }
606 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
607
608 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
609 {
610 MOVLmr(0, baser, index, factor, d);
611 }
612 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
613
614 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
615 {
616 MOVWmr(0, baser, index, factor, d);
617 }
618 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
619
620 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
621 {
622 MOVBmr(0, baser, index, factor, d);
623 }
624 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
625
626 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
627 {
628 MOVLrm(s, 0, baser, index, factor);
629 }
630 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
631
632 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
633 {
634 MOVWrm(s, 0, baser, index, factor);
635 }
636 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
637
638 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
639 {
640 MOVBrm(s, 0, baser, index, factor);
641 }
642 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
643
644 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
645 {
646 MOVLrm(s, base, baser, index, factor);
647 }
648 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
649
650 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
651 {
652 MOVWrm(s, base, baser, index, factor);
653 }
654 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
655
656 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
657 {
658 MOVBrm(s, base, baser, index, factor);
659 }
660 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
661
662 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
663 {
664 MOVLmr(base, baser, index, factor, d);
665 }
666 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
667
668 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
669 {
670 MOVWmr(base, baser, index, factor, d);
671 }
672 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
673
674 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
675 {
676 MOVBmr(base, baser, index, factor, d);
677 }
678 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
679
680 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
681 {
682 MOVLmr(base, X86_NOREG, index, factor, d);
683 }
684 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
685
686 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
687 {
688 if (have_cmov)
689 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
690 else { /* replacement using branch and mov */
691 #if defined(__x86_64__)
692 write_log("x86-64 implementations are bound to have CMOV!\n");
693 abort();
694 #endif
695 JCCSii(cond^1, 7);
696 MOVLmr(base, X86_NOREG, index, factor, d);
697 }
698 }
699 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
700
701 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
702 {
703 if (have_cmov)
704 CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
705 else { /* replacement using branch and mov */
706 #if defined(__x86_64__)
707 write_log("x86-64 implementations are bound to have CMOV!\n");
708 abort();
709 #endif
710 JCCSii(cond^1, 6);
711 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
712 }
713 }
714 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
715
716 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
717 {
718 MOVLmr(offset, s, X86_NOREG, 1, d);
719 }
720 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
721
722 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
723 {
724 MOVWmr(offset, s, X86_NOREG, 1, d);
725 }
726 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
727
728 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
729 {
730 MOVBmr(offset, s, X86_NOREG, 1, d);
731 }
732 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
733
734 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
735 {
736 MOVLmr(offset, s, X86_NOREG, 1, d);
737 }
738 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
739
740 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
741 {
742 MOVWmr(offset, s, X86_NOREG, 1, d);
743 }
744 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
745
746 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
747 {
748 MOVBmr(offset, s, X86_NOREG, 1, d);
749 }
750 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
751
752 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
753 {
754 MOVLim(i, offset, d, X86_NOREG, 1);
755 }
756 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
757
758 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
759 {
760 MOVWim(i, offset, d, X86_NOREG, 1);
761 }
762 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
763
764 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
765 {
766 MOVBim(i, offset, d, X86_NOREG, 1);
767 }
768 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
769
770 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
771 {
772 MOVLrm(s, offset, d, X86_NOREG, 1);
773 }
774 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
775
776 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
777 {
778 MOVWrm(s, offset, d, X86_NOREG, 1);
779 }
780 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
781
782 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
783 {
784 MOVBrm(s, offset, d, X86_NOREG, 1);
785 }
786 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
787
788 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
789 {
790 LEALmr(offset, s, X86_NOREG, 1, d);
791 }
792 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
793
794 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
795 {
796 LEALmr(offset, s, index, factor, d);
797 }
798 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
799
800 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
801 {
802 LEALmr(0, s, index, factor, d);
803 }
804 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
805
806 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
807 {
808 MOVLrm(s, offset, d, X86_NOREG, 1);
809 }
810 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
811
812 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
813 {
814 MOVWrm(s, offset, d, X86_NOREG, 1);
815 }
816 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
817
818 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
819 {
820 MOVBrm(s, offset, d, X86_NOREG, 1);
821 }
822 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
823
824 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
825 {
826 BSWAPLr(r);
827 }
828 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
829
830 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
831 {
832 ROLWir(8, r);
833 }
834 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
835
836 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
837 {
838 MOVLrr(s, d);
839 }
840 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
841
842 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
843 {
844 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
845 }
846 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
847
848 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
849 {
850 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
851 }
852 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
853
854 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
855 {
856 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
857 }
858 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
859
860 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
861 {
862 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
863 }
864 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
865
866 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
867 {
868 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
869 }
870 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
871
872 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
873 {
874 MOVLir(s, d);
875 }
876 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
877
878 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
879 {
880 MOVWir(s, d);
881 }
882 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
883
884 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
885 {
886 MOVBir(s, d);
887 }
888 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
889
890 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
891 {
892 ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
893 }
894 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
895
896 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
897 {
898 ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
899 }
900 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
901
902 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
903 {
904 ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
905 }
906 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
907
908 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
909 {
910 ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
911 }
912 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
913
914 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
915 {
916 TESTLir(i, d);
917 }
918 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
919
920 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
921 {
922 TESTLrr(s, d);
923 }
924 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
925
926 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
927 {
928 TESTWrr(s, d);
929 }
930 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
931
932 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
933 {
934 TESTBrr(s, d);
935 }
936 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
937
938 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
939 {
940 XORLir(i, d);
941 }
942 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
943
944 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
945 {
946 ANDLir(i, d);
947 }
948 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
949
950 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
951 {
952 ANDWir(i, d);
953 }
954 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
955
956 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
957 {
958 ANDLrr(s, d);
959 }
960 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
961
962 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
963 {
964 ANDWrr(s, d);
965 }
966 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
967
968 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
969 {
970 ANDBrr(s, d);
971 }
972 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
973
974 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
975 {
976 ORLir(i, d);
977 }
978 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
979
980 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
981 {
982 ORLrr(s, d);
983 }
984 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
985
986 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
987 {
988 ORWrr(s, d);
989 }
990 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
991
992 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
993 {
994 ORBrr(s, d);
995 }
996 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
997
998 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
999 {
1000 ADCLrr(s, d);
1001 }
1002 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1003
1004 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1005 {
1006 ADCWrr(s, d);
1007 }
1008 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1009
1010 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1011 {
1012 ADCBrr(s, d);
1013 }
1014 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1015
1016 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1017 {
1018 ADDLrr(s, d);
1019 }
1020 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1021
1022 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1023 {
1024 ADDWrr(s, d);
1025 }
1026 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1027
1028 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1029 {
1030 ADDBrr(s, d);
1031 }
1032 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1033
1034 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1035 {
1036 SUBLir(i, d);
1037 }
1038 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1039
1040 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1041 {
1042 SUBBir(i, d);
1043 }
1044 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1045
1046 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1047 {
1048 ADDLir(i, d);
1049 }
1050 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1051
1052 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1053 {
1054 ADDWir(i, d);
1055 }
1056 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1057
1058 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1059 {
1060 ADDBir(i, d);
1061 }
1062 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1063
1064 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1065 {
1066 SBBLrr(s, d);
1067 }
1068 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1069
1070 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1071 {
1072 SBBWrr(s, d);
1073 }
1074 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1075
1076 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1077 {
1078 SBBBrr(s, d);
1079 }
1080 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1081
1082 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1083 {
1084 SUBLrr(s, d);
1085 }
1086 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1087
1088 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1089 {
1090 SUBWrr(s, d);
1091 }
1092 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1093
1094 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1095 {
1096 SUBBrr(s, d);
1097 }
1098 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1099
1100 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1101 {
1102 CMPLrr(s, d);
1103 }
1104 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1105
1106 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1107 {
1108 CMPLir(i, r);
1109 }
1110 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1111
1112 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1113 {
1114 CMPWrr(s, d);
1115 }
1116 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1117
1118 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1119 {
1120 CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1121 }
1122 LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1123
1124 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1125 {
1126 CMPBir(i, d);
1127 }
1128 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1129
1130 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1131 {
1132 CMPBrr(s, d);
1133 }
1134 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1135
1136 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1137 {
1138 CMPLmr(offset, X86_NOREG, index, factor, d);
1139 }
1140 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1141
1142 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1143 {
1144 XORLrr(s, d);
1145 }
1146 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1147
1148 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1149 {
1150 XORWrr(s, d);
1151 }
1152 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1153
1154 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1155 {
1156 XORBrr(s, d);
1157 }
1158 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1159
1160 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1161 {
1162 SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1163 }
1164 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1165
1166 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1167 {
1168 CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1169 }
1170 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1171
1172 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1173 {
1174 XCHGLrr(r2, r1);
1175 }
1176 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1177
1178 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1179 {
1180 PUSHF();
1181 }
1182 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1183
1184 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1185 {
1186 POPF();
1187 }
1188 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1189
1190 #else
1191
1192 const bool optimize_accum = true;
1193 const bool optimize_imm8 = true;
1194 const bool optimize_shift_once = true;
1195
1196 /*************************************************************************
1197 * Actual encoding of the instructions on the target CPU *
1198 *************************************************************************/
1199
1200 static __inline__ int isaccum(int r)
1201 {
1202 return (r == EAX_INDEX);
1203 }
1204
1205 static __inline__ int isbyte(uae_s32 x)
1206 {
1207 return (x>=-128 && x<=127);
1208 }
1209
1210 static __inline__ int isword(uae_s32 x)
1211 {
1212 return (x>=-32768 && x<=32767);
1213 }
1214
1215 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1216 {
1217 emit_byte(0x50+r);
1218 }
1219 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1220
1221 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1222 {
1223 emit_byte(0x58+r);
1224 }
1225 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1226
1227 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1228 {
1229 emit_byte(0x8f);
1230 emit_byte(0x05);
1231 emit_long(d);
1232 }
1233 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1234
1235 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1236 {
1237 emit_byte(0x0f);
1238 emit_byte(0xba);
1239 emit_byte(0xe0+r);
1240 emit_byte(i);
1241 }
1242 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1243
1244 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1245 {
1246 emit_byte(0x0f);
1247 emit_byte(0xa3);
1248 emit_byte(0xc0+8*b+r);
1249 }
1250 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1251
1252 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1253 {
1254 emit_byte(0x0f);
1255 emit_byte(0xba);
1256 emit_byte(0xf8+r);
1257 emit_byte(i);
1258 }
1259 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1260
1261 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1262 {
1263 emit_byte(0x0f);
1264 emit_byte(0xbb);
1265 emit_byte(0xc0+8*b+r);
1266 }
1267 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1268
1269
1270 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1271 {
1272 emit_byte(0x0f);
1273 emit_byte(0xba);
1274 emit_byte(0xf0+r);
1275 emit_byte(i);
1276 }
1277 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1278
1279 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1280 {
1281 emit_byte(0x0f);
1282 emit_byte(0xb3);
1283 emit_byte(0xc0+8*b+r);
1284 }
1285 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1286
1287 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1288 {
1289 emit_byte(0x0f);
1290 emit_byte(0xba);
1291 emit_byte(0xe8+r);
1292 emit_byte(i);
1293 }
1294 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1295
1296 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1297 {
1298 emit_byte(0x0f);
1299 emit_byte(0xab);
1300 emit_byte(0xc0+8*b+r);
1301 }
1302 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1303
1304 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1305 {
1306 emit_byte(0x66);
1307 if (isbyte(i)) {
1308 emit_byte(0x83);
1309 emit_byte(0xe8+d);
1310 emit_byte(i);
1311 }
1312 else {
1313 if (optimize_accum && isaccum(d))
1314 emit_byte(0x2d);
1315 else {
1316 emit_byte(0x81);
1317 emit_byte(0xe8+d);
1318 }
1319 emit_word(i);
1320 }
1321 }
1322 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1323
1324
1325 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1326 {
1327 emit_byte(0x8b);
1328 emit_byte(0x05+8*d);
1329 emit_long(s);
1330 }
1331 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1332
1333 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1334 {
1335 emit_byte(0xc7);
1336 emit_byte(0x05);
1337 emit_long(d);
1338 emit_long(s);
1339 }
1340 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1341
1342 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1343 {
1344 emit_byte(0x66);
1345 emit_byte(0xc7);
1346 emit_byte(0x05);
1347 emit_long(d);
1348 emit_word(s);
1349 }
1350 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1351
1352 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1353 {
1354 emit_byte(0xc6);
1355 emit_byte(0x05);
1356 emit_long(d);
1357 emit_byte(s);
1358 }
1359 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1360
1361 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1362 {
1363 if (optimize_shift_once && (i == 1)) {
1364 emit_byte(0xd0);
1365 emit_byte(0x05);
1366 emit_long(d);
1367 }
1368 else {
1369 emit_byte(0xc0);
1370 emit_byte(0x05);
1371 emit_long(d);
1372 emit_byte(i);
1373 }
1374 }
1375 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1376
1377 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1378 {
1379 if (optimize_shift_once && (i == 1)) {
1380 emit_byte(0xd0);
1381 emit_byte(0xc0+r);
1382 }
1383 else {
1384 emit_byte(0xc0);
1385 emit_byte(0xc0+r);
1386 emit_byte(i);
1387 }
1388 }
1389 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1390
1391 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1392 {
1393 emit_byte(0x66);
1394 emit_byte(0xc1);
1395 emit_byte(0xc0+r);
1396 emit_byte(i);
1397 }
1398 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1399
1400 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1401 {
1402 if (optimize_shift_once && (i == 1)) {
1403 emit_byte(0xd1);
1404 emit_byte(0xc0+r);
1405 }
1406 else {
1407 emit_byte(0xc1);
1408 emit_byte(0xc0+r);
1409 emit_byte(i);
1410 }
1411 }
1412 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1413
1414 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1415 {
1416 emit_byte(0xd3);
1417 emit_byte(0xc0+d);
1418 }
1419 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1420
1421 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1422 {
1423 emit_byte(0x66);
1424 emit_byte(0xd3);
1425 emit_byte(0xc0+d);
1426 }
1427 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1428
1429 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1430 {
1431 emit_byte(0xd2);
1432 emit_byte(0xc0+d);
1433 }
1434 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1435
1436 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1437 {
1438 emit_byte(0xd3);
1439 emit_byte(0xe0+d);
1440 }
1441 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1442
1443 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1444 {
1445 emit_byte(0x66);
1446 emit_byte(0xd3);
1447 emit_byte(0xe0+d);
1448 }
1449 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1450
1451 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1452 {
1453 emit_byte(0xd2);
1454 emit_byte(0xe0+d);
1455 }
1456 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1457
1458 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1459 {
1460 if (optimize_shift_once && (i == 1)) {
1461 emit_byte(0xd0);
1462 emit_byte(0xc8+r);
1463 }
1464 else {
1465 emit_byte(0xc0);
1466 emit_byte(0xc8+r);
1467 emit_byte(i);
1468 }
1469 }
1470 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1471
1472 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1473 {
1474 emit_byte(0x66);
1475 emit_byte(0xc1);
1476 emit_byte(0xc8+r);
1477 emit_byte(i);
1478 }
1479 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1480
1481 // gb-- used for making an fpcr value in compemu_fpp.cpp
1482 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1483 {
1484 emit_byte(0x0b);
1485 emit_byte(0x05+8*d);
1486 emit_long(s);
1487 }
1488 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1489
1490 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1491 {
1492 if (optimize_shift_once && (i == 1)) {
1493 emit_byte(0xd1);
1494 emit_byte(0xc8+r);
1495 }
1496 else {
1497 emit_byte(0xc1);
1498 emit_byte(0xc8+r);
1499 emit_byte(i);
1500 }
1501 }
1502 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1503
1504 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1505 {
1506 emit_byte(0xd3);
1507 emit_byte(0xc8+d);
1508 }
1509 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1510
1511 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1512 {
1513 emit_byte(0x66);
1514 emit_byte(0xd3);
1515 emit_byte(0xc8+d);
1516 }
1517 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1518
1519 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1520 {
1521 emit_byte(0xd2);
1522 emit_byte(0xc8+d);
1523 }
1524 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1525
1526 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1527 {
1528 emit_byte(0xd3);
1529 emit_byte(0xe8+d);
1530 }
1531 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1532
1533 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1534 {
1535 emit_byte(0x66);
1536 emit_byte(0xd3);
1537 emit_byte(0xe8+d);
1538 }
1539 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1540
1541 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1542 {
1543 emit_byte(0xd2);
1544 emit_byte(0xe8+d);
1545 }
1546 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1547
1548 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1549 {
1550 emit_byte(0xd3);
1551 emit_byte(0xf8+d);
1552 }
1553 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1554
1555 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1556 {
1557 emit_byte(0x66);
1558 emit_byte(0xd3);
1559 emit_byte(0xf8+d);
1560 }
1561 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1562
1563 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1564 {
1565 emit_byte(0xd2);
1566 emit_byte(0xf8+d);
1567 }
1568 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1569
1570 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1571 {
1572 if (optimize_shift_once && (i == 1)) {
1573 emit_byte(0xd1);
1574 emit_byte(0xe0+r);
1575 }
1576 else {
1577 emit_byte(0xc1);
1578 emit_byte(0xe0+r);
1579 emit_byte(i);
1580 }
1581 }
1582 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1583
1584 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1585 {
1586 emit_byte(0x66);
1587 emit_byte(0xc1);
1588 emit_byte(0xe0+r);
1589 emit_byte(i);
1590 }
1591 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1592
1593 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1594 {
1595 if (optimize_shift_once && (i == 1)) {
1596 emit_byte(0xd0);
1597 emit_byte(0xe0+r);
1598 }
1599 else {
1600 emit_byte(0xc0);
1601 emit_byte(0xe0+r);
1602 emit_byte(i);
1603 }
1604 }
1605 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1606
1607 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1608 {
1609 if (optimize_shift_once && (i == 1)) {
1610 emit_byte(0xd1);
1611 emit_byte(0xe8+r);
1612 }
1613 else {
1614 emit_byte(0xc1);
1615 emit_byte(0xe8+r);
1616 emit_byte(i);
1617 }
1618 }
1619 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1620
1621 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1622 {
1623 emit_byte(0x66);
1624 emit_byte(0xc1);
1625 emit_byte(0xe8+r);
1626 emit_byte(i);
1627 }
1628 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1629
1630 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1631 {
1632 if (optimize_shift_once && (i == 1)) {
1633 emit_byte(0xd0);
1634 emit_byte(0xe8+r);
1635 }
1636 else {
1637 emit_byte(0xc0);
1638 emit_byte(0xe8+r);
1639 emit_byte(i);
1640 }
1641 }
1642 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1643
1644 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1645 {
1646 if (optimize_shift_once && (i == 1)) {
1647 emit_byte(0xd1);
1648 emit_byte(0xf8+r);
1649 }
1650 else {
1651 emit_byte(0xc1);
1652 emit_byte(0xf8+r);
1653 emit_byte(i);
1654 }
1655 }
1656 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1657
1658 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1659 {
1660 emit_byte(0x66);
1661 emit_byte(0xc1);
1662 emit_byte(0xf8+r);
1663 emit_byte(i);
1664 }
1665 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1666
1667 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1668 {
1669 if (optimize_shift_once && (i == 1)) {
1670 emit_byte(0xd0);
1671 emit_byte(0xf8+r);
1672 }
1673 else {
1674 emit_byte(0xc0);
1675 emit_byte(0xf8+r);
1676 emit_byte(i);
1677 }
1678 }
1679 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1680
1681 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1682 {
1683 emit_byte(0x9e);
1684 }
1685 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1686
1687 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1688 {
1689 emit_byte(0x0f);
1690 emit_byte(0xa2);
1691 }
1692 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1693
1694 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1695 {
1696 emit_byte(0x9f);
1697 }
1698 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1699
1700 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1701 {
1702 emit_byte(0x0f);
1703 emit_byte(0x90+cc);
1704 emit_byte(0xc0+d);
1705 }
1706 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1707
1708 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1709 {
1710 emit_byte(0x0f);
1711 emit_byte(0x90+cc);
1712 emit_byte(0x05);
1713 emit_long(d);
1714 }
1715 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1716
1717 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1718 {
1719 if (have_cmov) {
1720 emit_byte(0x0f);
1721 emit_byte(0x40+cc);
1722 emit_byte(0xc0+8*d+s);
1723 }
1724 else { /* replacement using branch and mov */
1725 int uncc=(cc^1);
1726 emit_byte(0x70+uncc);
1727 emit_byte(2); /* skip next 2 bytes if not cc=true */
1728 emit_byte(0x89);
1729 emit_byte(0xc0+8*s+d);
1730 }
1731 }
1732 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1733
1734 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1735 {
1736 emit_byte(0x0f);
1737 emit_byte(0xbc);
1738 emit_byte(0xc0+8*d+s);
1739 }
1740 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1741
1742 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1743 {
1744 emit_byte(0x0f);
1745 emit_byte(0xbf);
1746 emit_byte(0xc0+8*d+s);
1747 }
1748 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1749
1750 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1751 {
1752 emit_byte(0x0f);
1753 emit_byte(0xbe);
1754 emit_byte(0xc0+8*d+s);
1755 }
1756 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1757
1758 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1759 {
1760 emit_byte(0x0f);
1761 emit_byte(0xb7);
1762 emit_byte(0xc0+8*d+s);
1763 }
1764 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1765
1766 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1767 {
1768 emit_byte(0x0f);
1769 emit_byte(0xb6);
1770 emit_byte(0xc0+8*d+s);
1771 }
1772 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1773
1774 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1775 {
1776 emit_byte(0x0f);
1777 emit_byte(0xaf);
1778 emit_byte(0xc0+8*d+s);
1779 }
1780 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1781
1782 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1783 {
1784 if (d!=MUL_NREG1 || s!=MUL_NREG2)
1785 abort();
1786 emit_byte(0xf7);
1787 emit_byte(0xea);
1788 }
1789 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1790
1791 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1792 {
1793 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1794 printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1795 abort();
1796 }
1797 emit_byte(0xf7);
1798 emit_byte(0xe2);
1799 }
1800 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1801
1802 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1803 {
1804 abort(); /* %^$&%^$%#^ x86! */
1805 emit_byte(0x0f);
1806 emit_byte(0xaf);
1807 emit_byte(0xc0+8*d+s);
1808 }
1809 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1810
1811 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1812 {
1813 emit_byte(0x88);
1814 emit_byte(0xc0+8*s+d);
1815 }
1816 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1817
1818 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1819 {
1820 emit_byte(0x66);
1821 emit_byte(0x89);
1822 emit_byte(0xc0+8*s+d);
1823 }
1824 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1825
1826 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1827 {
1828 int isebp=(baser==5)?0x40:0;
1829 int fi;
1830
1831 switch(factor) {
1832 case 1: fi=0; break;
1833 case 2: fi=1; break;
1834 case 4: fi=2; break;
1835 case 8: fi=3; break;
1836 default: abort();
1837 }
1838
1839
1840 emit_byte(0x8b); /* opcode: MOV r32, r/m32 */
1841 emit_byte(0x04+8*d+isebp); /* ModRM: reg=d, rm=100 selects a SIB byte; isebp bumps mod to 01 */
1842 emit_byte(baser+8*index+0x40*fi); /* SIB: base, index, scale bits = 0x40*fi */
1843 if (isebp)
1844 emit_byte(0x00); /* an EBP base cannot use mod=00, so append a zero disp8 */
1845 }
1846 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1847
1848 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1849 {
1850 int fi;
1851 int isebp;
1852
1853 switch(factor) {
1854 case 1: fi=0; break;
1855 case 2: fi=1; break;
1856 case 4: fi=2; break;
1857 case 8: fi=3; break;
1858 default: abort();
1859 }
1860 isebp=(baser==5)?0x40:0;
1861
1862 emit_byte(0x66);
1863 emit_byte(0x8b);
1864 emit_byte(0x04+8*d+isebp);
1865 emit_byte(baser+8*index+0x40*fi);
1866 if (isebp)
1867 emit_byte(0x00);
1868 }
1869 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1870
1871 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1872 {
1873 int fi;
1874 int isebp;
1875
1876 switch(factor) {
1877 case 1: fi=0; break;
1878 case 2: fi=1; break;
1879 case 4: fi=2; break;
1880 case 8: fi=3; break;
1881 default: abort();
1882 }
1883 isebp=(baser==5)?0x40:0;
1884
1885 emit_byte(0x8a);
1886 emit_byte(0x04+8*d+isebp);
1887 emit_byte(baser+8*index+0x40*fi);
1888 if (isebp)
1889 emit_byte(0x00);
1890 }
1891 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1892
1893 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1894 {
1895 int fi;
1896 int isebp;
1897
1898 switch(factor) {
1899 case 1: fi=0; break;
1900 case 2: fi=1; break;
1901 case 4: fi=2; break;
1902 case 8: fi=3; break;
1903 default: abort();
1904 }
1905
1906
1907 isebp=(baser==5)?0x40:0;
1908
1909 emit_byte(0x89);
1910 emit_byte(0x04+8*s+isebp);
1911 emit_byte(baser+8*index+0x40*fi);
1912 if (isebp)
1913 emit_byte(0x00);
1914 }
1915 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1916
1917 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1918 {
1919 int fi;
1920 int isebp;
1921
1922 switch(factor) {
1923 case 1: fi=0; break;
1924 case 2: fi=1; break;
1925 case 4: fi=2; break;
1926 case 8: fi=3; break;
1927 default: abort();
1928 }
1929 isebp=(baser==5)?0x40:0;
1930
1931 emit_byte(0x66);
1932 emit_byte(0x89);
1933 emit_byte(0x04+8*s+isebp);
1934 emit_byte(baser+8*index+0x40*fi);
1935 if (isebp)
1936 emit_byte(0x00);
1937 }
1938 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1939
1940 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1941 {
1942 int fi;
1943 int isebp;
1944
1945 switch(factor) {
1946 case 1: fi=0; break;
1947 case 2: fi=1; break;
1948 case 4: fi=2; break;
1949 case 8: fi=3; break;
1950 default: abort();
1951 }
1952 isebp=(baser==5)?0x40:0;
1953
1954 emit_byte(0x88);
1955 emit_byte(0x04+8*s+isebp);
1956 emit_byte(baser+8*index+0x40*fi);
1957 if (isebp)
1958 emit_byte(0x00);
1959 }
1960 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1961
1962 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1963 {
1964 int fi;
1965
1966 switch(factor) {
1967 case 1: fi=0; break;
1968 case 2: fi=1; break;
1969 case 4: fi=2; break;
1970 case 8: fi=3; break;
1971 default: abort();
1972 }
1973
1974 emit_byte(0x89);
1975 emit_byte(0x84+8*s);
1976 emit_byte(baser+8*index+0x40*fi);
1977 emit_long(base);
1978 }
1979 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1980
1981 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1982 {
1983 int fi;
1984
1985 switch(factor) {
1986 case 1: fi=0; break;
1987 case 2: fi=1; break;
1988 case 4: fi=2; break;
1989 case 8: fi=3; break;
1990 default: abort();
1991 }
1992
1993 emit_byte(0x66);
1994 emit_byte(0x89);
1995 emit_byte(0x84+8*s);
1996 emit_byte(baser+8*index+0x40*fi);
1997 emit_long(base);
1998 }
1999 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2000
2001 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2002 {
2003 int fi;
2004
2005 switch(factor) {
2006 case 1: fi=0; break;
2007 case 2: fi=1; break;
2008 case 4: fi=2; break;
2009 case 8: fi=3; break;
2010 default: abort();
2011 }
2012
2013 emit_byte(0x88);
2014 emit_byte(0x84+8*s);
2015 emit_byte(baser+8*index+0x40*fi);
2016 emit_long(base);
2017 }
2018 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2019
2020 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2021 {
2022 int fi;
2023
2024 switch(factor) {
2025 case 1: fi=0; break;
2026 case 2: fi=1; break;
2027 case 4: fi=2; break;
2028 case 8: fi=3; break;
2029 default: abort();
2030 }
2031
2032 emit_byte(0x8b);
2033 emit_byte(0x84+8*d);
2034 emit_byte(baser+8*index+0x40*fi);
2035 emit_long(base);
2036 }
2037 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2038
2039 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2040 {
2041 int fi;
2042
2043 switch(factor) {
2044 case 1: fi=0; break;
2045 case 2: fi=1; break;
2046 case 4: fi=2; break;
2047 case 8: fi=3; break;
2048 default: abort();
2049 }
2050
2051 emit_byte(0x66);
2052 emit_byte(0x8b);
2053 emit_byte(0x84+8*d);
2054 emit_byte(baser+8*index+0x40*fi);
2055 emit_long(base);
2056 }
2057 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2058
2059 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2060 {
2061 int fi;
2062
2063 switch(factor) {
2064 case 1: fi=0; break;
2065 case 2: fi=1; break;
2066 case 4: fi=2; break;
2067 case 8: fi=3; break;
2068 default: abort();
2069 }
2070
2071 emit_byte(0x8a);
2072 emit_byte(0x84+8*d);
2073 emit_byte(baser+8*index+0x40*fi);
2074 emit_long(base);
2075 }
2076 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2077
2078 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2079 {
2080 int fi;
2081 switch(factor) {
2082 case 1: fi=0; break;
2083 case 2: fi=1; break;
2084 case 4: fi=2; break;
2085 case 8: fi=3; break;
2086 default:
2087 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2088 abort();
2089 }
2090 emit_byte(0x8b);
2091 emit_byte(0x04+8*d);
2092 emit_byte(0x05+8*index+64*fi);
2093 emit_long(base);
2094 }
2095 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2096
2097 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2098 {
2099 int fi;
2100 switch(factor) {
2101 case 1: fi=0; break;
2102 case 2: fi=1; break;
2103 case 4: fi=2; break;
2104 case 8: fi=3; break;
2105 default:
2106 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2107 abort();
2108 }
2109 if (have_cmov) {
2110 emit_byte(0x0f);
2111 emit_byte(0x40+cond);
2112 emit_byte(0x04+8*d);
2113 emit_byte(0x05+8*index+64*fi);
2114 emit_long(base);
2115 }
2116 else { /* replacement using branch and mov */
2117 int uncc=(cond^1);
2118 emit_byte(0x70+uncc);
2119 emit_byte(7); /* skip next 7 bytes if not cc=true */
2120 emit_byte(0x8b);
2121 emit_byte(0x04+8*d);
2122 emit_byte(0x05+8*index+64*fi);
2123 emit_long(base);
2124 }
2125 }
2126 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2127
2128 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2129 {
2130 if (have_cmov) {
2131 emit_byte(0x0f);
2132 emit_byte(0x40+cond);
2133 emit_byte(0x05+8*d);
2134 emit_long(mem);
2135 }
2136 else { /* replacement using branch and mov */
2137 int uncc=(cond^1);
2138 emit_byte(0x70+uncc);
2139 emit_byte(6); /* skip next 6 bytes if not cc=true */
2140 emit_byte(0x8b);
2141 emit_byte(0x05+8*d);
2142 emit_long(mem);
2143 }
2144 }
2145 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
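/* CMOV fallback note: when the host lacks CMOV (have_cmov is false), the two
 * functions above get the same effect by jumping over an ordinary MOV with
 * the inverted condition.  x86 condition codes come in complementary pairs
 * differing only in bit 0, hence "cond^1".  For raw_cmov_l_rm with cond=4
 * (E/Z) and d=eax this emits, for example:
 *   75 06                   jne  .+6      ; skip the load when Z is clear
 *   8b 05 xx xx xx xx       movl mem,%eax
 * and the indexed variant skips 7 bytes because its MOV carries a SIB byte. */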
2146
2147 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2148 {
2149 Dif(!isbyte(offset)) abort();
2150 emit_byte(0x8b);
2151 emit_byte(0x40+8*d+s);
2152 emit_byte(offset);
2153 }
2154 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2155
2156 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2157 {
2158 Dif(!isbyte(offset)) abort();
2159 emit_byte(0x66);
2160 emit_byte(0x8b);
2161 emit_byte(0x40+8*d+s);
2162 emit_byte(offset);
2163 }
2164 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2165
2166 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2167 {
2168 Dif(!isbyte(offset)) abort();
2169 emit_byte(0x8a);
2170 emit_byte(0x40+8*d+s);
2171 emit_byte(offset);
2172 }
2173 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2174
2175 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2176 {
2177 emit_byte(0x8b);
2178 emit_byte(0x80+8*d+s);
2179 emit_long(offset);
2180 }
2181 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2182
2183 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2184 {
2185 emit_byte(0x66);
2186 emit_byte(0x8b);
2187 emit_byte(0x80+8*d+s);
2188 emit_long(offset);
2189 }
2190 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2191
2192 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2193 {
2194 emit_byte(0x8a);
2195 emit_byte(0x80+8*d+s);
2196 emit_long(offset);
2197 }
2198 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2199
2200 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2201 {
2202 Dif(!isbyte(offset)) abort();
2203 emit_byte(0xc7);
2204 emit_byte(0x40+d);
2205 emit_byte(offset);
2206 emit_long(i);
2207 }
2208 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2209
2210 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2211 {
2212 Dif(!isbyte(offset)) abort();
2213 emit_byte(0x66);
2214 emit_byte(0xc7);
2215 emit_byte(0x40+d);
2216 emit_byte(offset);
2217 emit_word(i);
2218 }
2219 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2220
2221 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2222 {
2223 Dif(!isbyte(offset)) abort();
2224 emit_byte(0xc6);
2225 emit_byte(0x40+d);
2226 emit_byte(offset);
2227 emit_byte(i);
2228 }
2229 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2230
2231 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2232 {
2233 Dif(!isbyte(offset)) abort();
2234 emit_byte(0x89);
2235 emit_byte(0x40+8*s+d);
2236 emit_byte(offset);
2237 }
2238 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2239
2240 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2241 {
2242 Dif(!isbyte(offset)) abort();
2243 emit_byte(0x66);
2244 emit_byte(0x89);
2245 emit_byte(0x40+8*s+d);
2246 emit_byte(offset);
2247 }
2248 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2249
2250 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2251 {
2252 Dif(!isbyte(offset)) abort();
2253 emit_byte(0x88);
2254 emit_byte(0x40+8*s+d);
2255 emit_byte(offset);
2256 }
2257 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2258
2259 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2260 {
2261 if (optimize_imm8 && isbyte(offset)) {
2262 emit_byte(0x8d);
2263 emit_byte(0x40+8*d+s);
2264 emit_byte(offset);
2265 }
2266 else {
2267 emit_byte(0x8d);
2268 emit_byte(0x80+8*d+s);
2269 emit_long(offset);
2270 }
2271 }
2272 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2273
2274 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2275 {
2276 int fi;
2277
2278 switch(factor) {
2279 case 1: fi=0; break;
2280 case 2: fi=1; break;
2281 case 4: fi=2; break;
2282 case 8: fi=3; break;
2283 default: abort();
2284 }
2285
2286 if (optimize_imm8 && isbyte(offset)) {
2287 emit_byte(0x8d);
2288 emit_byte(0x44+8*d);
2289 emit_byte(0x40*fi+8*index+s);
2290 emit_byte(offset);
2291 }
2292 else {
2293 emit_byte(0x8d);
2294 emit_byte(0x84+8*d);
2295 emit_byte(0x40*fi+8*index+s);
2296 emit_long(offset);
2297 }
2298 }
2299 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2300
2301 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2302 {
2303 int isebp=(s==5)?0x40:0;
2304 int fi;
2305
2306 switch(factor) {
2307 case 1: fi=0; break;
2308 case 2: fi=1; break;
2309 case 4: fi=2; break;
2310 case 8: fi=3; break;
2311 default: abort();
2312 }
2313
2314 emit_byte(0x8d);
2315 emit_byte(0x04+8*d+isebp);
2316 emit_byte(0x40*fi+8*index+s);
2317 if (isebp)
2318 emit_byte(0);
2319 }
2320 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2321
2322 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2323 {
2324 if (optimize_imm8 && isbyte(offset)) {
2325 emit_byte(0x89);
2326 emit_byte(0x40+8*s+d);
2327 emit_byte(offset);
2328 }
2329 else {
2330 emit_byte(0x89);
2331 emit_byte(0x80+8*s+d);
2332 emit_long(offset);
2333 }
2334 }
2335 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2336
2337 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2338 {
2339 emit_byte(0x66);
2340 emit_byte(0x89);
2341 emit_byte(0x80+8*s+d);
2342 emit_long(offset);
2343 }
2344 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2345
2346 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2347 {
2348 if (optimize_imm8 && isbyte(offset)) {
2349 emit_byte(0x88);
2350 emit_byte(0x40+8*s+d);
2351 emit_byte(offset);
2352 }
2353 else {
2354 emit_byte(0x88);
2355 emit_byte(0x80+8*s+d);
2356 emit_long(offset);
2357 }
2358 }
2359 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2360
2361 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2362 {
2363 emit_byte(0x0f);
2364 emit_byte(0xc8+r);
2365 }
2366 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2367
2368 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2369 {
2370 emit_byte(0x66);
2371 emit_byte(0xc1);
2372 emit_byte(0xc0+r);
2373 emit_byte(0x08);
2374 }
2375 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
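/* raw_bswap_16 above is implemented as "rolw $8,%reg" (66 c1 /0 08) rather
 * than BSWAP, because BSWAP is only defined for 32-bit operands; rotating a
 * word register by 8 swaps its two bytes.  Unlike raw_bswap_32 it also
 * affects CF/OF, which is presumably why it is declared with WRITE in the
 * first LOWFUNC slot. */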
2376
2377 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2378 {
2379 emit_byte(0x89);
2380 emit_byte(0xc0+8*s+d);
2381 }
2382 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2383
2384 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2385 {
2386 emit_byte(0x89);
2387 emit_byte(0x05+8*s);
2388 emit_long(d);
2389 }
2390 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2391
2392 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2393 {
2394 emit_byte(0x66);
2395 emit_byte(0x89);
2396 emit_byte(0x05+8*s);
2397 emit_long(d);
2398 }
2399 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2400
2401 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2402 {
2403 emit_byte(0x66);
2404 emit_byte(0x8b);
2405 emit_byte(0x05+8*d);
2406 emit_long(s);
2407 }
2408 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2409
2410 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2411 {
2412 emit_byte(0x88);
2413 emit_byte(0x05+8*s);
2414 emit_long(d);
2415 }
2416 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2417
2418 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2419 {
2420 emit_byte(0x8a);
2421 emit_byte(0x05+8*d);
2422 emit_long(s);
2423 }
2424 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2425
2426 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2427 {
2428 emit_byte(0xb8+d);
2429 emit_long(s);
2430 }
2431 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2432
2433 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2434 {
2435 emit_byte(0x66);
2436 emit_byte(0xb8+d);
2437 emit_word(s);
2438 }
2439 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2440
2441 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2442 {
2443 emit_byte(0xb0+d);
2444 emit_byte(s);
2445 }
2446 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2447
2448 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2449 {
2450 emit_byte(0x81);
2451 emit_byte(0x15);
2452 emit_long(d);
2453 emit_long(s);
2454 }
2455 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2456
2457 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2458 {
2459 if (optimize_imm8 && isbyte(s)) {
2460 emit_byte(0x83);
2461 emit_byte(0x05);
2462 emit_long(d);
2463 emit_byte(s);
2464 }
2465 else {
2466 emit_byte(0x81);
2467 emit_byte(0x05);
2468 emit_long(d);
2469 emit_long(s);
2470 }
2471 }
2472 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2473
2474 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2475 {
2476 emit_byte(0x66);
2477 emit_byte(0x81);
2478 emit_byte(0x05);
2479 emit_long(d);
2480 emit_word(s);
2481 }
2482 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2483
2484 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2485 {
2486 emit_byte(0x80);
2487 emit_byte(0x05);
2488 emit_long(d);
2489 emit_byte(s);
2490 }
2491 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2492
2493 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2494 {
2495 if (optimize_accum && isaccum(d))
2496 emit_byte(0xa9);
2497 else {
2498 emit_byte(0xf7);
2499 emit_byte(0xc0+d);
2500 }
2501 emit_long(i);
2502 }
2503 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2504
2505 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2506 {
2507 emit_byte(0x85);
2508 emit_byte(0xc0+8*s+d);
2509 }
2510 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2511
2512 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2513 {
2514 emit_byte(0x66);
2515 emit_byte(0x85);
2516 emit_byte(0xc0+8*s+d);
2517 }
2518 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2519
2520 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2521 {
2522 emit_byte(0x84);
2523 emit_byte(0xc0+8*s+d);
2524 }
2525 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2526
2527 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2528 {
2529 emit_byte(0x81);
2530 emit_byte(0xf0+d);
2531 emit_long(i);
2532 }
2533 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2534
2535 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2536 {
2537 if (optimize_imm8 && isbyte(i)) {
2538 emit_byte(0x83);
2539 emit_byte(0xe0+d);
2540 emit_byte(i);
2541 }
2542 else {
2543 if (optimize_accum && isaccum(d))
2544 emit_byte(0x25);
2545 else {
2546 emit_byte(0x81);
2547 emit_byte(0xe0+d);
2548 }
2549 emit_long(i);
2550 }
2551 }
2552 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2553
2554 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2555 {
2556 emit_byte(0x66);
2557 if (optimize_imm8 && isbyte(i)) {
2558 emit_byte(0x83);
2559 emit_byte(0xe0+d);
2560 emit_byte(i);
2561 }
2562 else {
2563 if (optimize_accum && isaccum(d))
2564 emit_byte(0x25);
2565 else {
2566 emit_byte(0x81);
2567 emit_byte(0xe0+d);
2568 }
2569 emit_word(i);
2570 }
2571 }
2572 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2573
2574 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2575 {
2576 emit_byte(0x21);
2577 emit_byte(0xc0+8*s+d);
2578 }
2579 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2580
2581 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2582 {
2583 emit_byte(0x66);
2584 emit_byte(0x21);
2585 emit_byte(0xc0+8*s+d);
2586 }
2587 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2588
2589 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2590 {
2591 emit_byte(0x20);
2592 emit_byte(0xc0+8*s+d);
2593 }
2594 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2595
2596 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2597 {
2598 if (optimize_imm8 && isbyte(i)) {
2599 emit_byte(0x83);
2600 emit_byte(0xc8+d);
2601 emit_byte(i);
2602 }
2603 else {
2604 if (optimize_accum && isaccum(d))
2605 emit_byte(0x0d);
2606 else {
2607 emit_byte(0x81);
2608 emit_byte(0xc8+d);
2609 }
2610 emit_long(i);
2611 }
2612 }
2613 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2614
2615 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2616 {
2617 emit_byte(0x09);
2618 emit_byte(0xc0+8*s+d);
2619 }
2620 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2621
2622 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2623 {
2624 emit_byte(0x66);
2625 emit_byte(0x09);
2626 emit_byte(0xc0+8*s+d);
2627 }
2628 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2629
2630 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2631 {
2632 emit_byte(0x08);
2633 emit_byte(0xc0+8*s+d);
2634 }
2635 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2636
2637 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2638 {
2639 emit_byte(0x11);
2640 emit_byte(0xc0+8*s+d);
2641 }
2642 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2643
2644 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2645 {
2646 emit_byte(0x66);
2647 emit_byte(0x11);
2648 emit_byte(0xc0+8*s+d);
2649 }
2650 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2651
2652 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2653 {
2654 emit_byte(0x10);
2655 emit_byte(0xc0+8*s+d);
2656 }
2657 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2658
2659 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2660 {
2661 emit_byte(0x01);
2662 emit_byte(0xc0+8*s+d);
2663 }
2664 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2665
2666 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2667 {
2668 emit_byte(0x66);
2669 emit_byte(0x01);
2670 emit_byte(0xc0+8*s+d);
2671 }
2672 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2673
2674 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2675 {
2676 emit_byte(0x00);
2677 emit_byte(0xc0+8*s+d);
2678 }
2679 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2680
2681 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2682 {
2683 if (isbyte(i)) {
2684 emit_byte(0x83);
2685 emit_byte(0xe8+d);
2686 emit_byte(i);
2687 }
2688 else {
2689 if (optimize_accum && isaccum(d))
2690 emit_byte(0x2d);
2691 else {
2692 emit_byte(0x81);
2693 emit_byte(0xe8+d);
2694 }
2695 emit_long(i);
2696 }
2697 }
2698 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2699
2700 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2701 {
2702 if (optimize_accum && isaccum(d))
2703 emit_byte(0x2c);
2704 else {
2705 emit_byte(0x80);
2706 emit_byte(0xe8+d);
2707 }
2708 emit_byte(i);
2709 }
2710 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2711
2712 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2713 {
2714 if (isbyte(i)) {
2715 emit_byte(0x83);
2716 emit_byte(0xc0+d);
2717 emit_byte(i);
2718 }
2719 else {
2720 if (optimize_accum && isaccum(d))
2721 emit_byte(0x05);
2722 else {
2723 emit_byte(0x81);
2724 emit_byte(0xc0+d);
2725 }
2726 emit_long(i);
2727 }
2728 }
2729 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2730
2731 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2732 {
2733 emit_byte(0x66);
2734 if (isbyte(i)) {
2735 emit_byte(0x83);
2736 emit_byte(0xc0+d);
2737 emit_byte(i);
2738 }
2739 else {
2740 if (optimize_accum && isaccum(d))
2741 emit_byte(0x05);
2742 else {
2743 emit_byte(0x81);
2744 emit_byte(0xc0+d);
2745 }
2746 emit_word(i);
2747 }
2748 }
2749 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2750
2751 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2752 {
2753 if (optimize_accum && isaccum(d))
2754 emit_byte(0x04);
2755 else {
2756 emit_byte(0x80);
2757 emit_byte(0xc0+d);
2758 }
2759 emit_byte(i);
2760 }
2761 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2762
2763 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2764 {
2765 emit_byte(0x19);
2766 emit_byte(0xc0+8*s+d);
2767 }
2768 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2769
2770 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2771 {
2772 emit_byte(0x66);
2773 emit_byte(0x19);
2774 emit_byte(0xc0+8*s+d);
2775 }
2776 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2777
2778 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2779 {
2780 emit_byte(0x18);
2781 emit_byte(0xc0+8*s+d);
2782 }
2783 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2784
2785 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2786 {
2787 emit_byte(0x29);
2788 emit_byte(0xc0+8*s+d);
2789 }
2790 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2791
2792 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2793 {
2794 emit_byte(0x66);
2795 emit_byte(0x29);
2796 emit_byte(0xc0+8*s+d);
2797 }
2798 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2799
2800 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2801 {
2802 emit_byte(0x28);
2803 emit_byte(0xc0+8*s+d);
2804 }
2805 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2806
2807 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2808 {
2809 emit_byte(0x39);
2810 emit_byte(0xc0+8*s+d);
2811 }
2812 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2813
2814 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2815 {
2816 if (optimize_imm8 && isbyte(i)) {
2817 emit_byte(0x83);
2818 emit_byte(0xf8+r);
2819 emit_byte(i);
2820 }
2821 else {
2822 if (optimize_accum && isaccum(r))
2823 emit_byte(0x3d);
2824 else {
2825 emit_byte(0x81);
2826 emit_byte(0xf8+r);
2827 }
2828 emit_long(i);
2829 }
2830 }
2831 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2832
2833 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2834 {
2835 emit_byte(0x66);
2836 emit_byte(0x39);
2837 emit_byte(0xc0+8*s+d);
2838 }
2839 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2840
2841 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2842 {
2843 emit_byte(0x80);
2844 emit_byte(0x3d);
2845 emit_long(d);
2846 emit_byte(s);
2847 }
2848 LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2849
2850 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2851 {
2852 if (optimize_accum && isaccum(d))
2853 emit_byte(0x3c);
2854 else {
2855 emit_byte(0x80);
2856 emit_byte(0xf8+d);
2857 }
2858 emit_byte(i);
2859 }
2860 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2861
2862 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2863 {
2864 emit_byte(0x38);
2865 emit_byte(0xc0+8*s+d);
2866 }
2867 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2868
2869 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2870 {
2871 int fi;
2872
2873 switch(factor) {
2874 case 1: fi=0; break;
2875 case 2: fi=1; break;
2876 case 4: fi=2; break;
2877 case 8: fi=3; break;
2878 default: abort();
2879 }
2880 emit_byte(0x39);
2881 emit_byte(0x04+8*d);
2882 emit_byte(5+8*index+0x40*fi);
2883 emit_long(offset);
2884 }
2885 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2886
2887 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2888 {
2889 emit_byte(0x31);
2890 emit_byte(0xc0+8*s+d);
2891 }
2892 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2893
2894 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2895 {
2896 emit_byte(0x66);
2897 emit_byte(0x31);
2898 emit_byte(0xc0+8*s+d);
2899 }
2900 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2901
2902 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2903 {
2904 emit_byte(0x30);
2905 emit_byte(0xc0+8*s+d);
2906 }
2907 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2908
2909 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2910 {
2911 if (optimize_imm8 && isbyte(s)) {
2912 emit_byte(0x83);
2913 emit_byte(0x2d);
2914 emit_long(d);
2915 emit_byte(s);
2916 }
2917 else {
2918 emit_byte(0x81);
2919 emit_byte(0x2d);
2920 emit_long(d);
2921 emit_long(s);
2922 }
2923 }
2924 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2925
2926 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2927 {
2928 if (optimize_imm8 && isbyte(s)) {
2929 emit_byte(0x83);
2930 emit_byte(0x3d);
2931 emit_long(d);
2932 emit_byte(s);
2933 }
2934 else {
2935 emit_byte(0x81);
2936 emit_byte(0x3d);
2937 emit_long(d);
2938 emit_long(s);
2939 }
2940 }
2941 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2942
2943 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2944 {
2945 emit_byte(0x87);
2946 emit_byte(0xc0+8*r1+r2);
2947 }
2948 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2949
2950 /*************************************************************************
2951 * FIXME: mem access modes probably wrong *
2952 *************************************************************************/
2953
2954 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2955 {
2956 emit_byte(0x9c);
2957 }
2958 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2959
2960 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2961 {
2962 emit_byte(0x9d);
2963 }
2964 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2965
2966 #endif
2967
2968 /*************************************************************************
2969 * Unoptimizable stuff --- jump *
2970 *************************************************************************/
2971
2972 static __inline__ void raw_call_r(R4 r)
2973 {
2974 #if USE_NEW_RTASM
2975 CALLsr(r);
2976 #else
2977 emit_byte(0xff);
2978 emit_byte(0xd0+r);
2979 #endif
2980 }
2981
2982 static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2983 {
2984 #if USE_NEW_RTASM
2985 CALLsm(base, X86_NOREG, r, m);
2986 #else
2987 int mu;
2988 switch(m) {
2989 case 1: mu=0; break;
2990 case 2: mu=1; break;
2991 case 4: mu=2; break;
2992 case 8: mu=3; break;
2993 default: abort();
2994 }
2995 emit_byte(0xff);
2996 emit_byte(0x14);
2997 emit_byte(0x05+8*r+0x40*mu);
2998 emit_long(base);
2999 #endif
3000 }
3001
3002 static __inline__ void raw_jmp_r(R4 r)
3003 {
3004 #if USE_NEW_RTASM
3005 JMPsr(r);
3006 #else
3007 emit_byte(0xff);
3008 emit_byte(0xe0+r);
3009 #endif
3010 }
3011
3012 static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3013 {
3014 #if USE_NEW_RTASM
3015 JMPsm(base, X86_NOREG, r, m);
3016 #else
3017 int mu;
3018 switch(m) {
3019 case 1: mu=0; break;
3020 case 2: mu=1; break;
3021 case 4: mu=2; break;
3022 case 8: mu=3; break;
3023 default: abort();
3024 }
3025 emit_byte(0xff);
3026 emit_byte(0x24);
3027 emit_byte(0x05+8*r+0x40*mu);
3028 emit_long(base);
3029 #endif
3030 }
3031
3032 static __inline__ void raw_jmp_m(uae_u32 base)
3033 {
3034 emit_byte(0xff);
3035 emit_byte(0x25);
3036 emit_long(base);
3037 }
3038
3039
3040 static __inline__ void raw_call(uae_u32 t)
3041 {
3042 #if USE_NEW_RTASM
3043 CALLm(t);
3044 #else
3045 emit_byte(0xe8);
3046 emit_long(t-(uae_u32)target-4);
3047 #endif
3048 }
3049
3050 static __inline__ void raw_jmp(uae_u32 t)
3051 {
3052 #if USE_NEW_RTASM
3053 JMPm(t);
3054 #else
3055 emit_byte(0xe9);
3056 emit_long(t-(uae_u32)target-4);
3057 #endif
3058 }
3059
3060 static __inline__ void raw_jl(uae_u32 t)
3061 {
3062 emit_byte(0x0f);
3063 emit_byte(0x8c);
3064 emit_long(t-(uintptr)target-4);
3065 }
3066
3067 static __inline__ void raw_jz(uae_u32 t)
3068 {
3069 emit_byte(0x0f);
3070 emit_byte(0x84);
3071 emit_long(t-(uintptr)target-4);
3072 }
3073
3074 static __inline__ void raw_jnz(uae_u32 t)
3075 {
3076 emit_byte(0x0f);
3077 emit_byte(0x85);
3078 emit_long(t-(uintptr)target-4);
3079 }
3080
3081 static __inline__ void raw_jnz_l_oponly(void)
3082 {
3083 emit_byte(0x0f);
3084 emit_byte(0x85);
3085 }
3086
3087 static __inline__ void raw_jcc_l_oponly(int cc)
3088 {
3089 emit_byte(0x0f);
3090 emit_byte(0x80+cc);
3091 }
3092
3093 static __inline__ void raw_jnz_b_oponly(void)
3094 {
3095 emit_byte(0x75);
3096 }
3097
3098 static __inline__ void raw_jz_b_oponly(void)
3099 {
3100 emit_byte(0x74);
3101 }
3102
3103 static __inline__ void raw_jcc_b_oponly(int cc)
3104 {
3105 emit_byte(0x70+cc);
3106 }
3107
3108 static __inline__ void raw_jmp_l_oponly(void)
3109 {
3110 emit_byte(0xe9);
3111 }
3112
3113 static __inline__ void raw_jmp_b_oponly(void)
3114 {
3115 emit_byte(0xeb);
3116 }
3117
3118 static __inline__ void raw_ret(void)
3119 {
3120 emit_byte(0xc3);
3121 }
3122
3123 static __inline__ void raw_nop(void)
3124 {
3125 emit_byte(0x90);
3126 }
3127
3128 static __inline__ void raw_emit_nop_filler(int nbytes)
3129 {
3130 /* Source: GNU Binutils 2.12.90.0.15 */
3131 /* Various efficient no-op patterns for aligning code labels.
3132 Note: Don't try to assemble the instructions in the comments.
3133 0L and 0w are not legal. */
3134 static const uae_u8 f32_1[] =
3135 {0x90}; /* nop */
3136 static const uae_u8 f32_2[] =
3137 {0x89,0xf6}; /* movl %esi,%esi */
3138 static const uae_u8 f32_3[] =
3139 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3140 static const uae_u8 f32_4[] =
3141 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3142 static const uae_u8 f32_5[] =
3143 {0x90, /* nop */
3144 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3145 static const uae_u8 f32_6[] =
3146 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3147 static const uae_u8 f32_7[] =
3148 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3149 static const uae_u8 f32_8[] =
3150 {0x90, /* nop */
3151 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3152 static const uae_u8 f32_9[] =
3153 {0x89,0xf6, /* movl %esi,%esi */
3154 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3155 static const uae_u8 f32_10[] =
3156 {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3157 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3158 static const uae_u8 f32_11[] =
3159 {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3160 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3161 static const uae_u8 f32_12[] =
3162 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3163 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3164 static const uae_u8 f32_13[] =
3165 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3166 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3167 static const uae_u8 f32_14[] =
3168 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3169 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170 static const uae_u8 f32_15[] =
3171 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3172 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3173 static const uae_u8 f32_16[] =
3174 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3175 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3176 static const uae_u8 *const f32_patt[] = {
3177 f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3178 f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3179 };
3180 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3181
3182 #if defined(__x86_64__)
3183 /* The recommended way to pad 64bit code is to use NOPs preceded by
3184 maximally four 0x66 prefixes. Balance the size of nops. */
3185 if (nbytes == 0)
3186 return;
3187
3188 int i;
3189 int nnops = (nbytes + 3) / 4;
3190 int len = nbytes / nnops;
3191 int remains = nbytes - nnops * len;
3192
3193 for (i = 0; i < remains; i++) {
3194 emit_block(prefixes, len);
3195 raw_nop();
3196 }
3197 for (; i < nnops; i++) {
3198 emit_block(prefixes, len - 1);
3199 raw_nop();
3200 }
3201 #else
3202 int nloops = nbytes / 16;
3203 while (nloops-- > 0)
3204 emit_block(f32_16, sizeof(f32_16));
3205
3206 nbytes %= 16;
3207 if (nbytes)
3208 emit_block(f32_patt[nbytes - 1], nbytes);
3209 #endif
3210 }
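/* Worked example for the padding logic above: on x86-64 a request for 7 bytes
 * gives nnops=(7+3)/4=2, len=7/2=3, remains=1, so it emits one 4-byte nop
 * (66 66 66 90) followed by one 3-byte nop (66 66 90).  On IA-32 a request
 * for 20 bytes emits the 16-byte f32_16 pattern and then f32_patt[3]
 * (leal 0(%esi,1),%esi) for the remaining 4 bytes. */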
3211
3212
3213 /*************************************************************************
3214 * Flag handling, to and fro UAE flag register *
3215 *************************************************************************/
3216
3217 #ifdef SAHF_SETO_PROFITABLE
3218
3219 #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3220
3221 static __inline__ void raw_flags_to_reg(int r)
3222 {
3223 raw_lahf(0); /* Most flags in AH */
3224 //raw_setcc(r,0); /* V flag in AL */
3225 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3226
3227 #if 1 /* Let's avoid those nasty partial register stalls */
3228 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3229 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3230 //live.state[FLAGTMP].status=CLEAN;
3231 live.state[FLAGTMP].status=INMEM;
3232 live.state[FLAGTMP].realreg=-1;
3233 /* We just "evicted" FLAGTMP. */
3234 if (live.nat[r].nholds!=1) {
3235 /* Huh? */
3236 abort();
3237 }
3238 live.nat[r].nholds=0;
3239 #endif
3240 }
3241
3242 #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3243 static __inline__ void raw_reg_to_flags(int r)
3244 {
3245 raw_cmp_b_ri(r,-127); /* set V */
3246 raw_sahf(0);
3247 }
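/* The "-127" comparison above rebuilds the overflow flag from the 0/1 byte
 * that the setcc in raw_flags_to_reg stored as the low byte of the flag
 * image: cmp computes AL-(-127), so AL==1 yields 128, which overflows a
 * signed byte and sets OF, while AL==0 yields 127 and clears it.  The
 * following sahf then restores S/Z/A/P/C from AH. */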
3248
3249 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3250 static __inline__ void raw_flags_set_zero(int s, int tmp)
3251 {
3252 raw_mov_l_rr(tmp,s);
3253 raw_lahf(s); /* flags into ah */
3254 raw_and_l_ri(s,0xffffbfff);
3255 raw_and_l_ri(tmp,0x00004000);
3256 raw_xor_l_ri(tmp,0x00004000);
3257 raw_or_l(s,tmp);
3258 raw_sahf(s);
3259 }
3260
3261 #else
3262
3263 #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3264 static __inline__ void raw_flags_to_reg(int r)
3265 {
3266 raw_pushfl();
3267 raw_pop_l_r(r);
3268 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3269 // live.state[FLAGTMP].status=CLEAN;
3270 live.state[FLAGTMP].status=INMEM;
3271 live.state[FLAGTMP].realreg=-1;
3272 /* We just "evicted" FLAGTMP. */
3273 if (live.nat[r].nholds!=1) {
3274 /* Huh? */
3275 abort();
3276 }
3277 live.nat[r].nholds=0;
3278 }
3279
3280 #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3281 static __inline__ void raw_reg_to_flags(int r)
3282 {
3283 raw_push_l_r(r);
3284 raw_popfl();
3285 }
3286
3287 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3288 static __inline__ void raw_flags_set_zero(int s, int tmp)
3289 {
3290 raw_mov_l_rr(tmp,s);
3291 raw_pushfl();
3292 raw_pop_l_r(s);
3293 raw_and_l_ri(s,0xffffffbf);
3294 raw_and_l_ri(tmp,0x00000040);
3295 raw_xor_l_ri(tmp,0x00000040);
3296 raw_or_l(s,tmp);
3297 raw_push_l_r(s);
3298 raw_popfl();
3299 }
3300 #endif
3301
3302 /* Apparently, there are enough instructions between flag store and
3303 flag reload to avoid the partial memory stall */
3304 static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3305 {
3306 #if 1
3307 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3308 #else
3309 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3310 raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3311 #endif
3312 }
3313
3314 /* FLAGX is byte sized, and we *do* write it at that size */
3315 static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3316 {
3317 if (live.nat[target].canbyte)
3318 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3319 else if (live.nat[target].canword)
3320 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3321 else
3322 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3323 }
3324
3325 static __inline__ void raw_inc_sp(int off)
3326 {
3327 raw_add_l_ri(ESP_INDEX,off);
3328 }
3329
3330 /*************************************************************************
3331 * Handling mistaken direct memory access *
3332 *************************************************************************/
3333
3334 // gb-- I don't need that part for JIT Basilisk II
3335 #if defined(NATMEM_OFFSET) && 0
3336 #include <asm/sigcontext.h>
3337 #include <signal.h>
3338
3339 #define SIG_READ 1
3340 #define SIG_WRITE 2
3341
3342 static int in_handler=0;
3343 static uae_u8 veccode[256];
3344
3345 static void vec(int x, struct sigcontext sc)
3346 {
3347 uae_u8* i=(uae_u8*)sc.eip;
3348 uae_u32 addr=sc.cr2;
3349 int r=-1;
3350 int size=4;
3351 int dir=-1;
3352 int len=0;
3353 int j;
3354
3355 write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3356 if (!canbang)
3357 write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3358 if (in_handler)
3359 write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3360
3361 if (canbang && i>=compiled_code && i<=current_compile_p) {
3362 if (*i==0x66) {
3363 i++;
3364 size=2;
3365 len++;
3366 }
3367
3368 switch(i[0]) {
3369 case 0x8a:
3370 if ((i[1]&0xc0)==0x80) {
3371 r=(i[1]>>3)&7;
3372 dir=SIG_READ;
3373 size=1;
3374 len+=6;
3375 break;
3376 }
3377 break;
3378 case 0x88:
3379 if ((i[1]&0xc0)==0x80) {
3380 r=(i[1]>>3)&7;
3381 dir=SIG_WRITE;
3382 size=1;
3383 len+=6;
3384 break;
3385 }
3386 break;
3387 case 0x8b:
3388 if ((i[1]&0xc0)==0x80) {
3389 r=(i[1]>>3)&7;
3390 dir=SIG_READ;
3391 len+=6;
3392 break;
3393 }
3394 if ((i[1]&0xc0)==0x40) {
3395 r=(i[1]>>3)&7;
3396 dir=SIG_READ;
3397 len+=3;
3398 break;
3399 }
3400 break;
3401 case 0x89:
3402 if ((i[1]&0xc0)==0x80) {
3403 r=(i[1]>>3)&7;
3404 dir=SIG_WRITE;
3405 len+=6;
3406 break;
3407 }
3408 if ((i[1]&0xc0)==0x40) {
3409 r=(i[1]>>3)&7;
3410 dir=SIG_WRITE;
3411 len+=3;
3412 break;
3413 }
3414 break;
3415 }
3416 }
3417
3418 if (r!=-1) {
3419 void* pr=NULL;
3420 write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3421
3422 switch(r) {
3423 case 0: pr=&(sc.eax); break;
3424 case 1: pr=&(sc.ecx); break;
3425 case 2: pr=&(sc.edx); break;
3426 case 3: pr=&(sc.ebx); break;
3427 case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3428 case 5: pr=(size>1)?
3429 (void*)(&(sc.ebp)):
3430 (void*)(((uae_u8*)&(sc.ecx))+1); break;
3431 case 6: pr=(size>1)?
3432 (void*)(&(sc.esi)):
3433 (void*)(((uae_u8*)&(sc.edx))+1); break;
3434 case 7: pr=(size>1)?
3435 (void*)(&(sc.edi)):
3436 (void*)(((uae_u8*)&(sc.ebx))+1); break;
3437 default: abort();
3438 }
3439 if (pr) {
3440 blockinfo* bi;
3441
3442 if (currprefs.comp_oldsegv) {
3443 addr-=NATMEM_OFFSET;
3444
3445 if ((addr>=0x10000000 && addr<0x40000000) ||
3446 (addr>=0x50000000)) {
3447 write_log("Suspicious address in %x SEGV handler.\n",addr);
3448 }
3449 if (dir==SIG_READ) {
3450 switch(size) {
3451 case 1: *((uae_u8*)pr)=get_byte(addr); break;
3452 case 2: *((uae_u16*)pr)=get_word(addr); break;
3453 case 4: *((uae_u32*)pr)=get_long(addr); break;
3454 default: abort();
3455 }
3456 }
3457 else { /* write */
3458 switch(size) {
3459 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3460 case 2: put_word(addr,*((uae_u16*)pr)); break;
3461 case 4: put_long(addr,*((uae_u32*)pr)); break;
3462 default: abort();
3463 }
3464 }
3465 write_log("Handled one access!\n");
3466 fflush(stdout);
3467 segvcount++;
3468 sc.eip+=len;
3469 }
3470 else {
3471 void* tmp=target;
3472 int i;
3473 uae_u8 vecbuf[5];
3474
3475 addr-=NATMEM_OFFSET;
3476
3477 if ((addr>=0x10000000 && addr<0x40000000) ||
3478 (addr>=0x50000000)) {
3479 write_log("Suspicious address in %x SEGV handler.\n",addr);
3480 }
3481
3482 target=(uae_u8*)sc.eip;
3483 for (i=0;i<5;i++)
3484 vecbuf[i]=target[i];
3485 emit_byte(0xe9);
3486 emit_long((uintptr)veccode-(uintptr)target-4);
3487 write_log("Create jump to %p\n",veccode);
3488
3489 write_log("Handled one access!\n");
3490 fflush(stdout);
3491 segvcount++;
3492
3493 target=veccode;
3494
3495 if (dir==SIG_READ) {
3496 switch(size) {
3497 case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3498 case 2: raw_mov_w_ri(r,get_word(addr)); break;
3499 case 4: raw_mov_l_ri(r,get_long(addr)); break;
3500 default: abort();
3501 }
3502 }
3503 else { /* write */
3504 switch(size) {
3505 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3506 case 2: put_word(addr,*((uae_u16*)pr)); break;
3507 case 4: put_long(addr,*((uae_u32*)pr)); break;
3508 default: abort();
3509 }
3510 }
3511 for (i=0;i<5;i++)
3512 raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3513 raw_mov_l_mi((uintptr)&in_handler,0);
3514 emit_byte(0xe9);
3515 emit_long(sc.eip+len-(uintptr)target-4);
3516 in_handler=1;
3517 target=tmp;
3518 }
3519 bi=active;
3520 while (bi) {
3521 if (bi->handler &&
3522 (uae_u8*)bi->direct_handler<=i &&
3523 (uae_u8*)bi->nexthandler>i) {
3524 write_log("deleted trigger (%p<%p<%p) %p\n",
3525 bi->handler,
3526 i,
3527 bi->nexthandler,
3528 bi->pc_p);
3529 invalidate_block(bi);
3530 raise_in_cl_list(bi);
3531 set_special(0);
3532 return;
3533 }
3534 bi=bi->next;
3535 }
3536 /* Not found in the active list. Might be a rom routine that
3537 is in the dormant list */
3538 bi=dormant;
3539 while (bi) {
3540 if (bi->handler &&
3541 (uae_u8*)bi->direct_handler<=i &&
3542 (uae_u8*)bi->nexthandler>i) {
3543 write_log("deleted trigger (%p<%p<%p) %p\n",
3544 bi->handler,
3545 i,
3546 bi->nexthandler,
3547 bi->pc_p);
3548 invalidate_block(bi);
3549 raise_in_cl_list(bi);
3550 set_special(0);
3551 return;
3552 }
3553 bi=bi->next;
3554 }
3555 write_log("Huh? Could not find trigger!\n");
3556 return;
3557 }
3558 }
3559 write_log("Can't handle access!\n");
3560 for (j=0;j<10;j++) {
3561 write_log("instruction byte %2d is %02x\n",j,i[j]);
3562 }
3563 write_log("Please send the above info (starting at \"fault address\") to\n"
3564 "bmeyer@csse.monash.edu.au\n"
3565 "This shouldn't happen ;-)\n");
3566 fflush(stdout);
3567 signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3568 }
3569 #endif
3570
3571
3572 /*************************************************************************
3573 * Checking for CPU features *
3574 *************************************************************************/
3575
3576 struct cpuinfo_x86 {
3577 uae_u8 x86; // CPU family
3578 uae_u8 x86_vendor; // CPU vendor
3579 uae_u8 x86_processor; // CPU canonical processor type
3580 uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3581 uae_u32 x86_hwcap;
3582 uae_u8 x86_model;
3583 uae_u8 x86_mask;
3584 int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3585 char x86_vendor_id[16];
3586 };
3587 struct cpuinfo_x86 cpuinfo;
3588
3589 enum {
3590 X86_VENDOR_INTEL = 0,
3591 X86_VENDOR_CYRIX = 1,
3592 X86_VENDOR_AMD = 2,
3593 X86_VENDOR_UMC = 3,
3594 X86_VENDOR_NEXGEN = 4,
3595 X86_VENDOR_CENTAUR = 5,
3596 X86_VENDOR_RISE = 6,
3597 X86_VENDOR_TRANSMETA = 7,
3598 X86_VENDOR_NSC = 8,
3599 X86_VENDOR_UNKNOWN = 0xff
3600 };
3601
3602 enum {
3603 X86_PROCESSOR_I386, /* 80386 */
3604 X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
3605 X86_PROCESSOR_PENTIUM,
3606 X86_PROCESSOR_PENTIUMPRO,
3607 X86_PROCESSOR_K6,
3608 X86_PROCESSOR_ATHLON,
3609 X86_PROCESSOR_PENTIUM4,
3610 X86_PROCESSOR_K8,
3611 X86_PROCESSOR_max
3612 };
3613
3614 static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3615 "80386",
3616 "80486",
3617 "Pentium",
3618 "PentiumPro",
3619 "K6",
3620 "Athlon",
3621 "Pentium4",
3622 "K8"
3623 };
3624
3625 static struct ptt {
3626 const int align_loop;
3627 const int align_loop_max_skip;
3628 const int align_jump;
3629 const int align_jump_max_skip;
3630 const int align_func;
3631 }
3632 x86_alignments[X86_PROCESSOR_max] = {
3633 { 4, 3, 4, 3, 4 },
3634 { 16, 15, 16, 15, 16 },
3635 { 16, 7, 16, 7, 16 },
3636 { 16, 15, 16, 7, 16 },
3637 { 32, 7, 32, 7, 32 },
3638 { 16, 7, 16, 7, 16 },
3639 { 0, 0, 0, 0, 0 },
3640 { 16, 7, 16, 7, 16 }
3641 };
3642
3643 static void
3644 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3645 {
3646 char *v = c->x86_vendor_id;
3647
3648 if (!strcmp(v, "GenuineIntel"))
3649 c->x86_vendor = X86_VENDOR_INTEL;
3650 else if (!strcmp(v, "AuthenticAMD"))
3651 c->x86_vendor = X86_VENDOR_AMD;
3652 else if (!strcmp(v, "CyrixInstead"))
3653 c->x86_vendor = X86_VENDOR_CYRIX;
3654 else if (!strcmp(v, "Geode by NSC"))
3655 c->x86_vendor = X86_VENDOR_NSC;
3656 else if (!strcmp(v, "UMC UMC UMC "))
3657 c->x86_vendor = X86_VENDOR_UMC;
3658 else if (!strcmp(v, "CentaurHauls"))
3659 c->x86_vendor = X86_VENDOR_CENTAUR;
3660 else if (!strcmp(v, "NexGenDriven"))
3661 c->x86_vendor = X86_VENDOR_NEXGEN;
3662 else if (!strcmp(v, "RiseRiseRise"))
3663 c->x86_vendor = X86_VENDOR_RISE;
3664 else if (!strcmp(v, "GenuineTMx86") ||
3665 !strcmp(v, "TransmetaCPU"))
3666 c->x86_vendor = X86_VENDOR_TRANSMETA;
3667 else
3668 c->x86_vendor = X86_VENDOR_UNKNOWN;
3669 }
3670
3671 static void
3672 cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3673 {
3674 static uae_u8 cpuid_space[256];
3675 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3676 uae_u8* tmp=get_target();
3677
3678 s_op = op;
3679 set_target(cpuid_space);
3680 raw_push_l_r(0); /* eax */
3681 raw_push_l_r(1); /* ecx */
3682 raw_push_l_r(2); /* edx */
3683 raw_push_l_r(3); /* ebx */
3684 raw_mov_l_rm(0,(uintptr)&s_op);
3685 raw_cpuid(0);
3686 raw_mov_l_mr((uintptr)&s_eax,0);
3687 raw_mov_l_mr((uintptr)&s_ebx,3);
3688 raw_mov_l_mr((uintptr)&s_ecx,1);
3689 raw_mov_l_mr((uintptr)&s_edx,2);
3690 raw_pop_l_r(3);
3691 raw_pop_l_r(2);
3692 raw_pop_l_r(1);
3693 raw_pop_l_r(0);
3694 raw_ret();
3695 set_target(tmp);
3696
3697 ((cpuop_func*)cpuid_space)(0);
3698 if (eax != NULL) *eax = s_eax;
3699 if (ebx != NULL) *ebx = s_ebx;
3700 if (ecx != NULL) *ecx = s_ecx;
3701 if (edx != NULL) *edx = s_edx;
3702 }
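/* Note: cpuid() above does not execute CPUID via inline asm; it assembles a
 * small thunk into cpuid_space using the JIT's own raw_* emitters, calls it,
 * and reads the results back from the static s_* variables.  Presumably this
 * keeps the save/restore of %ebx (clobbered by CPUID, and possibly reserved
 * as the PIC register) away from the compiler. */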
3703
3704 static void
3705 raw_init_cpu(void)
3706 {
3707 struct cpuinfo_x86 *c = &cpuinfo;
3708
3709 /* Defaults */
3710 c->x86_processor = X86_PROCESSOR_max;
3711 c->x86_vendor = X86_VENDOR_UNKNOWN;
3712 c->cpuid_level = -1; /* CPUID not detected */
3713 c->x86_model = c->x86_mask = 0; /* So far unknown... */
3714 c->x86_vendor_id[0] = '\0'; /* Unset */
3715 c->x86_hwcap = 0;
3716
3717 /* Get vendor name */
3718 c->x86_vendor_id[12] = '\0';
3719 cpuid(0x00000000,
3720 (uae_u32 *)&c->cpuid_level,
3721 (uae_u32 *)&c->x86_vendor_id[0],
3722 (uae_u32 *)&c->x86_vendor_id[8],
3723 (uae_u32 *)&c->x86_vendor_id[4]);
3724 x86_get_cpu_vendor(c);
3725
3726 /* Intel-defined flags: level 0x00000001 */
3727 c->x86_brand_id = 0;
3728 if ( c->cpuid_level >= 0x00000001 ) {
3729 uae_u32 tfms, brand_id;
3730 cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3731 c->x86 = (tfms >> 8) & 15;
3732 c->x86_model = (tfms >> 4) & 15;
3733 c->x86_brand_id = brand_id & 0xff;
3734 if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3735 (c->x86 == 0xf)) {
3736 /* AMD Extended Family and Model Values */
3737 c->x86 += (tfms >> 20) & 0xff;
3738 c->x86_model += (tfms >> 12) & 0xf0;
3739 }
3740 c->x86_mask = tfms & 15;
3741 } else {
3742 /* Have CPUID level 0 only - unheard of */
3743 c->x86 = 4;
3744 }
3745
3746 /* AMD-defined flags: level 0x80000001 */
3747 uae_u32 xlvl;
3748 cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3749 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3750 if ( xlvl >= 0x80000001 ) {
3751 uae_u32 features;
3752 cpuid(0x80000001, NULL, NULL, NULL, &features);
3753 if (features & (1 << 29)) {
3754 /* Assume x86-64 if long mode is supported */
3755 c->x86_processor = X86_PROCESSOR_K8;
3756 }
3757 }
3758 }
3759
3760 /* Canonicalize processor ID */
3761 switch (c->x86) {
3762 case 3:
3763 c->x86_processor = X86_PROCESSOR_I386;
3764 break;
3765 case 4:
3766 c->x86_processor = X86_PROCESSOR_I486;
3767 break;
3768 case 5:
3769 if (c->x86_vendor == X86_VENDOR_AMD)
3770 c->x86_processor = X86_PROCESSOR_K6;
3771 else
3772 c->x86_processor = X86_PROCESSOR_PENTIUM;
3773 break;
3774 case 6:
3775 if (c->x86_vendor == X86_VENDOR_AMD)
3776 c->x86_processor = X86_PROCESSOR_ATHLON;
3777 else
3778 c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3779 break;
3780 case 15:
3781 if (c->x86_vendor == X86_VENDOR_INTEL) {
3782 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3783 if (c->x86_brand_id >= 8)
3784 c->x86_processor = X86_PROCESSOR_PENTIUM4;
3785 }
3786 if (c->x86_vendor == X86_VENDOR_AMD) {
3787 /* Assume an Athlon processor if family == 15 and it was not
3788 detected as an x86-64 so far */
3789 if (c->x86_processor == X86_PROCESSOR_max)
3790 c->x86_processor = X86_PROCESSOR_ATHLON;
3791 }
3792 break;
3793 }
3794 if (c->x86_processor == X86_PROCESSOR_max) {
3795 fprintf(stderr, "Error: unknown processor type\n");
3796 fprintf(stderr, " Family : %d\n", c->x86);
3797 fprintf(stderr, " Model : %d\n", c->x86_model);
3798 fprintf(stderr, " Mask : %d\n", c->x86_mask);
3799 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3800 if (c->x86_brand_id)
3801 fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3802 abort();
3803 }
3804
3805 /* Have CMOV support? */
3806 have_cmov = c->x86_hwcap & (1 << 15);
3807
3808 /* Can the host CPU suffer from partial register stalls? */
3809 have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3810 #if 1
3811 /* It appears that partial register writes are a bad idea even on
3812 AMD K7 cores, even though they are not supposed to have the
3813 dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3814 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3815 have_rat_stall = true;
3816 #endif
3817
3818 /* Alignments */
3819 if (tune_alignment) {
3820 align_loops = x86_alignments[c->x86_processor].align_loop;
3821 align_jumps = x86_alignments[c->x86_processor].align_jump;
3822 }
3823
3824 write_log("Max CPUID level=%d Processor is %s [%s]\n",
3825 c->cpuid_level, c->x86_vendor_id,
3826 x86_processor_string_table[c->x86_processor]);
3827 }
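/* BSF is documented to set only ZF (according to whether the source is zero)
 * and to leave CF/OF/SF undefined.  The probe below runs BSF over a few
 * operands under every initial flag combination and returns true only if ZF
 * behaves that way while CF/OF/SF pass through unchanged; otherwise it logs
 * the mismatch so callers can avoid depending on flags surviving a BSF. */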
3828
3829 static bool target_check_bsf(void)
3830 {
3831 bool mismatch = false;
3832 for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3833 for (int g_CF = 0; g_CF <= 1; g_CF++) {
3834 for (int g_OF = 0; g_OF <= 1; g_OF++) {
3835 for (int g_SF = 0; g_SF <= 1; g_SF++) {
3836 for (int value = -1; value <= 1; value++) {
3837 int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3838 int tmp = value;
3839 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3840 : "+r" (flags), "+r" (tmp) : : "cc");
3841 int OF = (flags >> 11) & 1;
3842 int SF = (flags >> 7) & 1;
3843 int ZF = (flags >> 6) & 1;
3844 int CF = flags & 1;
3845 tmp = (value == 0);
3846 if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3847 mismatch = true;
3848 }
3849 }}}}
3850 if (mismatch)
3851 write_log("Target CPU defines all flags on BSF instruction\n");
3852 return !mismatch;
3853 }
3854
3855
3856 /*************************************************************************
3857 * FPU stuff *
3858 *************************************************************************/
3859
3860
3861 static __inline__ void raw_fp_init(void)
3862 {
3863 int i;
3864
3865 for (i=0;i<N_FREGS;i++)
3866 live.spos[i]=-2;
3867 live.tos=-1; /* Stack is empty */
3868 }
3869
3870 static __inline__ void raw_fp_cleanup_drop(void)
3871 {
3872 #if 0
3873 /* using FINIT instead of popping all the entries.
3874 Seems to have side effects --- there is display corruption in
3875 Quake when this is used */
3876 if (live.tos>1) {
3877 emit_byte(0x9b);
3878 emit_byte(0xdb);
3879 emit_byte(0xe3);
3880 live.tos=-1;
3881 }
3882 #endif
3883 while (live.tos>=1) {
3884 emit_byte(0xde);
3885 emit_byte(0xd9);
3886 live.tos-=2;
3887 }
3888 while (live.tos>=0) {
3889 emit_byte(0xdd);
3890 emit_byte(0xd8);
3891 live.tos--;
3892 }
3893 raw_fp_init();
3894 }
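/* The x87 stack is mirrored in software: live.tos is the index of the
 * current hardware top of stack (-1 when empty), live.spos[r] is the
 * position of virtual FP register r on that stack (negative if not loaded),
 * and live.onstack[p] records which virtual register sits at position p.
 * stackpos(r) below therefore yields the st(i) index of r, and
 * make_tos()/make_tos2() use fxch to rotate the wanted register(s) to the
 * top before an operation. */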
3895
3896 static __inline__ void make_tos(int r)
3897 {
3898 int p,q;
3899
3900 if (live.spos[r]<0) { /* Register not yet on stack */
3901 emit_byte(0xd9);
3902 emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3903 live.tos++;
3904 live.spos[r]=live.tos;
3905 live.onstack[live.tos]=r;
3906 return;
3907 }
3908 /* Register is on stack */
3909 if (live.tos==live.spos[r])
3910 return;
3911 p=live.spos[r];
3912 q=live.onstack[live.tos];
3913
3914 emit_byte(0xd9);
3915 emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3916 live.onstack[live.tos]=r;
3917 live.spos[r]=live.tos;
3918 live.onstack[p]=q;
3919 live.spos[q]=p;
3920 }
3921
3922 static __inline__ void make_tos2(int r, int r2)
3923 {
3924 int q;
3925
3926 make_tos(r2); /* Put the reg that's supposed to end up in position2
3927 on top */
3928
3929 if (live.spos[r]<0) { /* Register not yet on stack */
3930 make_tos(r); /* This will extend the stack */
3931 return;
3932 }
3933 /* Register is on stack */
3934 emit_byte(0xd9);
3935 emit_byte(0xc9); /* Move r2 into position 2 */
3936
3937 q=live.onstack[live.tos-1];
3938 live.onstack[live.tos]=q;
3939 live.spos[q]=live.tos;
3940 live.onstack[live.tos-1]=r2;
3941 live.spos[r2]=live.tos-1;
3942
3943 make_tos(r); /* And r into 1 */
3944 }
3945
3946 static __inline__ int stackpos(int r)
3947 {
3948 if (live.spos[r]<0)
3949 abort();
3950 if (live.tos<live.spos[r]) {
3951 printf("Looking for spos for fnreg %d\n",r);
3952 abort();
3953 }
3954 return live.tos-live.spos[r];
3955 }
3956
3957 static __inline__ void usereg(int r)
3958 {
3959 if (live.spos[r]<0)
3960 make_tos(r);
3961 }
3962
3963 /* This is called with one FP value in a reg *above* tos, which it will
3964 pop off the stack if necessary */
3965 static __inline__ void tos_make(int r)
3966 {
3967 if (live.spos[r]<0) {
3968 live.tos++;
3969 live.spos[r]=live.tos;
3970 live.onstack[live.tos]=r;
3971 return;
3972 }
3973 emit_byte(0xdd);
3974 emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3975 and pop it*/
3976 }
3977
3978 /* FP helper functions */
3979 #if USE_NEW_RTASM
3980 #define DEFINE_OP(NAME, GEN) \
3981 static inline void raw_##NAME(uint32 m) \
3982 { \
3983 GEN(m, X86_NOREG, X86_NOREG, 1); \
3984 }
3985 DEFINE_OP(fstl, FSTLm);
3986 DEFINE_OP(fstpl, FSTPLm);
3987 DEFINE_OP(fldl, FLDLm);
3988 DEFINE_OP(fildl, FILDLm);
3989 DEFINE_OP(fistl, FISTLm);
3990 DEFINE_OP(flds, FLDSm);
3991 DEFINE_OP(fsts, FSTSm);
3992 DEFINE_OP(fstpt, FSTPTm);
3993 DEFINE_OP(fldt, FLDTm);
3994 #else
3995 #define DEFINE_OP(NAME, OP1, OP2) \
3996 static inline void raw_##NAME(uint32 m) \
3997 { \
3998 emit_byte(OP1); \
3999 emit_byte(OP2); \
4000 emit_long(m); \
4001 }
4002 DEFINE_OP(fstl, 0xdd, 0x15);
4003 DEFINE_OP(fstpl, 0xdd, 0x1d);
4004 DEFINE_OP(fldl, 0xdd, 0x05);
4005 DEFINE_OP(fildl, 0xdb, 0x05);
4006 DEFINE_OP(fistl, 0xdb, 0x15);
4007 DEFINE_OP(flds, 0xd9, 0x05);
4008 DEFINE_OP(fsts, 0xd9, 0x15);
4009 DEFINE_OP(fstpt, 0xdb, 0x3d);
4010 DEFINE_OP(fldt, 0xdb, 0x2d);
4011 #endif
4012 #undef DEFINE_OP
4013
4014 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4015 {
4016 make_tos(r);
4017 raw_fstl(m);
4018 }
4019 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4020
4021 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4022 {
4023 make_tos(r);
4024 raw_fstpl(m);
4025 live.onstack[live.tos]=-1;
4026 live.tos--;
4027 live.spos[r]=-2;
4028 }
4029 LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4030
4031 LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4032 {
4033 raw_fldl(m);
4034 tos_make(r);
4035 }
4036 LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4037
4038 LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4039 {
4040 raw_fildl(m);
4041 tos_make(r);
4042 }
4043 LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4044
4045 LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4046 {
4047 make_tos(r);
4048 raw_fistl(m);
4049 }
4050 LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4051
4052 LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4053 {
4054 raw_flds(m);
4055 tos_make(r);
4056 }
4057 LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4058
4059 LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4060 {
4061 make_tos(r);
4062 raw_fsts(m);
4063 }
4064 LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4065
4066 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4067 {
4068 int rs;
4069
4070 /* Stupid x87 can't write a long double to mem without popping the
4071 stack! */
4072 usereg(r);
4073 rs=stackpos(r);
4074 emit_byte(0xd9); /* Get a copy to the top of stack */
4075 emit_byte(0xc0+rs);
4076
4077 raw_fstpt(m); /* store and pop it */
4078 }
4079 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4080
4081 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4082 {
4083 int rs;
4084
4085 make_tos(r);
4086 raw_fstpt(m); /* store and pop it */
4087 live.onstack[live.tos]=-1;
4088 live.tos--;
4089 live.spos[r]=-2;
4090 }
4091 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4092
4093 LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4094 {
4095 raw_fldt(m);
4096 tos_make(r);
4097 }
4098 LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4099
4100 LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4101 {
4102 emit_byte(0xd9);
4103 emit_byte(0xeb);
4104 tos_make(r);
4105 }
4106 LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4107
4108 LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4109 {
4110 emit_byte(0xd9);
4111 emit_byte(0xec);
4112 tos_make(r);
4113 }
4114 LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4115
4116 LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4117 {
4118 emit_byte(0xd9);
4119 emit_byte(0xea);
4120 tos_make(r);
4121 }
4122 LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4123
4124 LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4125 {
4126 emit_byte(0xd9);
4127 emit_byte(0xed);
4128 tos_make(r);
4129 }
4130 LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4131
4132 LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4133 {
4134 emit_byte(0xd9);
4135 emit_byte(0xe8);
4136 tos_make(r);
4137 }
4138 LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4139
4140 LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4141 {
4142 emit_byte(0xd9);
4143 emit_byte(0xee);
4144 tos_make(r);
4145 }
4146 LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4147
4148 LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4149 {
4150 int ds;
4151
4152 usereg(s);
4153 ds=stackpos(s);
4154 if (ds==0 && live.spos[d]>=0) {
4155 /* source is on top of stack, and we already have the dest */
4156 int dd=stackpos(d);
4157 emit_byte(0xdd);
4158 emit_byte(0xd0+dd);
4159 }
4160 else {
4161 emit_byte(0xd9);
4162 emit_byte(0xc0+ds); /* duplicate source on tos */
4163 tos_make(d); /* store to destination, pop if necessary */
4164 }
4165 }
4166 LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4167
4168 LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4169 {
4170 emit_byte(0xd9);
4171 emit_byte(0xa8+index);
4172 emit_long(base);
4173 }
4174 LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4175
4176
4177 LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4178 {
4179 int ds;
4180
4181 if (d!=s) {
4182 usereg(s);
4183 ds=stackpos(s);
4184 emit_byte(0xd9);
4185 emit_byte(0xc0+ds); /* duplicate source */
4186 emit_byte(0xd9);
4187 emit_byte(0xfa); /* take square root */
4188 tos_make(d); /* store to destination */
4189 }
4190 else {
4191 make_tos(d);
4192 emit_byte(0xd9);
4193 emit_byte(0xfa); /* take square root */
4194 }
4195 }
4196 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4197
4198 LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4199 {
4200 int ds;
4201
4202 if (d!=s) {
4203 usereg(s);
4204 ds=stackpos(s);
4205 emit_byte(0xd9);
4206 emit_byte(0xc0+ds); /* duplicate source */
4207 emit_byte(0xd9);
4208 emit_byte(0xe1); /* take fabs */
4209 tos_make(d); /* store to destination */
4210 }
4211 else {
4212 make_tos(d);
4213 emit_byte(0xd9);
4214 emit_byte(0xe1); /* take fabs */
4215 }
4216 }
4217 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4218
4219 LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4220 {
4221 int ds;
4222
4223 if (d!=s) {
4224 usereg(s);
4225 ds=stackpos(s);
4226 emit_byte(0xd9);
4227 emit_byte(0xc0+ds); /* duplicate source */
4228 emit_byte(0xd9);
4229 emit_byte(0xfc); /* take frndint */
4230 tos_make(d); /* store to destination */
4231 }
4232 else {
4233 make_tos(d);
4234 emit_byte(0xd9);
4235 emit_byte(0xfc); /* take frndint */
4236 }
4237 }
4238 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4239
4240 LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4241 {
4242 int ds;
4243
4244 if (d!=s) {
4245 usereg(s);
4246 ds=stackpos(s);
4247 emit_byte(0xd9);
4248 emit_byte(0xc0+ds); /* duplicate source */
4249 emit_byte(0xd9);
4250 emit_byte(0xff); /* take cos */
4251 tos_make(d); /* store to destination */
4252 }
4253 else {
4254 make_tos(d);
4255 emit_byte(0xd9);
4256 emit_byte(0xff); /* take cos */
4257 }
4258 }
4259 LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4260
4261 LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4262 {
4263 int ds;
4264
4265 if (d!=s) {
4266 usereg(s);
4267 ds=stackpos(s);
4268 emit_byte(0xd9);
4269 emit_byte(0xc0+ds); /* duplicate source */
4270 emit_byte(0xd9);
4271 emit_byte(0xfe); /* take sin */
4272 tos_make(d); /* store to destination */
4273 }
4274 else {
4275 make_tos(d);
4276 emit_byte(0xd9);
4277 emit_byte(0xfe); /* take sin */
4278 }
4279 }
4280 LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4281
4282 double one=1;
4283 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4284 {
4285 int ds;
4286
4287 usereg(s);
4288 ds=stackpos(s);
4289 emit_byte(0xd9);
4290 emit_byte(0xc0+ds); /* duplicate source */
4291
4292 emit_byte(0xd9);
4293 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4294 emit_byte(0xd9);
4295 emit_byte(0xfc); /* rndint */
4296 emit_byte(0xd9);
4297 emit_byte(0xc9); /* swap top two elements */
4298 emit_byte(0xd8);
4299 emit_byte(0xe1); /* subtract rounded from original */
4300 emit_byte(0xd9);
4301 emit_byte(0xf0); /* f2xm1 */
4302 emit_byte(0xdc);
4303 emit_byte(0x05);
4304 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4305 emit_byte(0xd9);
4306 emit_byte(0xfd); /* and scale it */
4307 emit_byte(0xdd);
4308 emit_byte(0xd9); /* take the rounded value off */
4309 tos_make(d); /* store to destination */
4310 }
4311 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
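/* raw_ftwotox_rr computes 2^x via the identity
 *   2^x = 2^round(x) * 2^(x - round(x)),
 * since f2xm1 only accepts arguments in (-1,+1): frndint produces round(x),
 * the subtraction leaves the fractional part, f2xm1 gives 2^frac - 1, adding
 * the static "one" from memory restores the +1 without growing the stack,
 * and fscale multiplies by 2^round(x).  raw_fetox_rr below reuses the same
 * scheme after first multiplying by log2(e), because e^x = 2^(x*log2(e)). */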
4312
4313 LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4314 {
4315 int ds;
4316
4317 usereg(s);
4318 ds=stackpos(s);
4319 emit_byte(0xd9);
4320 emit_byte(0xc0+ds); /* duplicate source */
4321 emit_byte(0xd9);
4322 emit_byte(0xea); /* fldl2e */
4323 emit_byte(0xde);
4324 emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4325
4326 emit_byte(0xd9);
4327 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4328 emit_byte(0xd9);
4329 emit_byte(0xfc); /* rndint */
4330 emit_byte(0xd9);
4331 emit_byte(0xc9); /* swap top two elements */
4332 emit_byte(0xd8);
4333 emit_byte(0xe1); /* subtract rounded from original */
4334 emit_byte(0xd9);
4335 emit_byte(0xf0); /* f2xm1 */
4336 emit_byte(0xdc);
4337 emit_byte(0x05);
4338 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4339 emit_byte(0xd9);
4340 emit_byte(0xfd); /* and scale it */
4341 emit_byte(0xdd);
4342 emit_byte(0xd9); /* take the rounded value off */
4343 tos_make(d); /* store to destination */
4344 }
4345 LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
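/*
 * raw_fetox_rr reduces e^x to the 2^x case above via the identity
 * e^x = 2^(x * log2(e)): FLDL2E pushes log2(e), FMULP rescales the
 * argument, and the remaining bytes are the same F2XM1/FSCALE sequence
 * as in raw_ftwotox_rr. In C terms (sketch only):
 *
 *   double etox(double x) { return twotox(x * M_LOG2E); }
 */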
4346
4347 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4348 {
4349 int ds;
4350
4351 usereg(s);
4352 ds=stackpos(s);
4353 emit_byte(0xd9);
4354 emit_byte(0xc0+ds); /* duplicate source */
4355 emit_byte(0xd9);
4356 emit_byte(0xe8); /* push '1' */
4357 emit_byte(0xd9);
4358 emit_byte(0xc9); /* swap top two */
4359 emit_byte(0xd9);
4360 emit_byte(0xf1); /* take 1*log2(x) */
4361 tos_make(d); /* store to destination */
4362 }
4363 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
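/*
 * raw_flog2_rr relies on FYL2X (D9 F1), which computes ST(1) * log2(ST(0))
 * and pops; pushing the constant 1.0 first (FLD1) and swapping the top two
 * entries therefore yields plain log2(x), i.e. the equivalent of log2(x)
 * from <math.h>.
 */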
4364
4365
4366 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4367 {
4368 int ds;
4369
4370 if (d!=s) {
4371 usereg(s);
4372 ds=stackpos(s);
4373 emit_byte(0xd9);
4374 emit_byte(0xc0+ds); /* duplicate source */
4375 emit_byte(0xd9);
4376 emit_byte(0xe0); /* take fchs */
4377 tos_make(d); /* store to destination */
4378 }
4379 else {
4380 make_tos(d);
4381 emit_byte(0xd9);
4382 emit_byte(0xe0); /* take fchs */
4383 }
4384 }
4385 LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4386
4387 LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4388 {
4389 int ds;
4390
4391 usereg(s);
4392 usereg(d);
4393
4394 if (live.spos[s]==live.tos) {
4395 /* Source is on top of stack */
4396 ds=stackpos(d);
4397 emit_byte(0xdc);
4398 emit_byte(0xc0+ds); /* add source to dest*/
4399 }
4400 else {
4401 make_tos(d);
4402 ds=stackpos(s);
4403
4404 emit_byte(0xd8);
4405 emit_byte(0xc0+ds); /* add source to dest*/
4406 }
4407 }
4408 LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
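/*
 * On the two-operand arithmetic emitters (this one and the fsub/fmul/fdiv
 * ones below): the D8 opcode family uses ST(0) as the destination
 * (ST(0) op= ST(i)) while the DC family targets the register operand
 * (ST(i) op= ST(0)). Branching on whether the source already sits at the
 * top of the stack lets the emitter pick whichever form avoids an extra
 * FXCH. Roughly (sketch, not generated code):
 *
 *   if (source is at TOS)   emit DC-form:  ST(pos(dest)) += ST(0);
 *   else                    make_tos(dest), emit D8-form:  ST(0) += ST(pos(source));
 */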
4409
4410 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4411 {
4412 int ds;
4413
4414 usereg(s);
4415 usereg(d);
4416
4417 if (live.spos[s]==live.tos) {
4418 /* Source is on top of stack */
4419 ds=stackpos(d);
4420 emit_byte(0xdc);
4421 emit_byte(0xe8+ds); /* sub source from dest*/
4422 }
4423 else {
4424 make_tos(d);
4425 ds=stackpos(s);
4426
4427 emit_byte(0xd8);
4428 emit_byte(0xe0+ds); /* sub src from dest */
4429 }
4430 }
4431 LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4432
4433 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4434 {
4435 int ds;
4436
4437 usereg(s);
4438 usereg(d);
4439
4440 make_tos(d);
4441 ds=stackpos(s);
4442
4443 emit_byte(0xdd);
4444 emit_byte(0xe0+ds); /* cmp dest with source*/
4445 }
4446 LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
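/*
 * raw_fcmp_rr uses FUCOM (DD E0+i), the unordered compare: it only sets the
 * FPU condition-code bits C0/C2/C3 in the status word and, unlike FCOM,
 * does not fault on quiet NaNs. Nothing is copied into the CPU's EFLAGS
 * here; callers that need the result must read the status word separately.
 */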
4447
4448 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4449 {
4450 int ds;
4451
4452 usereg(s);
4453 usereg(d);
4454
4455 if (live.spos[s]==live.tos) {
4456 /* Source is on top of stack */
4457 ds=stackpos(d);
4458 emit_byte(0xdc);
4459 emit_byte(0xc8+ds); /* mul dest by source*/
4460 }
4461 else {
4462 make_tos(d);
4463 ds=stackpos(s);
4464
4465 emit_byte(0xd8);
4466 emit_byte(0xc8+ds); /* mul dest by source*/
4467 }
4468 }
4469 LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4470
4471 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4472 {
4473 int ds;
4474
4475 usereg(s);
4476 usereg(d);
4477
4478 if (live.spos[s]==live.tos) {
4479 /* Source is on top of stack */
4480 ds=stackpos(d);
4481 emit_byte(0xdc);
4482 emit_byte(0xf8+ds); /* div dest by source */
4483 }
4484 else {
4485 make_tos(d);
4486 ds=stackpos(s);
4487
4488 emit_byte(0xd8);
4489 emit_byte(0xf0+ds); /* div dest by source*/
4490 }
4491 }
4492 LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4493
4494 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4495 {
4496 int ds;
4497
4498 usereg(s);
4499 usereg(d);
4500
4501 make_tos2(d,s);
4502 ds=stackpos(s);
4503
4504 if (ds!=1) {
4505 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4506 abort();
4507 }
4508 emit_byte(0xd9);
4509 emit_byte(0xf8); /* take rem from dest by source */
4510 }
4511 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4512
4513 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4514 {
4515 int ds;
4516
4517 usereg(s);
4518 usereg(d);
4519
4520 make_tos2(d,s);
4521 ds=stackpos(s);
4522
4523 if (ds!=1) {
4524 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4525 abort();
4526 }
4527 emit_byte(0xd9);
4528 emit_byte(0xf5); /* take rem1 from dest by source */
4529 }
4530 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
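/*
 * FPREM (D9 F8) and FPREM1 (D9 F5) differ only in how the implied quotient
 * is rounded: FPREM truncates it (the fmod()-style remainder, presumably
 * backing the 68k FMOD), while FPREM1 rounds it to nearest (the IEEE
 * remainder()-style result, presumably backing FREM). In C terms (sketch
 * only):
 *
 *   double frem_like(double d, double s)  { return fmod(d, s); }       // FPREM
 *   double frem1_like(double d, double s) { return remainder(d, s); }  // FPREM1
 *
 * Both instructions perform only a partial reduction when the operand
 * exponents differ by more than 63 (C2 set); a single instruction is
 * emitted here, with no retry loop.
 */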
4531
4532
4533 LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4534 {
4535 make_tos(r);
4536 emit_byte(0xd9); /* ftst */
4537 emit_byte(0xe4);
4538 }
4539 LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4540
4541 /* %eax register is clobbered if target processor doesn't support fucomi */
4542 #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4543 #define FFLAG_NREG EAX_INDEX
4544
4545 static __inline__ void raw_fflags_into_flags(int r)
4546 {
4547 int p;
4548
4549 usereg(r);
4550 p=stackpos(r);
4551
4552 emit_byte(0xd9);
4553 emit_byte(0xee); /* Push 0 */
4554 emit_byte(0xd9);
4555 emit_byte(0xc9+p); /* swap r with the 0 just pushed (r now on top) */
4556 if (have_cmov) {
4557 // gb-- fucomi is for P6 cores only, not K6-2 then...
4558 emit_byte(0xdb);
4559 emit_byte(0xe9+p); /* fucomi them */
4560 }
4561 else {
4562 emit_byte(0xdd);
4563 emit_byte(0xe1+p); /* fucom them */
4564 emit_byte(0x9b);
4565 emit_byte(0xdf);
4566 emit_byte(0xe0); /* fstsw ax */
4567 raw_sahf(0); /* sahf */
4568 }
4569 emit_byte(0xdd);
4570 emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4571 }
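/*
 * raw_fflags_into_flags compares the value against a freshly pushed 0.0 and
 * moves the outcome into the CPU flags. On P6-class cores (have_cmov) a
 * single FUCOMI writes ZF/PF/CF directly; otherwise the result has to travel
 * through the FPU status word, via FSTSW AX (9B DF E0, i.e. FWAIT + FNSTSW
 * AX) followed by SAHF, which is why FFLAG_NREG_CLOBBER_CONDITION above
 * reports %eax as clobbered when fucomi is unavailable.
 */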