ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
Revision: 1.28
Committed: 2005-04-21T09:08:57Z (19 years, 2 months ago) by gbeauche
Branch: MAIN
Changes since 1.27: +7 -5 lines
Log Message:
Recognize lahf_lm from Dual Core Opterons. This enables use of LAHF/SETO
instructions in long mode (64-bit). However, there seems to be another bug
in the JIT preventing it from being fully supported. m68k.h & codegen_x86.h
are easily fixed, but another patch is still needed.

File Contents

# Content
1 /*
2 * compiler/codegen_x86.cpp - IA-32 code generator
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2005 Christian Bauer
10 *
11 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 /* This should eventually end up in machdep/, but for now, x86 is the
29 only target, and it's easier this way... */
30
31 #include "flags_x86.h"
32
33 /*************************************************************************
34 * Some basic information about the target CPU *
35 *************************************************************************/
36
/* Numeric indices used throughout this file to name the native
   general-purpose registers. Indices 8-15 (R8..R15) are only defined
   when building for x86-64. */
37 #define EAX_INDEX 0
38 #define ECX_INDEX 1
39 #define EDX_INDEX 2
40 #define EBX_INDEX 3
41 #define ESP_INDEX 4
42 #define EBP_INDEX 5
43 #define ESI_INDEX 6
44 #define EDI_INDEX 7
45 #if defined(__x86_64__)
46 #define R8_INDEX 8
47 #define R9_INDEX 9
48 #define R10_INDEX 10
49 #define R11_INDEX 11
50 #define R12_INDEX 12
51 #define R13_INDEX 13
52 #define R14_INDEX 14
53 #define R15_INDEX 15
54 #endif
55
56 /* The register in which subroutines return an integer return value */
57 #define REG_RESULT EAX_INDEX
58
59 /* The registers subroutines take their first and second argument in */
60 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
61 /* Handle the _fastcall parameters of ECX and EDX */
62 #define REG_PAR1 ECX_INDEX
63 #define REG_PAR2 EDX_INDEX
64 #elif defined(__x86_64__)
/* x86-64 builds: first argument in index 7 (EDI_INDEX), second in index 6 (ESI_INDEX) */
65 #define REG_PAR1 EDI_INDEX
66 #define REG_PAR2 ESI_INDEX
67 #else
/* All other builds: EAX/EDX -- presumably a regparm-style convention; confirm against the build flags */
68 #define REG_PAR1 EAX_INDEX
69 #define REG_PAR2 EDX_INDEX
70 #endif
71
/* Registers with a fixed role in generated code. */
72 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
73 #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* NOTE(review): in this branch REG_PC_TMP is the same index as REG_PC_PRE,
   unlike the other branch which documents it as a different register -- confirm this is intended */
74 #define REG_PC_TMP EAX_INDEX
75 #else
76 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
77 #endif
78
79 #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
80 -1 if any reg will do */
81 #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
82 #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
83
/* Register index lists, each terminated by -1.
   always_used holds only index 4 (ESP_INDEX).
   can_byte / can_word presumably list the registers usable for 8-bit and
   16-bit operands: x86-64 builds list every index except 4, while 32-bit
   builds restrict can_byte to indices 0-3 and can_word to 0-3 and 5-7. */
84 uae_s8 always_used[]={4,-1};
85 #if defined(__x86_64__)
86 uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
87 uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
88 #else
89 uae_s8 can_byte[]={0,1,2,3,-1};
90 uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
91 #endif
92
/* Per-register table indexed by the *_INDEX values above.
   The USE_OPTIMIZED_CALLS variant (8 entries) is rejected at compile time by
   the #error that follows it; the active variant has 16 entries and marks
   only index 4 (ESP_INDEX). */
93 #if USE_OPTIMIZED_CALLS
94 /* Make sure interpretive core does not use cpuopti */
95 uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
96 #error FIXME: code not ready
97 #else
98 /* cpuopti mutate instruction handlers to assume registers are saved
99 by the caller */
100 uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
101 #endif
102
103 /* This *should* be the same as call_saved. But:
104 - We might not really know which registers are saved, and which aren't,
105 so we need to preserve some, but don't want to rely on everyone else
106 also saving those registers
107 - Special registers (such as the stack pointer) should not be "preserved"
108 by pushing, even though they are "saved" across function calls
109 */
/* Per-register flags indexed by the *_INDEX values.
   x86-64 table: indices 3 (EBX), 5 (EBP), 11 (R11) and 12-15 are set; index 4
   (ESP) is left clear even though the ABI comment below lists rsp.
   32-bit table: every register except index 4 (ESP) is set. */
110 #if defined(__x86_64__)
111 /* callee-saved registers as defined by Linux/x86_64 ABI: rbx, rbp, rsp, r12 - r15 */
112 /* preserve r11 because it's generally used to hold pointers to functions */
113 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
114 #else
115 static const uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
116 #endif
117
118 /* Whether classes of instructions do or don't clobber the native flags */
/* A CLOBBER_<class> macro with an empty expansion does nothing; the others
   expand to a clobber_flags() call. */
119 #define CLOBBER_MOV
120 #define CLOBBER_LEA
121 #define CLOBBER_CMOV
122 #define CLOBBER_POP
123 #define CLOBBER_PUSH
124 #define CLOBBER_SUB clobber_flags()
125 #define CLOBBER_SBB clobber_flags()
126 #define CLOBBER_CMP clobber_flags()
127 #define CLOBBER_ADD clobber_flags()
128 #define CLOBBER_ADC clobber_flags()
129 #define CLOBBER_AND clobber_flags()
130 #define CLOBBER_OR clobber_flags()
131 #define CLOBBER_XOR clobber_flags()
132
133 #define CLOBBER_ROL clobber_flags()
134 #define CLOBBER_ROR clobber_flags()
135 #define CLOBBER_SHLL clobber_flags()
136 #define CLOBBER_SHRL clobber_flags()
137 #define CLOBBER_SHRA clobber_flags()
138 #define CLOBBER_TEST clobber_flags()
139 #define CLOBBER_CL16
140 #define CLOBBER_CL8
141 #define CLOBBER_SE32
142 #define CLOBBER_SE16
143 #define CLOBBER_SE8
144 #define CLOBBER_ZE32
145 #define CLOBBER_ZE16
146 #define CLOBBER_ZE8
147 #define CLOBBER_SW16 clobber_flags()
148 #define CLOBBER_SW32
149 #define CLOBBER_SETCC
150 #define CLOBBER_MUL clobber_flags()
151 #define CLOBBER_BT clobber_flags()
152 #define CLOBBER_BSF clobber_flags()
153
154 /* FIXME: disabled until that's proofread. */
/* NOTE(review): despite the FIXME above, USE_NEW_RTASM is defined to 1 here
   for x86-64 builds; other targets keep whatever value (if any) was set elsewhere. */
155 #if defined(__x86_64__)
156 #define USE_NEW_RTASM 1
157 #endif
158
159 #if USE_NEW_RTASM
160
/* Settings defined before including codegen_x86.h (presumably consumed by
   that header), followed by hook macros that map the x86_emit_* and
   x86_get_target names onto emit_byte/emit_word/emit_long/emit_quad/get_target
   and route failures to jit_fail() with the call site's file, line and function. */
161 #if defined(__x86_64__)
162 #define X86_TARGET_64BIT 1
163 #endif
164 #define X86_FLAT_REGISTERS 0
165 #define X86_OPTIMIZE_ALU 1
166 #define X86_OPTIMIZE_ROTSHI 1
167 #include "codegen_x86.h"
168
169 #define x86_emit_byte(B) emit_byte(B)
170 #define x86_emit_word(W) emit_word(W)
171 #define x86_emit_long(L) emit_long(L)
172 #define x86_emit_quad(Q) emit_quad(Q)
173 #define x86_get_target() get_target()
174 #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
175
/*
 * Report an unrecoverable code-generation failure on stderr and terminate.
 *
 * msg      - description of the failure
 * file     - source file of the failing site
 * line     - source line of the failing site
 * function - name of the function containing the failing site
 *
 * Does not return: abort() is called after the message is written.
 */
static void jit_fail(const char *msg, const char *file, int line, const char *function)
{
	static const char report_fmt[] =
		"JIT failure in function %s from file %s at line %d: %s\n";

	fprintf(stderr, report_fmt, function, file, line, msg);
	abort();
}
182
/* Emit a push of register r: PUSHQ on x86-64 builds, PUSHL otherwise. */
183 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
184 {
185 #if defined(__x86_64__)
186 PUSHQr(r);
187 #else
188 PUSHLr(r);
189 #endif
190 }
191 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
192
/* Emit a pop into register r: POPQ on x86-64 builds, POPL otherwise. */
193 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
194 {
195 #if defined(__x86_64__)
196 POPQr(r);
197 #else
198 POPLr(r);
199 #endif
200 }
201 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
202
/* Emit a pop into the memory operand at absolute address d (no base or
   index register): POPQ on x86-64 builds, POPL otherwise. */
203 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
204 {
205 #if defined(__x86_64__)
206 POPQm(d, X86_NOREG, X86_NOREG, 1);
207 #else
208 POPLm(d, X86_NOREG, X86_NOREG, 1);
209 #endif
210 }
211 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
212
/* Emit BTL (bit test) on register r with immediate bit index i. */
213 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
214 {
215 BTLir(i, r);
216 }
217 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
218
/* Emit BTL (bit test) on register r with the bit index taken from register b. */
219 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
220 {
221 BTLrr(b, r);
222 }
223 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
224
/* Emit BTCL (bit test and complement) on register r with immediate bit index i. */
225 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
226 {
227 BTCLir(i, r);
228 }
229 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
230
/* Emit BTCL (bit test and complement) on register r with the bit index in register b. */
231 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
232 {
233 BTCLrr(b, r);
234 }
235 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
236
/* Emit BTRL (bit test and reset) on register r with immediate bit index i. */
237 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
238 {
239 BTRLir(i, r);
240 }
241 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
242
/* Emit BTRL (bit test and reset) on register r with the bit index in register b. */
243 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
244 {
245 BTRLrr(b, r);
246 }
247 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
248
/* Emit BTSL (bit test and set) on register r with immediate bit index i. */
249 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
250 {
251 BTSLir(i, r);
252 }
253 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
254
/* Emit BTSL (bit test and set) on register r with the bit index in register b. */
255 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
256 {
257 BTSLrr(b, r);
258 }
259 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
260
/* Emit SUBW: subtract immediate i from 16-bit register d. */
261 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
262 {
263 SUBWir(i, d);
264 }
265 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
266
/* Emit MOVL loading register d from absolute address s (no base or index register). */
267 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
268 {
269 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
270 }
271 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
272
/* Emit MOVL storing immediate s to absolute address d. */
273 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
274 {
275 MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
276 }
277 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
278
/* Emit MOVW storing immediate s to absolute address d. */
279 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
280 {
281 MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
282 }
283 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
284
/* Emit MOVB storing immediate s to absolute address d. */
285 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
286 {
287 MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
288 }
289 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
290
/* Emit ROLB rotating the byte at absolute address d left by immediate i. */
291 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
292 {
293 ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
294 }
295 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
296
/* Emit ROLB: rotate 8-bit register r left by immediate i. */
297 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
298 {
299 ROLBir(i, r);
300 }
301 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
302
/* Emit ROLW: rotate 16-bit register r left by immediate i. */
303 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
304 {
305 ROLWir(i, r);
306 }
307 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
308
/* Emit ROLL: rotate 32-bit register r left by immediate i. */
309 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
310 {
311 ROLLir(i, r);
312 }
313 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
314
/* Emit ROLL: rotate 32-bit register d left by the count held in register r. */
315 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
316 {
317 ROLLrr(r, d);
318 }
319 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
320
/* Emit ROLW: rotate 16-bit register d left by the count held in register r. */
321 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
322 {
323 ROLWrr(r, d);
324 }
325 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
326
/* Emit ROLB: rotate 8-bit register d left by the count held in register r. */
327 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
328 {
329 ROLBrr(r, d);
330 }
331 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
332
/* Emit SHLL: shift 32-bit register d left by the count held in register r. */
333 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
334 {
335 SHLLrr(r, d);
336 }
337 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
338
/* Emit SHLW: shift 16-bit register d left by the count held in register r. */
339 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
340 {
341 SHLWrr(r, d);
342 }
343 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
344
/* Emit SHLB: shift 8-bit register d left by the count held in register r. */
345 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
346 {
347 SHLBrr(r, d);
348 }
349 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
350
/* Emit RORB: rotate 8-bit register r right by immediate i. */
351 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
352 {
353 RORBir(i, r);
354 }
355 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
356
/* Emit RORW: rotate 16-bit register r right by immediate i. */
357 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
358 {
359 RORWir(i, r);
360 }
361 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
362
/* Emit ORL: OR the 32-bit value at absolute address s into register d. */
363 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
364 {
365 ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
366 }
367 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
368
/* Emit RORL: rotate 32-bit register r right by immediate i. */
369 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
370 {
371 RORLir(i, r);
372 }
373 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
374
/* Emit RORL: rotate 32-bit register d right by the count held in register r. */
375 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
376 {
377 RORLrr(r, d);
378 }
379 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
380
/* Emit RORW: rotate 16-bit register d right by the count held in register r. */
381 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
382 {
383 RORWrr(r, d);
384 }
385 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
386
/* Emit RORB: rotate 8-bit register d right by the count held in register r. */
387 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
388 {
389 RORBrr(r, d);
390 }
391 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
392
/* Emit SHRL: logical right shift of 32-bit register d by the count in register r. */
393 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
394 {
395 SHRLrr(r, d);
396 }
397 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
398
/* Emit SHRW: logical right shift of 16-bit register d by the count in register r. */
399 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
400 {
401 SHRWrr(r, d);
402 }
403 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
404
/* Emit SHRB: logical right shift of 8-bit register d by the count in register r. */
405 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
406 {
407 SHRBrr(r, d);
408 }
409 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
410
/* Emit SARL: arithmetic right shift of 32-bit register d by the count in register r. */
411 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
412 {
413 SARLrr(r, d);
414 }
415 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
416
/* Emit SARW: arithmetic right shift of 16-bit register d by the count in register r. */
417 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
418 {
419 SARWrr(r, d);
420 }
421 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
422
/* Emit SARB: arithmetic right shift of 8-bit register d by the count in register r. */
423 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
424 {
425 SARBrr(r, d);
426 }
427 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
428
/* Emit SHLL: shift 32-bit register r left by immediate i. */
429 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
430 {
431 SHLLir(i, r);
432 }
433 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
434
/* Emit SHLW: shift 16-bit register r left by immediate i. */
435 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
436 {
437 SHLWir(i, r);
438 }
439 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
440
/* Emit SHLB: shift 8-bit register r left by immediate i. */
441 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
442 {
443 SHLBir(i, r);
444 }
445 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
446
/* Emit SHRL: logical right shift of 32-bit register r by immediate i. */
447 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
448 {
449 SHRLir(i, r);
450 }
451 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
452
/* Emit SHRW: logical right shift of 16-bit register r by immediate i. */
453 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
454 {
455 SHRWir(i, r);
456 }
457 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
458
/* Emit SHRB: logical right shift of 8-bit register r by immediate i. */
459 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
460 {
461 SHRBir(i, r);
462 }
463 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
464
/* Emit SARL: arithmetic right shift of 32-bit register r by immediate i. */
465 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
466 {
467 SARLir(i, r);
468 }
469 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
470
/* Emit SARW: arithmetic right shift of 16-bit register r by immediate i. */
471 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
472 {
473 SARWir(i, r);
474 }
475 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
476
/* Emit SARB: arithmetic right shift of 8-bit register r by immediate i. */
477 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
478 {
479 SARBir(i, r);
480 }
481 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
482
/* Emit SAHF. dummy_ah is not referenced in the body; it presumably exists so
   the caller's register tracking knows AH is involved -- confirm. */
483 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
484 {
485 SAHF();
486 }
487 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
488
/* Emit CPUID. dummy_eax is not referenced in the body. */
489 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
490 {
491 CPUID();
492 }
493 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
494
/* Emit LAHF. dummy_ah is not referenced in the body; it presumably exists so
   the caller's register tracking knows AH is written -- confirm. */
495 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
496 {
497 LAHF();
498 }
499 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
500
/* Emit SETcc: set 8-bit register d according to condition code cc. */
501 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
502 {
503 SETCCir(cc, d);
504 }
505 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
506
/* Emit SETcc: set the byte at absolute address d according to condition code cc. */
507 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
508 {
509 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
510 }
511 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
512
513 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
514 {
515 if (have_cmov)
516 CMOVLrr(cc, s, d);
517 else { /* replacement using branch and mov */
518 #if defined(__x86_64__)
519 write_log("x86-64 implementations are bound to have CMOV!\n");
520 abort();
521 #endif
522 JCCSii(cc^1, 2);
523 MOVLrr(s, d);
524 }
525 }
526 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
527
/* Emit BSFL (bit scan forward) scanning register s, result into register d. */
528 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
529 {
530 BSFLrr(s, d);
531 }
532 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
533
/* Emit MOVSLQ: sign-extend the 32-bit value in s into d.
   NOTE(review): MOVSLQ is presumably only encodable for 64-bit targets, yet
   this definition is not guarded by __x86_64__ -- confirm. */
534 LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
535 {
536 MOVSLQrr(s, d);
537 }
538 LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
539
/* Emit MOVSWL: sign-extend the 16-bit value in s into 32-bit register d. */
540 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
541 {
542 MOVSWLrr(s, d);
543 }
544 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
545
/* Emit MOVSBL: sign-extend the 8-bit value in s into 32-bit register d. */
546 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
547 {
548 MOVSBLrr(s, d);
549 }
550 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
551
/* Emit MOVZWL: zero-extend the 16-bit value in s into 32-bit register d. */
552 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
553 {
554 MOVZWLrr(s, d);
555 }
556 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
557
/* Emit MOVZBL: zero-extend the 8-bit value in s into 32-bit register d. */
558 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
559 {
560 MOVZBLrr(s, d);
561 }
562 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
563
/* Emit two-operand IMULL: signed multiply of register d by register s, result in d. */
564 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
565 {
566 IMULLrr(s, d);
567 }
568 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
569
570 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
571 {
572 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
573 write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
574 abort();
575 }
576 IMULLr(s);
577 }
578 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
579
580 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
581 {
582 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
583 write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
584 abort();
585 }
586 MULLr(s);
587 }
588 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
589
/* Not implemented: calls abort() unconditionally; d and s are not used. */
590 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
591 {
592 abort(); /* %^$&%^$%#^ x86! */
593 }
594 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
595
/* Emit MOVB copying 8-bit register s into register d. */
596 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
597 {
598 MOVBrr(s, d);
599 }
600 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
601
/* Emit MOVW copying 16-bit register s into register d. */
602 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
603 {
604 MOVWrr(s, d);
605 }
606 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
607
/* Emit MOVL loading d from memory at baser + index*factor (zero displacement). */
608 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
609 {
610 MOVLmr(0, baser, index, factor, d);
611 }
612 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
613
/* Emit MOVW loading d from memory at baser + index*factor (zero displacement). */
614 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
615 {
616 MOVWmr(0, baser, index, factor, d);
617 }
618 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
619
/* Emit MOVB loading d from memory at baser + index*factor (zero displacement). */
620 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
621 {
622 MOVBmr(0, baser, index, factor, d);
623 }
624 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
625
/* Emit MOVL storing s to memory at baser + index*factor (zero displacement). */
626 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
627 {
628 MOVLrm(s, 0, baser, index, factor);
629 }
630 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
631
/* Emit MOVW storing s to memory at baser + index*factor (zero displacement). */
632 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
633 {
634 MOVWrm(s, 0, baser, index, factor);
635 }
636 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
637
/* Emit MOVB storing s to memory at baser + index*factor (zero displacement). */
638 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
639 {
640 MOVBrm(s, 0, baser, index, factor);
641 }
642 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
643
/* Emit MOVL storing s to memory at base + baser + index*factor. */
644 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
645 {
646 MOVLrm(s, base, baser, index, factor);
647 }
648 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
649
/* Emit MOVW storing s to memory at base + baser + index*factor. */
650 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
651 {
652 MOVWrm(s, base, baser, index, factor);
653 }
654 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
655
/* Emit MOVB storing s to memory at base + baser + index*factor. */
656 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
657 {
658 MOVBrm(s, base, baser, index, factor);
659 }
660 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
661
/* Emit MOVL loading d from memory at base + baser + index*factor. */
662 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
663 {
664 MOVLmr(base, baser, index, factor, d);
665 }
666 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
667
/* Emit MOVW loading d from memory at base + baser + index*factor. */
668 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
669 {
670 MOVWmr(base, baser, index, factor, d);
671 }
672 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
673
/* Emit MOVB loading d from memory at base + baser + index*factor. */
674 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
675 {
676 MOVBmr(base, baser, index, factor, d);
677 }
678 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
679
/* Emit MOVL loading d from memory at base + index*factor (no base register). */
680 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
681 {
682 MOVLmr(base, X86_NOREG, index, factor, d);
683 }
684 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
685
686 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
687 {
688 if (have_cmov)
689 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
690 else { /* replacement using branch and mov */
691 #if defined(__x86_64__)
692 write_log("x86-64 implementations are bound to have CMOV!\n");
693 abort();
694 #endif
695 JCCSii(cond^1, 7);
696 MOVLmr(base, X86_NOREG, index, factor, d);
697 }
698 }
699 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
700
701 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
702 {
703 if (have_cmov)
704 CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
705 else { /* replacement using branch and mov */
706 #if defined(__x86_64__)
707 write_log("x86-64 implementations are bound to have CMOV!\n");
708 abort();
709 #endif
710 JCCSii(cond^1, 6);
711 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
712 }
713 }
714 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
715
/* Emit MOVL loading d from memory at register s + offset. */
716 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
717 {
718 MOVLmr(offset, s, X86_NOREG, 1, d);
719 }
720 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
721
/* Emit MOVW loading d from memory at register s + offset. */
722 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
723 {
724 MOVWmr(offset, s, X86_NOREG, 1, d);
725 }
726 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
727
/* Emit MOVB loading d from memory at register s + offset. */
728 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
729 {
730 MOVBmr(offset, s, X86_NOREG, 1, d);
731 }
732 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
733
/* Emit MOVL loading d from memory at register s + offset (body identical to raw_mov_l_rR). */
734 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
735 {
736 MOVLmr(offset, s, X86_NOREG, 1, d);
737 }
738 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
739
/* Emit MOVW loading d from memory at register s + offset (body identical to raw_mov_w_rR). */
740 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
741 {
742 MOVWmr(offset, s, X86_NOREG, 1, d);
743 }
744 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
745
/* Emit MOVB loading d from memory at register s + offset (body identical to raw_mov_b_rR). */
746 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
747 {
748 MOVBmr(offset, s, X86_NOREG, 1, d);
749 }
750 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
751
/* Emit MOVL storing immediate i to memory at register d + offset. */
752 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
753 {
754 MOVLim(i, offset, d, X86_NOREG, 1);
755 }
756 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
757
/* Emit MOVW storing immediate i to memory at register d + offset. */
758 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
759 {
760 MOVWim(i, offset, d, X86_NOREG, 1);
761 }
762 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
763
/* Emit MOVB storing immediate i to memory at register d + offset. */
764 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
765 {
766 MOVBim(i, offset, d, X86_NOREG, 1);
767 }
768 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
769
/* Emit MOVL storing register s to memory at register d + offset. */
770 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
771 {
772 MOVLrm(s, offset, d, X86_NOREG, 1);
773 }
774 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
775
/* Emit MOVW storing register s to memory at register d + offset. */
776 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
777 {
778 MOVWrm(s, offset, d, X86_NOREG, 1);
779 }
780 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
781
/* Emit MOVB storing register s to memory at register d + offset. */
782 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
783 {
784 MOVBrm(s, offset, d, X86_NOREG, 1);
785 }
786 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
787
/* Emit LEAL computing the address s + offset into register d. */
788 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
789 {
790 LEALmr(offset, s, X86_NOREG, 1, d);
791 }
792 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
793
/* Emit LEAL computing the address offset + s + index*factor into register d. */
794 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
795 {
796 LEALmr(offset, s, index, factor, d);
797 }
798 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
799
/* Emit LEAL computing the address s + index*factor (zero displacement) into register d. */
800 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
801 {
802 LEALmr(0, s, index, factor, d);
803 }
804 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
805
/* Emit MOVL storing register s to memory at register d + offset (body identical to raw_mov_l_Rr). */
806 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
807 {
808 MOVLrm(s, offset, d, X86_NOREG, 1);
809 }
810 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
811
/* Emit MOVW storing register s to memory at register d + offset (body identical to raw_mov_w_Rr). */
812 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
813 {
814 MOVWrm(s, offset, d, X86_NOREG, 1);
815 }
816 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
817
/* Emit MOVB storing register s to memory at register d + offset (body identical to raw_mov_b_Rr). */
818 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
819 {
820 MOVBrm(s, offset, d, X86_NOREG, 1);
821 }
822 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
823
/* Emit BSWAPL: byte-swap 32-bit register r. */
824 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
825 {
826 BSWAPLr(r);
827 }
828 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
829
/* Swap the two bytes of 16-bit register r by emitting ROLW with a count of 8. */
830 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
831 {
832 ROLWir(8, r);
833 }
834 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
835
/* Emit MOVL copying 32-bit register s into register d. */
836 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
837 {
838 MOVLrr(s, d);
839 }
840 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
841
/* Emit MOVL storing register s to absolute address d. */
842 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
843 {
844 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
845 }
846 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
847
/* Emit MOVW storing 16-bit register s to absolute address d. */
848 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
849 {
850 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
851 }
852 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
853
/* Emit MOVW loading 16-bit register d from absolute address s. */
854 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
855 {
856 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
857 }
858 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
859
/* Emit MOVB storing 8-bit register s to absolute address d. */
860 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
861 {
862 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
863 }
864 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
865
/* Emit MOVB loading 8-bit register d from absolute address s. */
866 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
867 {
868 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
869 }
870 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
871
/* Emit MOVL loading immediate s into 32-bit register d. */
872 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
873 {
874 MOVLir(s, d);
875 }
876 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
877
/* Emit MOVW loading immediate s into 16-bit register d. */
878 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
879 {
880 MOVWir(s, d);
881 }
882 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
883
/* Emit MOVB loading immediate s into 8-bit register d. */
884 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
885 {
886 MOVBir(s, d);
887 }
888 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
889
/* Emit ADCL: add-with-carry of immediate s to the 32-bit value at absolute address d. */
890 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
891 {
892 ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
893 }
894 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
895
/* Emit ADDL: add immediate s to the 32-bit value at absolute address d. */
896 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
897 {
898 ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
899 }
900 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
901
/* Emit ADDW: add immediate s to the 16-bit value at absolute address d. */
902 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
903 {
904 ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
905 }
906 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
907
/* Emit ADDB: add immediate s to the byte at absolute address d. */
908 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
909 {
910 ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
911 }
912 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
913
/* Emit TESTL of 32-bit register d against immediate i. */
914 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
915 {
916 TESTLir(i, d);
917 }
918 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
919
/* Emit TESTL of 32-bit register d against register s. */
920 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
921 {
922 TESTLrr(s, d);
923 }
924 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
925
/* Emit TESTW of 16-bit register d against register s. */
926 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
927 {
928 TESTWrr(s, d);
929 }
930 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
931
/* Emit TESTB of 8-bit register d against register s. */
932 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
933 {
934 TESTBrr(s, d);
935 }
936 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
937
/* Emit XORL: XOR immediate i into 32-bit register d. */
938 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
939 {
940 XORLir(i, d);
941 }
942 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
943
/* Emit ANDL: AND immediate i into 32-bit register d. */
944 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
945 {
946 ANDLir(i, d);
947 }
948 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
949
/* Emit ANDW: AND immediate i into 16-bit register d. */
950 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
951 {
952 ANDWir(i, d);
953 }
954 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
955
/* Emit ANDL: AND 32-bit register s into register d. */
956 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
957 {
958 ANDLrr(s, d);
959 }
960 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
961
/* Emit ANDW: AND 16-bit register s into register d. */
962 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
963 {
964 ANDWrr(s, d);
965 }
966 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
967
/* Emit ANDB: AND 8-bit register s into register d. */
968 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
969 {
970 ANDBrr(s, d);
971 }
972 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
973
/* Emit ORL: OR immediate i into 32-bit register d. */
974 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
975 {
976 ORLir(i, d);
977 }
978 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
979
/* Emit ORL: OR 32-bit register s into register d. */
980 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
981 {
982 ORLrr(s, d);
983 }
984 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
985
/* Emit ORW: OR 16-bit register s into register d. */
986 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
987 {
988 ORWrr(s, d);
989 }
990 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
991
/* Emit ORB: OR 8-bit register s into register d. */
992 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
993 {
994 ORBrr(s, d);
995 }
996 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
997
/* Emit ADCL: add-with-carry of 32-bit register s into register d. */
998 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
999 {
1000 ADCLrr(s, d);
1001 }
1002 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1003
/* Emit ADCW: add-with-carry of 16-bit register s into register d. */
1004 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1005 {
1006 ADCWrr(s, d);
1007 }
1008 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1009
/* Emit ADCB: add-with-carry of 8-bit register s into register d. */
1010 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1011 {
1012 ADCBrr(s, d);
1013 }
1014 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1015
/* Emit ADDL: add 32-bit register s into register d. */
1016 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1017 {
1018 ADDLrr(s, d);
1019 }
1020 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1021
/* Emit ADDW: add 16-bit register s into register d. */
1022 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1023 {
1024 ADDWrr(s, d);
1025 }
1026 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1027
/* Emit ADDB: add 8-bit register s into register d. */
1028 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1029 {
1030 ADDBrr(s, d);
1031 }
1032 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1033
/* Emit SUBL: subtract immediate i from 32-bit register d. */
1034 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1035 {
1036 SUBLir(i, d);
1037 }
1038 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1039
/* Emit SUBB: subtract immediate i from 8-bit register d. */
1040 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1041 {
1042 SUBBir(i, d);
1043 }
1044 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1045
/* Emit ADDL: add immediate i to 32-bit register d. */
1046 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1047 {
1048 ADDLir(i, d);
1049 }
1050 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1051
/* Emit ADDW: add immediate i to 16-bit register d. */
1052 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1053 {
1054 ADDWir(i, d);
1055 }
1056 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1057
/* Emit ADDB: add immediate i to 8-bit register d. */
1058 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1059 {
1060 ADDBir(i, d);
1061 }
1062 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1063
/* Emit SBBL: subtract-with-borrow of 32-bit register s from register d. */
1064 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1065 {
1066 SBBLrr(s, d);
1067 }
1068 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1069
/* Emit SBBW: subtract-with-borrow of 16-bit register s from register d. */
1070 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1071 {
1072 SBBWrr(s, d);
1073 }
1074 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1075
/* Emit SBBB: subtract-with-borrow of 8-bit register s from register d. */
1076 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1077 {
1078 SBBBrr(s, d);
1079 }
1080 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1081
/* Emit SUBL: subtract 32-bit register s from register d. */
1082 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1083 {
1084 SUBLrr(s, d);
1085 }
1086 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1087
/* Emit SUBW: subtract 16-bit register s from register d. */
1088 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1089 {
1090 SUBWrr(s, d);
1091 }
1092 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1093
/* Emit SUBB: subtract 8-bit register s from register d. */
1094 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1095 {
1096 SUBBrr(s, d);
1097 }
1098 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1099
/* Emit CMPL comparing 32-bit register d with register s. */
1100 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1101 {
1102 CMPLrr(s, d);
1103 }
1104 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1105
/* Emit CMPL comparing 32-bit register r with immediate i. */
1106 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1107 {
1108 CMPLir(i, r);
1109 }
1110 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1111
/* Emit CMPW comparing 16-bit register d with register s. */
1112 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1113 {
1114 CMPWrr(s, d);
1115 }
1116 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1117
1118 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1119 {
1120 CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1121 }
1122 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1123
1124 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1125 {
1126 CMPBir(i, d);
1127 }
1128 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1129
1130 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1131 {
1132 CMPBrr(s, d);
1133 }
1134 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1135
1136 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1137 {
1138 CMPLmr(offset, X86_NOREG, index, factor, d);
1139 }
1140 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1141
1142 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1143 {
1144 XORLrr(s, d);
1145 }
1146 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1147
1148 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1149 {
1150 XORWrr(s, d);
1151 }
1152 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1153
1154 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1155 {
1156 XORBrr(s, d);
1157 }
1158 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1159
1160 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1161 {
1162 SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1163 }
1164 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1165
1166 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1167 {
1168 CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1169 }
1170 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1171
1172 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1173 {
1174 XCHGLrr(r2, r1);
1175 }
1176 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1177
1178 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1179 {
1180 PUSHF();
1181 }
1182 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1183
1184 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1185 {
1186 POPF();
1187 }
1188 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1189
1190 #else
1191
/* Compile-time switches consulted by the hand-written encoders below:
 *   optimize_accum      - permit the short accumulator-only opcode forms
 *   optimize_imm8       - permit 8-bit immediates / displacements when
 *                         the value passes isbyte()
 *   optimize_shift_once - permit the dedicated shift/rotate-by-1 opcodes
 */
const bool optimize_accum      = true;
const bool optimize_imm8       = true;
const bool optimize_shift_once = true;
1195
1196 /*************************************************************************
1197 * Actual encoding of the instructions on the target CPU *
1198 *************************************************************************/
1199
1200 static __inline__ int isaccum(int r)
1201 {
1202 return (r == EAX_INDEX);
1203 }
1204
1205 static __inline__ int isbyte(uae_s32 x)
1206 {
1207 return (x>=-128 && x<=127);
1208 }
1209
1210 static __inline__ int isword(uae_s32 x)
1211 {
1212 return (x>=-32768 && x<=32767);
1213 }
1214
1215 LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1216 {
1217 emit_byte(0x50+r);
1218 }
1219 LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
1220
1221 LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1222 {
1223 emit_byte(0x58+r);
1224 }
1225 LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
1226
1227 LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1228 {
1229 emit_byte(0x8f);
1230 emit_byte(0x05);
1231 emit_long(d);
1232 }
1233 LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
1234
1235 LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1236 {
1237 emit_byte(0x0f);
1238 emit_byte(0xba);
1239 emit_byte(0xe0+r);
1240 emit_byte(i);
1241 }
1242 LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
1243
1244 LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1245 {
1246 emit_byte(0x0f);
1247 emit_byte(0xa3);
1248 emit_byte(0xc0+8*b+r);
1249 }
1250 LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
1251
1252 LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1253 {
1254 emit_byte(0x0f);
1255 emit_byte(0xba);
1256 emit_byte(0xf8+r);
1257 emit_byte(i);
1258 }
1259 LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
1260
1261 LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1262 {
1263 emit_byte(0x0f);
1264 emit_byte(0xbb);
1265 emit_byte(0xc0+8*b+r);
1266 }
1267 LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
1268
1269
1270 LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1271 {
1272 emit_byte(0x0f);
1273 emit_byte(0xba);
1274 emit_byte(0xf0+r);
1275 emit_byte(i);
1276 }
1277 LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
1278
1279 LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1280 {
1281 emit_byte(0x0f);
1282 emit_byte(0xb3);
1283 emit_byte(0xc0+8*b+r);
1284 }
1285 LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
1286
1287 LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1288 {
1289 emit_byte(0x0f);
1290 emit_byte(0xba);
1291 emit_byte(0xe8+r);
1292 emit_byte(i);
1293 }
1294 LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
1295
1296 LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1297 {
1298 emit_byte(0x0f);
1299 emit_byte(0xab);
1300 emit_byte(0xc0+8*b+r);
1301 }
1302 LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1303
1304 LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1305 {
1306 emit_byte(0x66);
1307 if (isbyte(i)) {
1308 emit_byte(0x83);
1309 emit_byte(0xe8+d);
1310 emit_byte(i);
1311 }
1312 else {
1313 if (optimize_accum && isaccum(d))
1314 emit_byte(0x2d);
1315 else {
1316 emit_byte(0x81);
1317 emit_byte(0xe8+d);
1318 }
1319 emit_word(i);
1320 }
1321 }
1322 LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1323
1324
1325 LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1326 {
1327 emit_byte(0x8b);
1328 emit_byte(0x05+8*d);
1329 emit_long(s);
1330 }
1331 LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
1332
1333 LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1334 {
1335 emit_byte(0xc7);
1336 emit_byte(0x05);
1337 emit_long(d);
1338 emit_long(s);
1339 }
1340 LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
1341
1342 LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1343 {
1344 emit_byte(0x66);
1345 emit_byte(0xc7);
1346 emit_byte(0x05);
1347 emit_long(d);
1348 emit_word(s);
1349 }
1350 LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
1351
1352 LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1353 {
1354 emit_byte(0xc6);
1355 emit_byte(0x05);
1356 emit_long(d);
1357 emit_byte(s);
1358 }
1359 LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
1360
1361 LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1362 {
1363 if (optimize_shift_once && (i == 1)) {
1364 emit_byte(0xd0);
1365 emit_byte(0x05);
1366 emit_long(d);
1367 }
1368 else {
1369 emit_byte(0xc0);
1370 emit_byte(0x05);
1371 emit_long(d);
1372 emit_byte(i);
1373 }
1374 }
1375 LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
1376
1377 LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1378 {
1379 if (optimize_shift_once && (i == 1)) {
1380 emit_byte(0xd0);
1381 emit_byte(0xc0+r);
1382 }
1383 else {
1384 emit_byte(0xc0);
1385 emit_byte(0xc0+r);
1386 emit_byte(i);
1387 }
1388 }
1389 LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1390
1391 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1392 {
1393 emit_byte(0x66);
1394 emit_byte(0xc1);
1395 emit_byte(0xc0+r);
1396 emit_byte(i);
1397 }
1398 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1399
1400 LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1401 {
1402 if (optimize_shift_once && (i == 1)) {
1403 emit_byte(0xd1);
1404 emit_byte(0xc0+r);
1405 }
1406 else {
1407 emit_byte(0xc1);
1408 emit_byte(0xc0+r);
1409 emit_byte(i);
1410 }
1411 }
1412 LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
1413
1414 LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1415 {
1416 emit_byte(0xd3);
1417 emit_byte(0xc0+d);
1418 }
1419 LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
1420
1421 LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1422 {
1423 emit_byte(0x66);
1424 emit_byte(0xd3);
1425 emit_byte(0xc0+d);
1426 }
1427 LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
1428
1429 LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1430 {
1431 emit_byte(0xd2);
1432 emit_byte(0xc0+d);
1433 }
1434 LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
1435
1436 LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1437 {
1438 emit_byte(0xd3);
1439 emit_byte(0xe0+d);
1440 }
1441 LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
1442
1443 LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1444 {
1445 emit_byte(0x66);
1446 emit_byte(0xd3);
1447 emit_byte(0xe0+d);
1448 }
1449 LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
1450
1451 LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1452 {
1453 emit_byte(0xd2);
1454 emit_byte(0xe0+d);
1455 }
1456 LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
1457
1458 LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1459 {
1460 if (optimize_shift_once && (i == 1)) {
1461 emit_byte(0xd0);
1462 emit_byte(0xc8+r);
1463 }
1464 else {
1465 emit_byte(0xc0);
1466 emit_byte(0xc8+r);
1467 emit_byte(i);
1468 }
1469 }
1470 LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1471
1472 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1473 {
1474 emit_byte(0x66);
1475 emit_byte(0xc1);
1476 emit_byte(0xc8+r);
1477 emit_byte(i);
1478 }
1479 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1480
1481 // gb-- used for making an fpcr value in compemu_fpp.cpp
1482 LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1483 {
1484 emit_byte(0x0b);
1485 emit_byte(0x05+8*d);
1486 emit_long(s);
1487 }
1488 LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
1489
1490 LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1491 {
1492 if (optimize_shift_once && (i == 1)) {
1493 emit_byte(0xd1);
1494 emit_byte(0xc8+r);
1495 }
1496 else {
1497 emit_byte(0xc1);
1498 emit_byte(0xc8+r);
1499 emit_byte(i);
1500 }
1501 }
1502 LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
1503
1504 LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1505 {
1506 emit_byte(0xd3);
1507 emit_byte(0xc8+d);
1508 }
1509 LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
1510
1511 LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1512 {
1513 emit_byte(0x66);
1514 emit_byte(0xd3);
1515 emit_byte(0xc8+d);
1516 }
1517 LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
1518
1519 LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1520 {
1521 emit_byte(0xd2);
1522 emit_byte(0xc8+d);
1523 }
1524 LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
1525
1526 LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1527 {
1528 emit_byte(0xd3);
1529 emit_byte(0xe8+d);
1530 }
1531 LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
1532
1533 LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1534 {
1535 emit_byte(0x66);
1536 emit_byte(0xd3);
1537 emit_byte(0xe8+d);
1538 }
1539 LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
1540
1541 LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1542 {
1543 emit_byte(0xd2);
1544 emit_byte(0xe8+d);
1545 }
1546 LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
1547
1548 LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1549 {
1550 emit_byte(0xd3);
1551 emit_byte(0xf8+d);
1552 }
1553 LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
1554
1555 LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1556 {
1557 emit_byte(0x66);
1558 emit_byte(0xd3);
1559 emit_byte(0xf8+d);
1560 }
1561 LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
1562
1563 LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1564 {
1565 emit_byte(0xd2);
1566 emit_byte(0xf8+d);
1567 }
1568 LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
1569
1570 LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1571 {
1572 if (optimize_shift_once && (i == 1)) {
1573 emit_byte(0xd1);
1574 emit_byte(0xe0+r);
1575 }
1576 else {
1577 emit_byte(0xc1);
1578 emit_byte(0xe0+r);
1579 emit_byte(i);
1580 }
1581 }
1582 LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1583
1584 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1585 {
1586 emit_byte(0x66);
1587 emit_byte(0xc1);
1588 emit_byte(0xe0+r);
1589 emit_byte(i);
1590 }
1591 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1592
1593 LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1594 {
1595 if (optimize_shift_once && (i == 1)) {
1596 emit_byte(0xd0);
1597 emit_byte(0xe0+r);
1598 }
1599 else {
1600 emit_byte(0xc0);
1601 emit_byte(0xe0+r);
1602 emit_byte(i);
1603 }
1604 }
1605 LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
1606
1607 LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1608 {
1609 if (optimize_shift_once && (i == 1)) {
1610 emit_byte(0xd1);
1611 emit_byte(0xe8+r);
1612 }
1613 else {
1614 emit_byte(0xc1);
1615 emit_byte(0xe8+r);
1616 emit_byte(i);
1617 }
1618 }
1619 LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1620
1621 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1622 {
1623 emit_byte(0x66);
1624 emit_byte(0xc1);
1625 emit_byte(0xe8+r);
1626 emit_byte(i);
1627 }
1628 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1629
1630 LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1631 {
1632 if (optimize_shift_once && (i == 1)) {
1633 emit_byte(0xd0);
1634 emit_byte(0xe8+r);
1635 }
1636 else {
1637 emit_byte(0xc0);
1638 emit_byte(0xe8+r);
1639 emit_byte(i);
1640 }
1641 }
1642 LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
1643
1644 LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1645 {
1646 if (optimize_shift_once && (i == 1)) {
1647 emit_byte(0xd1);
1648 emit_byte(0xf8+r);
1649 }
1650 else {
1651 emit_byte(0xc1);
1652 emit_byte(0xf8+r);
1653 emit_byte(i);
1654 }
1655 }
1656 LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1657
1658 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1659 {
1660 emit_byte(0x66);
1661 emit_byte(0xc1);
1662 emit_byte(0xf8+r);
1663 emit_byte(i);
1664 }
1665 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1666
1667 LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1668 {
1669 if (optimize_shift_once && (i == 1)) {
1670 emit_byte(0xd0);
1671 emit_byte(0xf8+r);
1672 }
1673 else {
1674 emit_byte(0xc0);
1675 emit_byte(0xf8+r);
1676 emit_byte(i);
1677 }
1678 }
1679 LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
1680
1681 LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1682 {
1683 emit_byte(0x9e);
1684 }
1685 LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
1686
1687 LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1688 {
1689 emit_byte(0x0f);
1690 emit_byte(0xa2);
1691 }
1692 LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
1693
1694 LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1695 {
1696 emit_byte(0x9f);
1697 }
1698 LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
1699
1700 LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1701 {
1702 emit_byte(0x0f);
1703 emit_byte(0x90+cc);
1704 emit_byte(0xc0+d);
1705 }
1706 LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
1707
1708 LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1709 {
1710 emit_byte(0x0f);
1711 emit_byte(0x90+cc);
1712 emit_byte(0x05);
1713 emit_long(d);
1714 }
1715 LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
1716
1717 LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1718 {
1719 if (have_cmov) {
1720 emit_byte(0x0f);
1721 emit_byte(0x40+cc);
1722 emit_byte(0xc0+8*d+s);
1723 }
1724 else { /* replacement using branch and mov */
1725 int uncc=(cc^1);
1726 emit_byte(0x70+uncc);
1727 emit_byte(2); /* skip next 2 bytes if not cc=true */
1728 emit_byte(0x89);
1729 emit_byte(0xc0+8*s+d);
1730 }
1731 }
1732 LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1733
1734 LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1735 {
1736 emit_byte(0x0f);
1737 emit_byte(0xbc);
1738 emit_byte(0xc0+8*d+s);
1739 }
1740 LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
1741
1742 LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1743 {
1744 emit_byte(0x0f);
1745 emit_byte(0xbf);
1746 emit_byte(0xc0+8*d+s);
1747 }
1748 LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
1749
1750 LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1751 {
1752 emit_byte(0x0f);
1753 emit_byte(0xbe);
1754 emit_byte(0xc0+8*d+s);
1755 }
1756 LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
1757
1758 LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1759 {
1760 emit_byte(0x0f);
1761 emit_byte(0xb7);
1762 emit_byte(0xc0+8*d+s);
1763 }
1764 LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
1765
1766 LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1767 {
1768 emit_byte(0x0f);
1769 emit_byte(0xb6);
1770 emit_byte(0xc0+8*d+s);
1771 }
1772 LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
1773
1774 LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1775 {
1776 emit_byte(0x0f);
1777 emit_byte(0xaf);
1778 emit_byte(0xc0+8*d+s);
1779 }
1780 LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
1781
1782 LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1783 {
1784 if (d!=MUL_NREG1 || s!=MUL_NREG2)
1785 abort();
1786 emit_byte(0xf7);
1787 emit_byte(0xea);
1788 }
1789 LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
1790
1791 LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1792 {
1793 if (d!=MUL_NREG1 || s!=MUL_NREG2) {
1794 printf("Bad register in MUL: d=%d, s=%d\n",d,s);
1795 abort();
1796 }
1797 emit_byte(0xf7);
1798 emit_byte(0xe2);
1799 }
1800 LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
1801
1802 LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1803 {
1804 abort(); /* %^$&%^$%#^ x86! */
1805 emit_byte(0x0f);
1806 emit_byte(0xaf);
1807 emit_byte(0xc0+8*d+s);
1808 }
1809 LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1810
1811 LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1812 {
1813 emit_byte(0x88);
1814 emit_byte(0xc0+8*s+d);
1815 }
1816 LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
1817
1818 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1819 {
1820 emit_byte(0x66);
1821 emit_byte(0x89);
1822 emit_byte(0xc0+8*s+d);
1823 }
1824 LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
1825
1826 LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1827 {
1828 int isebp=(baser==5)?0x40:0;
1829 int fi;
1830
1831 switch(factor) {
1832 case 1: fi=0; break;
1833 case 2: fi=1; break;
1834 case 4: fi=2; break;
1835 case 8: fi=3; break;
1836 default: abort();
1837 }
1838
1839
1840 emit_byte(0x8b);
1841 emit_byte(0x04+8*d+isebp);
1842 emit_byte(baser+8*index+0x40*fi);
1843 if (isebp)
1844 emit_byte(0x00);
1845 }
1846 LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
1847
1848 LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1849 {
1850 int fi;
1851 int isebp;
1852
1853 switch(factor) {
1854 case 1: fi=0; break;
1855 case 2: fi=1; break;
1856 case 4: fi=2; break;
1857 case 8: fi=3; break;
1858 default: abort();
1859 }
1860 isebp=(baser==5)?0x40:0;
1861
1862 emit_byte(0x66);
1863 emit_byte(0x8b);
1864 emit_byte(0x04+8*d+isebp);
1865 emit_byte(baser+8*index+0x40*fi);
1866 if (isebp)
1867 emit_byte(0x00);
1868 }
1869 LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
1870
1871 LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1872 {
1873 int fi;
1874 int isebp;
1875
1876 switch(factor) {
1877 case 1: fi=0; break;
1878 case 2: fi=1; break;
1879 case 4: fi=2; break;
1880 case 8: fi=3; break;
1881 default: abort();
1882 }
1883 isebp=(baser==5)?0x40:0;
1884
1885 emit_byte(0x8a);
1886 emit_byte(0x04+8*d+isebp);
1887 emit_byte(baser+8*index+0x40*fi);
1888 if (isebp)
1889 emit_byte(0x00);
1890 }
1891 LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
1892
1893 LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1894 {
1895 int fi;
1896 int isebp;
1897
1898 switch(factor) {
1899 case 1: fi=0; break;
1900 case 2: fi=1; break;
1901 case 4: fi=2; break;
1902 case 8: fi=3; break;
1903 default: abort();
1904 }
1905
1906
1907 isebp=(baser==5)?0x40:0;
1908
1909 emit_byte(0x89);
1910 emit_byte(0x04+8*s+isebp);
1911 emit_byte(baser+8*index+0x40*fi);
1912 if (isebp)
1913 emit_byte(0x00);
1914 }
1915 LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
1916
1917 LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1918 {
1919 int fi;
1920 int isebp;
1921
1922 switch(factor) {
1923 case 1: fi=0; break;
1924 case 2: fi=1; break;
1925 case 4: fi=2; break;
1926 case 8: fi=3; break;
1927 default: abort();
1928 }
1929 isebp=(baser==5)?0x40:0;
1930
1931 emit_byte(0x66);
1932 emit_byte(0x89);
1933 emit_byte(0x04+8*s+isebp);
1934 emit_byte(baser+8*index+0x40*fi);
1935 if (isebp)
1936 emit_byte(0x00);
1937 }
1938 LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
1939
1940 LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1941 {
1942 int fi;
1943 int isebp;
1944
1945 switch(factor) {
1946 case 1: fi=0; break;
1947 case 2: fi=1; break;
1948 case 4: fi=2; break;
1949 case 8: fi=3; break;
1950 default: abort();
1951 }
1952 isebp=(baser==5)?0x40:0;
1953
1954 emit_byte(0x88);
1955 emit_byte(0x04+8*s+isebp);
1956 emit_byte(baser+8*index+0x40*fi);
1957 if (isebp)
1958 emit_byte(0x00);
1959 }
1960 LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
1961
1962 LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1963 {
1964 int fi;
1965
1966 switch(factor) {
1967 case 1: fi=0; break;
1968 case 2: fi=1; break;
1969 case 4: fi=2; break;
1970 case 8: fi=3; break;
1971 default: abort();
1972 }
1973
1974 emit_byte(0x89);
1975 emit_byte(0x84+8*s);
1976 emit_byte(baser+8*index+0x40*fi);
1977 emit_long(base);
1978 }
1979 LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
1980
1981 LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
1982 {
1983 int fi;
1984
1985 switch(factor) {
1986 case 1: fi=0; break;
1987 case 2: fi=1; break;
1988 case 4: fi=2; break;
1989 case 8: fi=3; break;
1990 default: abort();
1991 }
1992
1993 emit_byte(0x66);
1994 emit_byte(0x89);
1995 emit_byte(0x84+8*s);
1996 emit_byte(baser+8*index+0x40*fi);
1997 emit_long(base);
1998 }
1999 LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2000
2001 LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2002 {
2003 int fi;
2004
2005 switch(factor) {
2006 case 1: fi=0; break;
2007 case 2: fi=1; break;
2008 case 4: fi=2; break;
2009 case 8: fi=3; break;
2010 default: abort();
2011 }
2012
2013 emit_byte(0x88);
2014 emit_byte(0x84+8*s);
2015 emit_byte(baser+8*index+0x40*fi);
2016 emit_long(base);
2017 }
2018 LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2019
2020 LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2021 {
2022 int fi;
2023
2024 switch(factor) {
2025 case 1: fi=0; break;
2026 case 2: fi=1; break;
2027 case 4: fi=2; break;
2028 case 8: fi=3; break;
2029 default: abort();
2030 }
2031
2032 emit_byte(0x8b);
2033 emit_byte(0x84+8*d);
2034 emit_byte(baser+8*index+0x40*fi);
2035 emit_long(base);
2036 }
2037 LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2038
2039 LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2040 {
2041 int fi;
2042
2043 switch(factor) {
2044 case 1: fi=0; break;
2045 case 2: fi=1; break;
2046 case 4: fi=2; break;
2047 case 8: fi=3; break;
2048 default: abort();
2049 }
2050
2051 emit_byte(0x66);
2052 emit_byte(0x8b);
2053 emit_byte(0x84+8*d);
2054 emit_byte(baser+8*index+0x40*fi);
2055 emit_long(base);
2056 }
2057 LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2058
2059 LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2060 {
2061 int fi;
2062
2063 switch(factor) {
2064 case 1: fi=0; break;
2065 case 2: fi=1; break;
2066 case 4: fi=2; break;
2067 case 8: fi=3; break;
2068 default: abort();
2069 }
2070
2071 emit_byte(0x8a);
2072 emit_byte(0x84+8*d);
2073 emit_byte(baser+8*index+0x40*fi);
2074 emit_long(base);
2075 }
2076 LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2077
2078 LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2079 {
2080 int fi;
2081 switch(factor) {
2082 case 1: fi=0; break;
2083 case 2: fi=1; break;
2084 case 4: fi=2; break;
2085 case 8: fi=3; break;
2086 default:
2087 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2088 abort();
2089 }
2090 emit_byte(0x8b);
2091 emit_byte(0x04+8*d);
2092 emit_byte(0x05+8*index+64*fi);
2093 emit_long(base);
2094 }
2095 LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2096
2097 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2098 {
2099 int fi;
2100 switch(factor) {
2101 case 1: fi=0; break;
2102 case 2: fi=1; break;
2103 case 4: fi=2; break;
2104 case 8: fi=3; break;
2105 default:
2106 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2107 abort();
2108 }
2109 if (have_cmov) {
2110 emit_byte(0x0f);
2111 emit_byte(0x40+cond);
2112 emit_byte(0x04+8*d);
2113 emit_byte(0x05+8*index+64*fi);
2114 emit_long(base);
2115 }
2116 else { /* replacement using branch and mov */
2117 int uncc=(cond^1);
2118 emit_byte(0x70+uncc);
2119 emit_byte(7); /* skip next 7 bytes if not cc=true */
2120 emit_byte(0x8b);
2121 emit_byte(0x04+8*d);
2122 emit_byte(0x05+8*index+64*fi);
2123 emit_long(base);
2124 }
2125 }
2126 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2127
2128 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2129 {
2130 if (have_cmov) {
2131 emit_byte(0x0f);
2132 emit_byte(0x40+cond);
2133 emit_byte(0x05+8*d);
2134 emit_long(mem);
2135 }
2136 else { /* replacement using branch and mov */
2137 int uncc=(cond^1);
2138 emit_byte(0x70+uncc);
2139 emit_byte(6); /* skip next 6 bytes if not cc=true */
2140 emit_byte(0x8b);
2141 emit_byte(0x05+8*d);
2142 emit_long(mem);
2143 }
2144 }
2145 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2146
2147 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2148 {
2149 Dif(!isbyte(offset)) abort();
2150 emit_byte(0x8b);
2151 emit_byte(0x40+8*d+s);
2152 emit_byte(offset);
2153 }
2154 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2155
2156 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2157 {
2158 Dif(!isbyte(offset)) abort();
2159 emit_byte(0x66);
2160 emit_byte(0x8b);
2161 emit_byte(0x40+8*d+s);
2162 emit_byte(offset);
2163 }
2164 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2165
2166 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2167 {
2168 Dif(!isbyte(offset)) abort();
2169 emit_byte(0x8a);
2170 emit_byte(0x40+8*d+s);
2171 emit_byte(offset);
2172 }
2173 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2174
2175 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2176 {
2177 emit_byte(0x8b);
2178 emit_byte(0x80+8*d+s);
2179 emit_long(offset);
2180 }
2181 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2182
2183 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2184 {
2185 emit_byte(0x66);
2186 emit_byte(0x8b);
2187 emit_byte(0x80+8*d+s);
2188 emit_long(offset);
2189 }
2190 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2191
2192 LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2193 {
2194 emit_byte(0x8a);
2195 emit_byte(0x80+8*d+s);
2196 emit_long(offset);
2197 }
2198 LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2199
2200 LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2201 {
2202 Dif(!isbyte(offset)) abort();
2203 emit_byte(0xc7);
2204 emit_byte(0x40+d);
2205 emit_byte(offset);
2206 emit_long(i);
2207 }
2208 LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2209
2210 LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2211 {
2212 Dif(!isbyte(offset)) abort();
2213 emit_byte(0x66);
2214 emit_byte(0xc7);
2215 emit_byte(0x40+d);
2216 emit_byte(offset);
2217 emit_word(i);
2218 }
2219 LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2220
2221 LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2222 {
2223 Dif(!isbyte(offset)) abort();
2224 emit_byte(0xc6);
2225 emit_byte(0x40+d);
2226 emit_byte(offset);
2227 emit_byte(i);
2228 }
2229 LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2230
2231 LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2232 {
2233 Dif(!isbyte(offset)) abort();
2234 emit_byte(0x89);
2235 emit_byte(0x40+8*s+d);
2236 emit_byte(offset);
2237 }
2238 LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2239
2240 LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2241 {
2242 Dif(!isbyte(offset)) abort();
2243 emit_byte(0x66);
2244 emit_byte(0x89);
2245 emit_byte(0x40+8*s+d);
2246 emit_byte(offset);
2247 }
2248 LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2249
2250 LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2251 {
2252 Dif(!isbyte(offset)) abort();
2253 emit_byte(0x88);
2254 emit_byte(0x40+8*s+d);
2255 emit_byte(offset);
2256 }
2257 LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2258
2259 LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2260 {
2261 if (optimize_imm8 && isbyte(offset)) {
2262 emit_byte(0x8d);
2263 emit_byte(0x40+8*d+s);
2264 emit_byte(offset);
2265 }
2266 else {
2267 emit_byte(0x8d);
2268 emit_byte(0x80+8*d+s);
2269 emit_long(offset);
2270 }
2271 }
2272 LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2273
2274 LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2275 {
2276 int fi;
2277
2278 switch(factor) {
2279 case 1: fi=0; break;
2280 case 2: fi=1; break;
2281 case 4: fi=2; break;
2282 case 8: fi=3; break;
2283 default: abort();
2284 }
2285
2286 if (optimize_imm8 && isbyte(offset)) {
2287 emit_byte(0x8d);
2288 emit_byte(0x44+8*d);
2289 emit_byte(0x40*fi+8*index+s);
2290 emit_byte(offset);
2291 }
2292 else {
2293 emit_byte(0x8d);
2294 emit_byte(0x84+8*d);
2295 emit_byte(0x40*fi+8*index+s);
2296 emit_long(offset);
2297 }
2298 }
2299 LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2300
2301 LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2302 {
2303 int isebp=(s==5)?0x40:0;
2304 int fi;
2305
2306 switch(factor) {
2307 case 1: fi=0; break;
2308 case 2: fi=1; break;
2309 case 4: fi=2; break;
2310 case 8: fi=3; break;
2311 default: abort();
2312 }
2313
2314 emit_byte(0x8d);
2315 emit_byte(0x04+8*d+isebp);
2316 emit_byte(0x40*fi+8*index+s);
2317 if (isebp)
2318 emit_byte(0);
2319 }
2320 LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2321
2322 LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2323 {
2324 if (optimize_imm8 && isbyte(offset)) {
2325 emit_byte(0x89);
2326 emit_byte(0x40+8*s+d);
2327 emit_byte(offset);
2328 }
2329 else {
2330 emit_byte(0x89);
2331 emit_byte(0x80+8*s+d);
2332 emit_long(offset);
2333 }
2334 }
2335 LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2336
2337 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2338 {
2339 emit_byte(0x66);
2340 emit_byte(0x89);
2341 emit_byte(0x80+8*s+d);
2342 emit_long(offset);
2343 }
2344 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2345
2346 LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2347 {
2348 if (optimize_imm8 && isbyte(offset)) {
2349 emit_byte(0x88);
2350 emit_byte(0x40+8*s+d);
2351 emit_byte(offset);
2352 }
2353 else {
2354 emit_byte(0x88);
2355 emit_byte(0x80+8*s+d);
2356 emit_long(offset);
2357 }
2358 }
2359 LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2360
2361 LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2362 {
2363 emit_byte(0x0f);
2364 emit_byte(0xc8+r);
2365 }
2366 LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2367
2368 LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2369 {
2370 emit_byte(0x66);
2371 emit_byte(0xc1);
2372 emit_byte(0xc0+r);
2373 emit_byte(0x08);
2374 }
2375 LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2376
2377 LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2378 {
2379 emit_byte(0x89);
2380 emit_byte(0xc0+8*s+d);
2381 }
2382 LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2383
2384 LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2385 {
2386 emit_byte(0x89);
2387 emit_byte(0x05+8*s);
2388 emit_long(d);
2389 }
2390 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2391
2392 LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2393 {
2394 emit_byte(0x66);
2395 emit_byte(0x89);
2396 emit_byte(0x05+8*s);
2397 emit_long(d);
2398 }
2399 LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2400
2401 LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2402 {
2403 emit_byte(0x66);
2404 emit_byte(0x8b);
2405 emit_byte(0x05+8*d);
2406 emit_long(s);
2407 }
2408 LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2409
2410 LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2411 {
2412 emit_byte(0x88);
2413 emit_byte(0x05+8*s);
2414 emit_long(d);
2415 }
2416 LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2417
2418 LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2419 {
2420 emit_byte(0x8a);
2421 emit_byte(0x05+8*d);
2422 emit_long(s);
2423 }
2424 LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2425
2426 LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2427 {
2428 emit_byte(0xb8+d);
2429 emit_long(s);
2430 }
2431 LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2432
2433 LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2434 {
2435 emit_byte(0x66);
2436 emit_byte(0xb8+d);
2437 emit_word(s);
2438 }
2439 LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2440
2441 LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2442 {
2443 emit_byte(0xb0+d);
2444 emit_byte(s);
2445 }
2446 LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2447
2448 LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2449 {
2450 emit_byte(0x81);
2451 emit_byte(0x15);
2452 emit_long(d);
2453 emit_long(s);
2454 }
2455 LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2456
2457 LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2458 {
2459 if (optimize_imm8 && isbyte(s)) {
2460 emit_byte(0x83);
2461 emit_byte(0x05);
2462 emit_long(d);
2463 emit_byte(s);
2464 }
2465 else {
2466 emit_byte(0x81);
2467 emit_byte(0x05);
2468 emit_long(d);
2469 emit_long(s);
2470 }
2471 }
2472 LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2473
2474 LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2475 {
2476 emit_byte(0x66);
2477 emit_byte(0x81);
2478 emit_byte(0x05);
2479 emit_long(d);
2480 emit_word(s);
2481 }
2482 LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2483
2484 LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2485 {
2486 emit_byte(0x80);
2487 emit_byte(0x05);
2488 emit_long(d);
2489 emit_byte(s);
2490 }
2491 LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2492
2493 LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2494 {
2495 if (optimize_accum && isaccum(d))
2496 emit_byte(0xa9);
2497 else {
2498 emit_byte(0xf7);
2499 emit_byte(0xc0+d);
2500 }
2501 emit_long(i);
2502 }
2503 LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2504
2505 LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2506 {
2507 emit_byte(0x85);
2508 emit_byte(0xc0+8*s+d);
2509 }
2510 LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2511
2512 LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2513 {
2514 emit_byte(0x66);
2515 emit_byte(0x85);
2516 emit_byte(0xc0+8*s+d);
2517 }
2518 LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2519
2520 LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2521 {
2522 emit_byte(0x84);
2523 emit_byte(0xc0+8*s+d);
2524 }
2525 LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2526
2527 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2528 {
2529 emit_byte(0x81);
2530 emit_byte(0xf0+d);
2531 emit_long(i);
2532 }
2533 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2534
2535 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2536 {
2537 if (optimize_imm8 && isbyte(i)) {
2538 emit_byte(0x83);
2539 emit_byte(0xe0+d);
2540 emit_byte(i);
2541 }
2542 else {
2543 if (optimize_accum && isaccum(d))
2544 emit_byte(0x25);
2545 else {
2546 emit_byte(0x81);
2547 emit_byte(0xe0+d);
2548 }
2549 emit_long(i);
2550 }
2551 }
2552 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2553
2554 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2555 {
2556 emit_byte(0x66);
2557 if (optimize_imm8 && isbyte(i)) {
2558 emit_byte(0x83);
2559 emit_byte(0xe0+d);
2560 emit_byte(i);
2561 }
2562 else {
2563 if (optimize_accum && isaccum(d))
2564 emit_byte(0x25);
2565 else {
2566 emit_byte(0x81);
2567 emit_byte(0xe0+d);
2568 }
2569 emit_word(i);
2570 }
2571 }
2572 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2573
2574 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2575 {
2576 emit_byte(0x21);
2577 emit_byte(0xc0+8*s+d);
2578 }
2579 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2580
2581 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2582 {
2583 emit_byte(0x66);
2584 emit_byte(0x21);
2585 emit_byte(0xc0+8*s+d);
2586 }
2587 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2588
2589 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2590 {
2591 emit_byte(0x20);
2592 emit_byte(0xc0+8*s+d);
2593 }
2594 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2595
2596 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2597 {
2598 if (optimize_imm8 && isbyte(i)) {
2599 emit_byte(0x83);
2600 emit_byte(0xc8+d);
2601 emit_byte(i);
2602 }
2603 else {
2604 if (optimize_accum && isaccum(d))
2605 emit_byte(0x0d);
2606 else {
2607 emit_byte(0x81);
2608 emit_byte(0xc8+d);
2609 }
2610 emit_long(i);
2611 }
2612 }
2613 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2614
2615 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2616 {
2617 emit_byte(0x09);
2618 emit_byte(0xc0+8*s+d);
2619 }
2620 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2621
2622 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2623 {
2624 emit_byte(0x66);
2625 emit_byte(0x09);
2626 emit_byte(0xc0+8*s+d);
2627 }
2628 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2629
2630 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2631 {
2632 emit_byte(0x08);
2633 emit_byte(0xc0+8*s+d);
2634 }
2635 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2636
2637 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2638 {
2639 emit_byte(0x11);
2640 emit_byte(0xc0+8*s+d);
2641 }
2642 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2643
2644 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2645 {
2646 emit_byte(0x66);
2647 emit_byte(0x11);
2648 emit_byte(0xc0+8*s+d);
2649 }
2650 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2651
2652 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2653 {
2654 emit_byte(0x10);
2655 emit_byte(0xc0+8*s+d);
2656 }
2657 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2658
2659 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2660 {
2661 emit_byte(0x01);
2662 emit_byte(0xc0+8*s+d);
2663 }
2664 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2665
2666 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2667 {
2668 emit_byte(0x66);
2669 emit_byte(0x01);
2670 emit_byte(0xc0+8*s+d);
2671 }
2672 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2673
2674 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2675 {
2676 emit_byte(0x00);
2677 emit_byte(0xc0+8*s+d);
2678 }
2679 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2680
2681 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2682 {
2683 if (isbyte(i)) {
2684 emit_byte(0x83);
2685 emit_byte(0xe8+d);
2686 emit_byte(i);
2687 }
2688 else {
2689 if (optimize_accum && isaccum(d))
2690 emit_byte(0x2d);
2691 else {
2692 emit_byte(0x81);
2693 emit_byte(0xe8+d);
2694 }
2695 emit_long(i);
2696 }
2697 }
2698 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2699
2700 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2701 {
2702 if (optimize_accum && isaccum(d))
2703 emit_byte(0x2c);
2704 else {
2705 emit_byte(0x80);
2706 emit_byte(0xe8+d);
2707 }
2708 emit_byte(i);
2709 }
2710 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2711
2712 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2713 {
2714 if (isbyte(i)) {
2715 emit_byte(0x83);
2716 emit_byte(0xc0+d);
2717 emit_byte(i);
2718 }
2719 else {
2720 if (optimize_accum && isaccum(d))
2721 emit_byte(0x05);
2722 else {
2723 emit_byte(0x81);
2724 emit_byte(0xc0+d);
2725 }
2726 emit_long(i);
2727 }
2728 }
2729 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2730
2731 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2732 {
2733 emit_byte(0x66);
2734 if (isbyte(i)) {
2735 emit_byte(0x83);
2736 emit_byte(0xc0+d);
2737 emit_byte(i);
2738 }
2739 else {
2740 if (optimize_accum && isaccum(d))
2741 emit_byte(0x05);
2742 else {
2743 emit_byte(0x81);
2744 emit_byte(0xc0+d);
2745 }
2746 emit_word(i);
2747 }
2748 }
2749 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2750
2751 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2752 {
2753 if (optimize_accum && isaccum(d))
2754 emit_byte(0x04);
2755 else {
2756 emit_byte(0x80);
2757 emit_byte(0xc0+d);
2758 }
2759 emit_byte(i);
2760 }
2761 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2762
2763 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2764 {
2765 emit_byte(0x19);
2766 emit_byte(0xc0+8*s+d);
2767 }
2768 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2769
2770 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2771 {
2772 emit_byte(0x66);
2773 emit_byte(0x19);
2774 emit_byte(0xc0+8*s+d);
2775 }
2776 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2777
2778 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2779 {
2780 emit_byte(0x18);
2781 emit_byte(0xc0+8*s+d);
2782 }
2783 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2784
2785 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2786 {
2787 emit_byte(0x29);
2788 emit_byte(0xc0+8*s+d);
2789 }
2790 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2791
2792 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2793 {
2794 emit_byte(0x66);
2795 emit_byte(0x29);
2796 emit_byte(0xc0+8*s+d);
2797 }
2798 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2799
2800 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2801 {
2802 emit_byte(0x28);
2803 emit_byte(0xc0+8*s+d);
2804 }
2805 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2806
2807 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2808 {
2809 emit_byte(0x39);
2810 emit_byte(0xc0+8*s+d);
2811 }
2812 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2813
2814 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2815 {
2816 if (optimize_imm8 && isbyte(i)) {
2817 emit_byte(0x83);
2818 emit_byte(0xf8+r);
2819 emit_byte(i);
2820 }
2821 else {
2822 if (optimize_accum && isaccum(r))
2823 emit_byte(0x3d);
2824 else {
2825 emit_byte(0x81);
2826 emit_byte(0xf8+r);
2827 }
2828 emit_long(i);
2829 }
2830 }
2831 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2832
2833 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2834 {
2835 emit_byte(0x66);
2836 emit_byte(0x39);
2837 emit_byte(0xc0+8*s+d);
2838 }
2839 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2840
2841 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2842 {
2843 emit_byte(0x80);
2844 emit_byte(0x3d);
2845 emit_long(d);
2846 emit_byte(s);
2847 }
2848 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2849
2850 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2851 {
2852 if (optimize_accum && isaccum(d))
2853 emit_byte(0x3c);
2854 else {
2855 emit_byte(0x80);
2856 emit_byte(0xf8+d);
2857 }
2858 emit_byte(i);
2859 }
2860 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2861
2862 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2863 {
2864 emit_byte(0x38);
2865 emit_byte(0xc0+8*s+d);
2866 }
2867 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2868
2869 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2870 {
2871 int fi;
2872
2873 switch(factor) {
2874 case 1: fi=0; break;
2875 case 2: fi=1; break;
2876 case 4: fi=2; break;
2877 case 8: fi=3; break;
2878 default: abort();
2879 }
2880 emit_byte(0x39);
2881 emit_byte(0x04+8*d);
2882 emit_byte(5+8*index+0x40*fi);
2883 emit_long(offset);
2884 }
2885 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
2886
2887 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2888 {
2889 emit_byte(0x31);
2890 emit_byte(0xc0+8*s+d);
2891 }
2892 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
2893
2894 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2895 {
2896 emit_byte(0x66);
2897 emit_byte(0x31);
2898 emit_byte(0xc0+8*s+d);
2899 }
2900 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
2901
2902 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2903 {
2904 emit_byte(0x30);
2905 emit_byte(0xc0+8*s+d);
2906 }
2907 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
2908
2909 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2910 {
2911 if (optimize_imm8 && isbyte(s)) {
2912 emit_byte(0x83);
2913 emit_byte(0x2d);
2914 emit_long(d);
2915 emit_byte(s);
2916 }
2917 else {
2918 emit_byte(0x81);
2919 emit_byte(0x2d);
2920 emit_long(d);
2921 emit_long(s);
2922 }
2923 }
2924 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
2925
2926 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2927 {
2928 if (optimize_imm8 && isbyte(s)) {
2929 emit_byte(0x83);
2930 emit_byte(0x3d);
2931 emit_long(d);
2932 emit_byte(s);
2933 }
2934 else {
2935 emit_byte(0x81);
2936 emit_byte(0x3d);
2937 emit_long(d);
2938 emit_long(s);
2939 }
2940 }
2941 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2942
2943 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2944 {
2945 emit_byte(0x87);
2946 emit_byte(0xc0+8*r1+r2);
2947 }
2948 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2949
2950 /*************************************************************************
2951 * FIXME: mem access modes probably wrong *
2952 *************************************************************************/
2953
2954 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
2955 {
2956 emit_byte(0x9c);
2957 }
2958 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
2959
2960 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2961 {
2962 emit_byte(0x9d);
2963 }
2964 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2965
2966 #endif
2967
2968 /*************************************************************************
2969 * Unoptimizable stuff --- jump *
2970 *************************************************************************/
2971
2972 static __inline__ void raw_call_r(R4 r)
2973 {
2974 #if USE_NEW_RTASM
2975 CALLsr(r);
2976 #else
2977 emit_byte(0xff);
2978 emit_byte(0xd0+r);
2979 #endif
2980 }
2981
2982 static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
2983 {
2984 #if USE_NEW_RTASM
2985 CALLsm(base, X86_NOREG, r, m);
2986 #else
2987 int mu;
2988 switch(m) {
2989 case 1: mu=0; break;
2990 case 2: mu=1; break;
2991 case 4: mu=2; break;
2992 case 8: mu=3; break;
2993 default: abort();
2994 }
2995 emit_byte(0xff);
2996 emit_byte(0x14);
2997 emit_byte(0x05+8*r+0x40*mu);
2998 emit_long(base);
2999 #endif
3000 }
3001
3002 static __inline__ void raw_jmp_r(R4 r)
3003 {
3004 #if USE_NEW_RTASM
3005 JMPsr(r);
3006 #else
3007 emit_byte(0xff);
3008 emit_byte(0xe0+r);
3009 #endif
3010 }
3011
3012 static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
3013 {
3014 #if USE_NEW_RTASM
3015 JMPsm(base, X86_NOREG, r, m);
3016 #else
3017 int mu;
3018 switch(m) {
3019 case 1: mu=0; break;
3020 case 2: mu=1; break;
3021 case 4: mu=2; break;
3022 case 8: mu=3; break;
3023 default: abort();
3024 }
3025 emit_byte(0xff);
3026 emit_byte(0x24);
3027 emit_byte(0x05+8*r+0x40*mu);
3028 emit_long(base);
3029 #endif
3030 }
3031
3032 static __inline__ void raw_jmp_m(uae_u32 base)
3033 {
3034 emit_byte(0xff);
3035 emit_byte(0x25);
3036 emit_long(base);
3037 }
3038
3039
3040 static __inline__ void raw_call(uae_u32 t)
3041 {
3042 #if USE_NEW_RTASM
3043 CALLm(t);
3044 #else
3045 emit_byte(0xe8);
3046 emit_long(t-(uae_u32)target-4);
3047 #endif
3048 }
3049
3050 static __inline__ void raw_jmp(uae_u32 t)
3051 {
3052 #if USE_NEW_RTASM
3053 JMPm(t);
3054 #else
3055 emit_byte(0xe9);
3056 emit_long(t-(uae_u32)target-4);
3057 #endif
3058 }
3059
3060 static __inline__ void raw_jl(uae_u32 t)
3061 {
3062 emit_byte(0x0f);
3063 emit_byte(0x8c);
3064 emit_long(t-(uintptr)target-4);
3065 }
3066
3067 static __inline__ void raw_jz(uae_u32 t)
3068 {
3069 emit_byte(0x0f);
3070 emit_byte(0x84);
3071 emit_long(t-(uintptr)target-4);
3072 }
3073
3074 static __inline__ void raw_jnz(uae_u32 t)
3075 {
3076 emit_byte(0x0f);
3077 emit_byte(0x85);
3078 emit_long(t-(uintptr)target-4);
3079 }
3080
3081 static __inline__ void raw_jnz_l_oponly(void)
3082 {
3083 emit_byte(0x0f);
3084 emit_byte(0x85);
3085 }
3086
3087 static __inline__ void raw_jcc_l_oponly(int cc)
3088 {
3089 emit_byte(0x0f);
3090 emit_byte(0x80+cc);
3091 }
3092
3093 static __inline__ void raw_jnz_b_oponly(void)
3094 {
3095 emit_byte(0x75);
3096 }
3097
3098 static __inline__ void raw_jz_b_oponly(void)
3099 {
3100 emit_byte(0x74);
3101 }
3102
3103 static __inline__ void raw_jcc_b_oponly(int cc)
3104 {
3105 emit_byte(0x70+cc);
3106 }
3107
3108 static __inline__ void raw_jmp_l_oponly(void)
3109 {
3110 emit_byte(0xe9);
3111 }
3112
3113 static __inline__ void raw_jmp_b_oponly(void)
3114 {
3115 emit_byte(0xeb);
3116 }
3117
3118 static __inline__ void raw_ret(void)
3119 {
3120 emit_byte(0xc3);
3121 }
3122
3123 static __inline__ void raw_nop(void)
3124 {
3125 emit_byte(0x90);
3126 }
3127
3128 static __inline__ void raw_emit_nop_filler(int nbytes)
3129 {
3130 /* Source: GNU Binutils 2.12.90.0.15 */
3131 /* Various efficient no-op patterns for aligning code labels.
3132 Note: Don't try to assemble the instructions in the comments.
3133 0L and 0w are not legal. */
3134 static const uae_u8 f32_1[] =
3135 {0x90}; /* nop */
3136 static const uae_u8 f32_2[] =
3137 {0x89,0xf6}; /* movl %esi,%esi */
3138 static const uae_u8 f32_3[] =
3139 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
3140 static const uae_u8 f32_4[] =
3141 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3142 static const uae_u8 f32_5[] =
3143 {0x90, /* nop */
3144 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
3145 static const uae_u8 f32_6[] =
3146 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
3147 static const uae_u8 f32_7[] =
3148 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3149 static const uae_u8 f32_8[] =
3150 {0x90, /* nop */
3151 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
3152 static const uae_u8 f32_9[] =
3153 {0x89,0xf6, /* movl %esi,%esi */
3154 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3155 static const uae_u8 f32_10[] =
3156 {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
3157 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3158 static const uae_u8 f32_11[] =
3159 {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
3160 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3161 static const uae_u8 f32_12[] =
3162 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3163 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
3164 static const uae_u8 f32_13[] =
3165 {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
3166 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3167 static const uae_u8 f32_14[] =
3168 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
3169 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
3170 static const uae_u8 f32_15[] =
3171 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3172 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3173 static const uae_u8 f32_16[] =
3174 {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
3175 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3176 static const uae_u8 *const f32_patt[] = {
3177 f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3178 f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3179 };
3180 static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
3181
3182 #if defined(__x86_64__)
3183 /* The recommended way to pad 64bit code is to use NOPs preceded by
3184 maximally four 0x66 prefixes. Balance the size of nops. */
3185 if (nbytes == 0)
3186 return;
3187
3188 int i;
3189 int nnops = (nbytes + 3) / 4;
3190 int len = nbytes / nnops;
3191 int remains = nbytes - nnops * len;
3192
3193 for (i = 0; i < remains; i++) {
3194 emit_block(prefixes, len);
3195 raw_nop();
3196 }
3197 for (; i < nnops; i++) {
3198 emit_block(prefixes, len - 1);
3199 raw_nop();
3200 }
3201 #else
3202 int nloops = nbytes / 16;
3203 while (nloops-- > 0)
3204 emit_block(f32_16, sizeof(f32_16));
3205
3206 nbytes %= 16;
3207 if (nbytes)
3208 emit_block(f32_patt[nbytes - 1], nbytes);
3209 #endif
3210 }
3211
3212
3213 /*************************************************************************
3214 * Flag handling, to and fro UAE flag register *
3215 *************************************************************************/
3216
3217 #ifdef SAHF_SETO_PROFITABLE
3218
3219 #define FLAG_NREG1 0 /* Set to -1 if any register will do */
3220
3221 static __inline__ void raw_flags_to_reg(int r)
3222 {
3223 raw_lahf(0); /* Most flags in AH */
3224 //raw_setcc(r,0); /* V flag in AL */
3225 raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
3226
3227 #if 1 /* Let's avoid those nasty partial register stalls */
3228 //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
3229 raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
3230 //live.state[FLAGTMP].status=CLEAN;
3231 live.state[FLAGTMP].status=INMEM;
3232 live.state[FLAGTMP].realreg=-1;
3233 /* We just "evicted" FLAGTMP. */
3234 if (live.nat[r].nholds!=1) {
3235 /* Huh? */
3236 abort();
3237 }
3238 live.nat[r].nholds=0;
3239 #endif
3240 }
3241
3242 #define FLAG_NREG2 0 /* Set to -1 if any register will do */
3243 static __inline__ void raw_reg_to_flags(int r)
3244 {
3245 raw_cmp_b_ri(r,-127); /* set V */
3246 raw_sahf(0);
3247 }
3248
3249 #define FLAG_NREG3 0 /* Set to -1 if any register will do */
3250 static __inline__ void raw_flags_set_zero(int s, int tmp)
3251 {
3252 raw_mov_l_rr(tmp,s);
3253 raw_lahf(s); /* flags into ah */
3254 raw_and_l_ri(s,0xffffbfff);
3255 raw_and_l_ri(tmp,0x00004000);
3256 raw_xor_l_ri(tmp,0x00004000);
3257 raw_or_l(s,tmp);
3258 raw_sahf(s);
3259 }
3260
3261 #else
3262
3263 #define FLAG_NREG1 -1 /* Set to -1 if any register will do */
3264 static __inline__ void raw_flags_to_reg(int r)
3265 {
3266 raw_pushfl();
3267 raw_pop_l_r(r);
3268 raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
3269 // live.state[FLAGTMP].status=CLEAN;
3270 live.state[FLAGTMP].status=INMEM;
3271 live.state[FLAGTMP].realreg=-1;
3272 /* We just "evicted" FLAGTMP. */
3273 if (live.nat[r].nholds!=1) {
3274 /* Huh? */
3275 abort();
3276 }
3277 live.nat[r].nholds=0;
3278 }
3279
3280 #define FLAG_NREG2 -1 /* Set to -1 if any register will do */
3281 static __inline__ void raw_reg_to_flags(int r)
3282 {
3283 raw_push_l_r(r);
3284 raw_popfl();
3285 }
3286
3287 #define FLAG_NREG3 -1 /* Set to -1 if any register will do */
3288 static __inline__ void raw_flags_set_zero(int s, int tmp)
3289 {
3290 raw_mov_l_rr(tmp,s);
3291 raw_pushfl();
3292 raw_pop_l_r(s);
3293 raw_and_l_ri(s,0xffffffbf);
3294 raw_and_l_ri(tmp,0x00000040);
3295 raw_xor_l_ri(tmp,0x00000040);
3296 raw_or_l(s,tmp);
3297 raw_push_l_r(s);
3298 raw_popfl();
3299 }
3300 #endif
3301
3302 /* Apparently, there are enough instructions between flag store and
3303 flag reload to avoid the partial memory stall */
3304 static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
3305 {
3306 #if 1
3307 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3308 #else
3309 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3310 raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
3311 #endif
3312 }
3313
3314 /* FLAGX is byte sized, and we *do* write it at that size */
3315 static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
3316 {
3317 if (live.nat[target].canbyte)
3318 raw_mov_b_rm(target,(uintptr)live.state[r].mem);
3319 else if (live.nat[target].canword)
3320 raw_mov_w_rm(target,(uintptr)live.state[r].mem);
3321 else
3322 raw_mov_l_rm(target,(uintptr)live.state[r].mem);
3323 }
3324
3325 static __inline__ void raw_inc_sp(int off)
3326 {
3327 raw_add_l_ri(ESP_INDEX,off);
3328 }
3329
3330 /*************************************************************************
3331 * Handling mistaken direct memory access *
3332 *************************************************************************/
3333
3334 // gb-- I don't need that part for JIT Basilisk II
3335 #if defined(NATMEM_OFFSET) && 0
3336 #include <asm/sigcontext.h>
3337 #include <signal.h>
3338
3339 #define SIG_READ 1
3340 #define SIG_WRITE 2
3341
3342 static int in_handler=0;
3343 static uae_u8 veccode[256];
3344
3345 static void vec(int x, struct sigcontext sc)
3346 {
3347 uae_u8* i=(uae_u8*)sc.eip;
3348 uae_u32 addr=sc.cr2;
3349 int r=-1;
3350 int size=4;
3351 int dir=-1;
3352 int len=0;
3353 int j;
3354
3355 write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
3356 if (!canbang)
3357 write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
3358 if (in_handler)
3359 write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
3360
3361 if (canbang && i>=compiled_code && i<=current_compile_p) {
3362 if (*i==0x66) {
3363 i++;
3364 size=2;
3365 len++;
3366 }
3367
3368 switch(i[0]) {
3369 case 0x8a:
3370 if ((i[1]&0xc0)==0x80) {
3371 r=(i[1]>>3)&7;
3372 dir=SIG_READ;
3373 size=1;
3374 len+=6;
3375 break;
3376 }
3377 break;
3378 case 0x88:
3379 if ((i[1]&0xc0)==0x80) {
3380 r=(i[1]>>3)&7;
3381 dir=SIG_WRITE;
3382 size=1;
3383 len+=6;
3384 break;
3385 }
3386 break;
3387 case 0x8b:
3388 if ((i[1]&0xc0)==0x80) {
3389 r=(i[1]>>3)&7;
3390 dir=SIG_READ;
3391 len+=6;
3392 break;
3393 }
3394 if ((i[1]&0xc0)==0x40) {
3395 r=(i[1]>>3)&7;
3396 dir=SIG_READ;
3397 len+=3;
3398 break;
3399 }
3400 break;
3401 case 0x89:
3402 if ((i[1]&0xc0)==0x80) {
3403 r=(i[1]>>3)&7;
3404 dir=SIG_WRITE;
3405 len+=6;
3406 break;
3407 }
3408 if ((i[1]&0xc0)==0x40) {
3409 r=(i[1]>>3)&7;
3410 dir=SIG_WRITE;
3411 len+=3;
3412 break;
3413 }
3414 break;
3415 }
3416 }
3417
3418 if (r!=-1) {
3419 void* pr=NULL;
3420 write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
3421
3422 switch(r) {
3423 case 0: pr=&(sc.eax); break;
3424 case 1: pr=&(sc.ecx); break;
3425 case 2: pr=&(sc.edx); break;
3426 case 3: pr=&(sc.ebx); break;
3427 case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
3428 case 5: pr=(size>1)?
3429 (void*)(&(sc.ebp)):
3430 (void*)(((uae_u8*)&(sc.ecx))+1); break;
3431 case 6: pr=(size>1)?
3432 (void*)(&(sc.esi)):
3433 (void*)(((uae_u8*)&(sc.edx))+1); break;
3434 case 7: pr=(size>1)?
3435 (void*)(&(sc.edi)):
3436 (void*)(((uae_u8*)&(sc.ebx))+1); break;
3437 default: abort();
3438 }
3439 if (pr) {
3440 blockinfo* bi;
3441
3442 if (currprefs.comp_oldsegv) {
3443 addr-=NATMEM_OFFSET;
3444
3445 if ((addr>=0x10000000 && addr<0x40000000) ||
3446 (addr>=0x50000000)) {
3447 write_log("Suspicious address in %x SEGV handler.\n",addr);
3448 }
3449 if (dir==SIG_READ) {
3450 switch(size) {
3451 case 1: *((uae_u8*)pr)=get_byte(addr); break;
3452 case 2: *((uae_u16*)pr)=get_word(addr); break;
3453 case 4: *((uae_u32*)pr)=get_long(addr); break;
3454 default: abort();
3455 }
3456 }
3457 else { /* write */
3458 switch(size) {
3459 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3460 case 2: put_word(addr,*((uae_u16*)pr)); break;
3461 case 4: put_long(addr,*((uae_u32*)pr)); break;
3462 default: abort();
3463 }
3464 }
3465 write_log("Handled one access!\n");
3466 fflush(stdout);
3467 segvcount++;
3468 sc.eip+=len;
3469 }
3470 else {
3471 void* tmp=target;
3472 int i;
3473 uae_u8 vecbuf[5];
3474
3475 addr-=NATMEM_OFFSET;
3476
3477 if ((addr>=0x10000000 && addr<0x40000000) ||
3478 (addr>=0x50000000)) {
3479 write_log("Suspicious address in %x SEGV handler.\n",addr);
3480 }
3481
3482 target=(uae_u8*)sc.eip;
3483 for (i=0;i<5;i++)
3484 vecbuf[i]=target[i];
3485 emit_byte(0xe9);
3486 emit_long((uintptr)veccode-(uintptr)target-4);
3487 write_log("Create jump to %p\n",veccode);
3488
3489 write_log("Handled one access!\n");
3490 fflush(stdout);
3491 segvcount++;
3492
3493 target=veccode;
3494
3495 if (dir==SIG_READ) {
3496 switch(size) {
3497 case 1: raw_mov_b_ri(r,get_byte(addr)); break;
3498 case 2: raw_mov_w_ri(r,get_byte(addr)); break;
3499 case 4: raw_mov_l_ri(r,get_byte(addr)); break;
3500 default: abort();
3501 }
3502 }
3503 else { /* write */
3504 switch(size) {
3505 case 1: put_byte(addr,*((uae_u8*)pr)); break;
3506 case 2: put_word(addr,*((uae_u16*)pr)); break;
3507 case 4: put_long(addr,*((uae_u32*)pr)); break;
3508 default: abort();
3509 }
3510 }
3511 for (i=0;i<5;i++)
3512 raw_mov_b_mi(sc.eip+i,vecbuf[i]);
3513 raw_mov_l_mi((uintptr)&in_handler,0);
3514 emit_byte(0xe9);
3515 emit_long(sc.eip+len-(uintptr)target-4);
3516 in_handler=1;
3517 target=tmp;
3518 }
3519 bi=active;
3520 while (bi) {
3521 if (bi->handler &&
3522 (uae_u8*)bi->direct_handler<=i &&
3523 (uae_u8*)bi->nexthandler>i) {
3524 write_log("deleted trigger (%p<%p<%p) %p\n",
3525 bi->handler,
3526 i,
3527 bi->nexthandler,
3528 bi->pc_p);
3529 invalidate_block(bi);
3530 raise_in_cl_list(bi);
3531 set_special(0);
3532 return;
3533 }
3534 bi=bi->next;
3535 }
3536 /* Not found in the active list. Might be a rom routine that
3537 is in the dormant list */
3538 bi=dormant;
3539 while (bi) {
3540 if (bi->handler &&
3541 (uae_u8*)bi->direct_handler<=i &&
3542 (uae_u8*)bi->nexthandler>i) {
3543 write_log("deleted trigger (%p<%p<%p) %p\n",
3544 bi->handler,
3545 i,
3546 bi->nexthandler,
3547 bi->pc_p);
3548 invalidate_block(bi);
3549 raise_in_cl_list(bi);
3550 set_special(0);
3551 return;
3552 }
3553 bi=bi->next;
3554 }
3555 write_log("Huh? Could not find trigger!\n");
3556 return;
3557 }
3558 }
3559 write_log("Can't handle access!\n");
3560 for (j=0;j<10;j++) {
3561 write_log("instruction byte %2d is %02x\n",j,i[j]);
3562 }
3563 write_log("Please send the above info (starting at \"fault address\") to\n"
3564 "bmeyer@csse.monash.edu.au\n"
3565 "This shouldn't happen ;-)\n");
3566 fflush(stdout);
3567 signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
3568 }
3569 #endif
3570
3571
3572 /*************************************************************************
3573 * Checking for CPU features *
3574 *************************************************************************/
3575
3576 struct cpuinfo_x86 {
3577 uae_u8 x86; // CPU family
3578 uae_u8 x86_vendor; // CPU vendor
3579 uae_u8 x86_processor; // CPU canonical processor type
3580 uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
3581 uae_u32 x86_hwcap;
3582 uae_u8 x86_model;
3583 uae_u8 x86_mask;
3584 int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
3585 char x86_vendor_id[16];
3586 };
3587 struct cpuinfo_x86 cpuinfo;
3588
/* CPU vendors, as stored in cpuinfo_x86.x86_vendor.
   The quoted strings are the CPUID vendor IDs mapped by x86_get_cpu_vendor(). */
enum {
	X86_VENDOR_INTEL	= 0,	/* "GenuineIntel" */
	X86_VENDOR_CYRIX	= 1,	/* "CyrixInstead" */
	X86_VENDOR_AMD		= 2,	/* "AuthenticAMD" */
	X86_VENDOR_UMC		= 3,	/* "UMC UMC UMC " */
	X86_VENDOR_NEXGEN	= 4,	/* "NexGenDriven" */
	X86_VENDOR_CENTAUR	= 5,	/* "CentaurHauls" */
	X86_VENDOR_RISE		= 6,	/* "RiseRiseRise" */
	X86_VENDOR_TRANSMETA	= 7,	/* "GenuineTMx86" or "TransmetaCPU" */
	X86_VENDOR_NSC		= 8,	/* "Geode by NSC" */
	X86_VENDOR_UNKNOWN	= 0xff	/* anything else */
};
3601
/* Canonical processor types. The values index x86_processor_string_table[]
   and x86_alignments[], so the order must match those tables. */
enum {
	X86_PROCESSOR_I386		= 0,	/* 80386 */
	X86_PROCESSOR_I486		= 1,	/* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM		= 2,
	X86_PROCESSOR_PENTIUMPRO	= 3,
	X86_PROCESSOR_K6		= 4,
	X86_PROCESSOR_ATHLON		= 5,
	X86_PROCESSOR_PENTIUM4		= 6,
	X86_PROCESSOR_X86_64		= 7,
	X86_PROCESSOR_max		= 8	/* number of entries above */
};
3613
3614 static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
3615 "80386",
3616 "80486",
3617 "Pentium",
3618 "PentiumPro",
3619 "K6",
3620 "Athlon",
3621 "Pentium4",
3622 "x86-64"
3623 };
3624
3625 static struct ptt {
3626 const int align_loop;
3627 const int align_loop_max_skip;
3628 const int align_jump;
3629 const int align_jump_max_skip;
3630 const int align_func;
3631 }
3632 x86_alignments[X86_PROCESSOR_max] = {
3633 { 4, 3, 4, 3, 4 },
3634 { 16, 15, 16, 15, 16 },
3635 { 16, 7, 16, 7, 16 },
3636 { 16, 15, 16, 7, 16 },
3637 { 32, 7, 32, 7, 32 },
3638 { 16, 7, 16, 7, 16 },
3639 { 0, 0, 0, 0, 0 },
3640 { 16, 7, 16, 7, 16 }
3641 };
3642
3643 static void
3644 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3645 {
3646 char *v = c->x86_vendor_id;
3647
3648 if (!strcmp(v, "GenuineIntel"))
3649 c->x86_vendor = X86_VENDOR_INTEL;
3650 else if (!strcmp(v, "AuthenticAMD"))
3651 c->x86_vendor = X86_VENDOR_AMD;
3652 else if (!strcmp(v, "CyrixInstead"))
3653 c->x86_vendor = X86_VENDOR_CYRIX;
3654 else if (!strcmp(v, "Geode by NSC"))
3655 c->x86_vendor = X86_VENDOR_NSC;
3656 else if (!strcmp(v, "UMC UMC UMC "))
3657 c->x86_vendor = X86_VENDOR_UMC;
3658 else if (!strcmp(v, "CentaurHauls"))
3659 c->x86_vendor = X86_VENDOR_CENTAUR;
3660 else if (!strcmp(v, "NexGenDriven"))
3661 c->x86_vendor = X86_VENDOR_NEXGEN;
3662 else if (!strcmp(v, "RiseRiseRise"))
3663 c->x86_vendor = X86_VENDOR_RISE;
3664 else if (!strcmp(v, "GenuineTMx86") ||
3665 !strcmp(v, "TransmetaCPU"))
3666 c->x86_vendor = X86_VENDOR_TRANSMETA;
3667 else
3668 c->x86_vendor = X86_VENDOR_UNKNOWN;
3669 }
3670
3671 static void
3672 cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
3673 {
3674 const int CPUID_SPACE = 4096;
3675 uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
3676 if (cpuid_space == VM_MAP_FAILED)
3677 abort();
3678 vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
3679
3680 static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
3681 uae_u8* tmp=get_target();
3682
3683 s_op = op;
3684 set_target(cpuid_space);
3685 raw_push_l_r(0); /* eax */
3686 raw_push_l_r(1); /* ecx */
3687 raw_push_l_r(2); /* edx */
3688 raw_push_l_r(3); /* ebx */
3689 raw_mov_l_rm(0,(uintptr)&s_op);
3690 raw_cpuid(0);
3691 raw_mov_l_mr((uintptr)&s_eax,0);
3692 raw_mov_l_mr((uintptr)&s_ebx,3);
3693 raw_mov_l_mr((uintptr)&s_ecx,1);
3694 raw_mov_l_mr((uintptr)&s_edx,2);
3695 raw_pop_l_r(3);
3696 raw_pop_l_r(2);
3697 raw_pop_l_r(1);
3698 raw_pop_l_r(0);
3699 raw_ret();
3700 set_target(tmp);
3701
3702 ((cpuop_func*)cpuid_space)(0);
3703 if (eax != NULL) *eax = s_eax;
3704 if (ebx != NULL) *ebx = s_ebx;
3705 if (ecx != NULL) *ecx = s_ecx;
3706 if (edx != NULL) *edx = s_edx;
3707
3708 vm_release(cpuid_space, CPUID_SPACE);
3709 }
3710
3711 static void
3712 raw_init_cpu(void)
3713 {
3714 struct cpuinfo_x86 *c = &cpuinfo;
3715
3716 /* Defaults */
3717 c->x86_processor = X86_PROCESSOR_max;
3718 c->x86_vendor = X86_VENDOR_UNKNOWN;
3719 c->cpuid_level = -1; /* CPUID not detected */
3720 c->x86_model = c->x86_mask = 0; /* So far unknown... */
3721 c->x86_vendor_id[0] = '\0'; /* Unset */
3722 c->x86_hwcap = 0;
3723
3724 /* Get vendor name */
3725 c->x86_vendor_id[12] = '\0';
3726 cpuid(0x00000000,
3727 (uae_u32 *)&c->cpuid_level,
3728 (uae_u32 *)&c->x86_vendor_id[0],
3729 (uae_u32 *)&c->x86_vendor_id[8],
3730 (uae_u32 *)&c->x86_vendor_id[4]);
3731 x86_get_cpu_vendor(c);
3732
3733 /* Intel-defined flags: level 0x00000001 */
3734 c->x86_brand_id = 0;
3735 if ( c->cpuid_level >= 0x00000001 ) {
3736 uae_u32 tfms, brand_id;
3737 cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
3738 c->x86 = (tfms >> 8) & 15;
3739 c->x86_model = (tfms >> 4) & 15;
3740 c->x86_brand_id = brand_id & 0xff;
3741 if ( (c->x86_vendor == X86_VENDOR_AMD) &&
3742 (c->x86 == 0xf)) {
3743 /* AMD Extended Family and Model Values */
3744 c->x86 += (tfms >> 20) & 0xff;
3745 c->x86_model += (tfms >> 12) & 0xf0;
3746 }
3747 c->x86_mask = tfms & 15;
3748 } else {
3749 /* Have CPUID level 0 only - unheard of */
3750 c->x86 = 4;
3751 }
3752
3753 /* AMD-defined flags: level 0x80000001 */
3754 uae_u32 xlvl;
3755 cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3756 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3757 if ( xlvl >= 0x80000001 ) {
3758 uae_u32 features, extra_features;
3759 cpuid(0x80000001, NULL, NULL, &extra_features, &features);
3760 if (features & (1 << 29)) {
3761 /* Assume x86-64 if long mode is supported */
3762 c->x86_processor = X86_PROCESSOR_X86_64;
3763 }
3764 if (extra_features & (1 << 0))
3765 have_lahf_lm = true;
3766 }
3767 }
3768
3769 /* Canonicalize processor ID */
3770 switch (c->x86) {
3771 case 3:
3772 c->x86_processor = X86_PROCESSOR_I386;
3773 break;
3774 case 4:
3775 c->x86_processor = X86_PROCESSOR_I486;
3776 break;
3777 case 5:
3778 if (c->x86_vendor == X86_VENDOR_AMD)
3779 c->x86_processor = X86_PROCESSOR_K6;
3780 else
3781 c->x86_processor = X86_PROCESSOR_PENTIUM;
3782 break;
3783 case 6:
3784 if (c->x86_vendor == X86_VENDOR_AMD)
3785 c->x86_processor = X86_PROCESSOR_ATHLON;
3786 else
3787 c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
3788 break;
3789 case 15:
3790 if (c->x86_vendor == X86_VENDOR_INTEL) {
3791 /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3792 if (c->x86_brand_id >= 8)
3793 c->x86_processor = X86_PROCESSOR_PENTIUM4;
3794 }
3795 if (c->x86_vendor == X86_VENDOR_AMD) {
3796 /* Assume an Athlon processor if family == 15 and it was not
3797 detected as an x86-64 so far */
3798 if (c->x86_processor == X86_PROCESSOR_max)
3799 c->x86_processor = X86_PROCESSOR_ATHLON;
3800 }
3801 break;
3802 }
3803 if (c->x86_processor == X86_PROCESSOR_max) {
3804 fprintf(stderr, "Error: unknown processor type\n");
3805 fprintf(stderr, " Family : %d\n", c->x86);
3806 fprintf(stderr, " Model : %d\n", c->x86_model);
3807 fprintf(stderr, " Mask : %d\n", c->x86_mask);
3808 fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3809 if (c->x86_brand_id)
3810 fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
3811 abort();
3812 }
3813
3814 /* Have CMOV support? */
3815 have_cmov = c->x86_hwcap & (1 << 15);
3816
3817 /* Can the host CPU suffer from partial register stalls? */
3818 have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
3819 #if 1
3820 /* It appears that partial register writes are a bad idea even on
3821 AMD K7 cores, even though they are not supposed to have the
3822 dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
3823 if (c->x86_processor == X86_PROCESSOR_ATHLON)
3824 have_rat_stall = true;
3825 #endif
3826
3827 /* Alignments */
3828 if (tune_alignment) {
3829 align_loops = x86_alignments[c->x86_processor].align_loop;
3830 align_jumps = x86_alignments[c->x86_processor].align_jump;
3831 }
3832
3833 write_log("Max CPUID level=%d Processor is %s [%s]\n",
3834 c->cpuid_level, c->x86_vendor_id,
3835 x86_processor_string_table[c->x86_processor]);
3836 }
3837
3838 static bool target_check_bsf(void)
3839 {
3840 bool mismatch = false;
3841 for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3842 for (int g_CF = 0; g_CF <= 1; g_CF++) {
3843 for (int g_OF = 0; g_OF <= 1; g_OF++) {
3844 for (int g_SF = 0; g_SF <= 1; g_SF++) {
3845 for (int value = -1; value <= 1; value++) {
3846 unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3847 unsigned long tmp = value;
3848 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3849 : "+r" (flags), "+r" (tmp) : : "cc");
3850 int OF = (flags >> 11) & 1;
3851 int SF = (flags >> 7) & 1;
3852 int ZF = (flags >> 6) & 1;
3853 int CF = flags & 1;
3854 tmp = (value == 0);
3855 if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3856 mismatch = true;
3857 }
3858 }}}}
3859 if (mismatch)
3860 write_log("Target CPU defines all flags on BSF instruction\n");
3861 return !mismatch;
3862 }
3863
3864
3865 /*************************************************************************
3866 * FPU stuff *
3867 *************************************************************************/
3868
3869
3870 static __inline__ void raw_fp_init(void)
3871 {
3872 int i;
3873
3874 for (i=0;i<N_FREGS;i++)
3875 live.spos[i]=-2;
3876 live.tos=-1; /* Stack is empty */
3877 }
3878
3879 static __inline__ void raw_fp_cleanup_drop(void)
3880 {
3881 #if 0
3882 /* using FINIT instead of popping all the entries.
3883 Seems to have side effects --- there is display corruption in
3884 Quake when this is used */
3885 if (live.tos>1) {
3886 emit_byte(0x9b);
3887 emit_byte(0xdb);
3888 emit_byte(0xe3);
3889 live.tos=-1;
3890 }
3891 #endif
3892 while (live.tos>=1) {
3893 emit_byte(0xde);
3894 emit_byte(0xd9);
3895 live.tos-=2;
3896 }
3897 while (live.tos>=0) {
3898 emit_byte(0xdd);
3899 emit_byte(0xd8);
3900 live.tos--;
3901 }
3902 raw_fp_init();
3903 }
3904
3905 static __inline__ void make_tos(int r)
3906 {
3907 int p,q;
3908
3909 if (live.spos[r]<0) { /* Register not yet on stack */
3910 emit_byte(0xd9);
3911 emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
3912 live.tos++;
3913 live.spos[r]=live.tos;
3914 live.onstack[live.tos]=r;
3915 return;
3916 }
3917 /* Register is on stack */
3918 if (live.tos==live.spos[r])
3919 return;
3920 p=live.spos[r];
3921 q=live.onstack[live.tos];
3922
3923 emit_byte(0xd9);
3924 emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
3925 live.onstack[live.tos]=r;
3926 live.spos[r]=live.tos;
3927 live.onstack[p]=q;
3928 live.spos[q]=p;
3929 }
3930
3931 static __inline__ void make_tos2(int r, int r2)
3932 {
3933 int q;
3934
3935 make_tos(r2); /* Put the reg that's supposed to end up in position2
3936 on top */
3937
3938 if (live.spos[r]<0) { /* Register not yet on stack */
3939 make_tos(r); /* This will extend the stack */
3940 return;
3941 }
3942 /* Register is on stack */
3943 emit_byte(0xd9);
3944 emit_byte(0xc9); /* Move r2 into position 2 */
3945
3946 q=live.onstack[live.tos-1];
3947 live.onstack[live.tos]=q;
3948 live.spos[q]=live.tos;
3949 live.onstack[live.tos-1]=r2;
3950 live.spos[r2]=live.tos-1;
3951
3952 make_tos(r); /* And r into 1 */
3953 }
3954
3955 static __inline__ int stackpos(int r)
3956 {
3957 if (live.spos[r]<0)
3958 abort();
3959 if (live.tos<live.spos[r]) {
3960 printf("Looking for spos for fnreg %d\n",r);
3961 abort();
3962 }
3963 return live.tos-live.spos[r];
3964 }
3965
3966 static __inline__ void usereg(int r)
3967 {
3968 if (live.spos[r]<0)
3969 make_tos(r);
3970 }
3971
3972 /* This is called with one FP value in a reg *above* tos, which it will
3973 pop off the stack if necessary */
3974 static __inline__ void tos_make(int r)
3975 {
3976 if (live.spos[r]<0) {
3977 live.tos++;
3978 live.spos[r]=live.tos;
3979 live.onstack[live.tos]=r;
3980 return;
3981 }
3982 emit_byte(0xdd);
3983 emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
3984 and pop it*/
3985 }
3986
3987 /* FP helper functions */
3988 #if USE_NEW_RTASM
3989 #define DEFINE_OP(NAME, GEN) \
3990 static inline void raw_##NAME(uint32 m) \
3991 { \
3992 GEN(m, X86_NOREG, X86_NOREG, 1); \
3993 }
3994 DEFINE_OP(fstl, FSTLm);
3995 DEFINE_OP(fstpl, FSTPLm);
3996 DEFINE_OP(fldl, FLDLm);
3997 DEFINE_OP(fildl, FILDLm);
3998 DEFINE_OP(fistl, FISTLm);
3999 DEFINE_OP(flds, FLDSm);
4000 DEFINE_OP(fsts, FSTSm);
4001 DEFINE_OP(fstpt, FSTPTm);
4002 DEFINE_OP(fldt, FLDTm);
4003 #else
4004 #define DEFINE_OP(NAME, OP1, OP2) \
4005 static inline void raw_##NAME(uint32 m) \
4006 { \
4007 emit_byte(OP1); \
4008 emit_byte(OP2); \
4009 emit_long(m); \
4010 }
4011 DEFINE_OP(fstl, 0xdd, 0x15);
4012 DEFINE_OP(fstpl, 0xdd, 0x1d);
4013 DEFINE_OP(fldl, 0xdd, 0x05);
4014 DEFINE_OP(fildl, 0xdb, 0x05);
4015 DEFINE_OP(fistl, 0xdb, 0x15);
4016 DEFINE_OP(flds, 0xd9, 0x05);
4017 DEFINE_OP(fsts, 0xd9, 0x15);
4018 DEFINE_OP(fstpt, 0xdb, 0x3d);
4019 DEFINE_OP(fldt, 0xdb, 0x2d);
4020 #endif
4021 #undef DEFINE_OP
4022
4023 LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4024 {
4025 make_tos(r);
4026 raw_fstl(m);
4027 }
4028 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4029
4030 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4031 {
4032 make_tos(r);
4033 raw_fstpl(m);
4034 live.onstack[live.tos]=-1;
4035 live.tos--;
4036 live.spos[r]=-2;
4037 }
4038 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4039
4040 LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4041 {
4042 raw_fldl(m);
4043 tos_make(r);
4044 }
4045 LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4046
4047 LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4048 {
4049 raw_fildl(m);
4050 tos_make(r);
4051 }
4052 LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4053
4054 LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4055 {
4056 make_tos(r);
4057 raw_fistl(m);
4058 }
4059 LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4060
4061 LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4062 {
4063 raw_flds(m);
4064 tos_make(r);
4065 }
4066 LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4067
4068 LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4069 {
4070 make_tos(r);
4071 raw_fsts(m);
4072 }
4073 LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4074
4075 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4076 {
4077 int rs;
4078
4079 /* Stupid x87 can't write a long double to mem without popping the
4080 stack! */
4081 usereg(r);
4082 rs=stackpos(r);
4083 emit_byte(0xd9); /* Get a copy to the top of stack */
4084 emit_byte(0xc0+rs);
4085
4086 raw_fstpt(m); /* store and pop it */
4087 }
4088 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4089
4090 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4091 {
4092 int rs;
4093
4094 make_tos(r);
4095 raw_fstpt(m); /* store and pop it */
4096 live.onstack[live.tos]=-1;
4097 live.tos--;
4098 live.spos[r]=-2;
4099 }
4100 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4101
4102 LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4103 {
4104 raw_fldt(m);
4105 tos_make(r);
4106 }
4107 LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4108
4109 LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4110 {
4111 emit_byte(0xd9);
4112 emit_byte(0xeb);
4113 tos_make(r);
4114 }
4115 LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4116
4117 LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4118 {
4119 emit_byte(0xd9);
4120 emit_byte(0xec);
4121 tos_make(r);
4122 }
4123 LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4124
4125 LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4126 {
4127 emit_byte(0xd9);
4128 emit_byte(0xea);
4129 tos_make(r);
4130 }
4131 LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4132
4133 LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4134 {
4135 emit_byte(0xd9);
4136 emit_byte(0xed);
4137 tos_make(r);
4138 }
4139 LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4140
4141 LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4142 {
4143 emit_byte(0xd9);
4144 emit_byte(0xe8);
4145 tos_make(r);
4146 }
4147 LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4148
4149 LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4150 {
4151 emit_byte(0xd9);
4152 emit_byte(0xee);
4153 tos_make(r);
4154 }
4155 LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4156
4157 LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4158 {
4159 int ds;
4160
4161 usereg(s);
4162 ds=stackpos(s);
4163 if (ds==0 && live.spos[d]>=0) {
4164 /* source is on top of stack, and we already have the dest */
4165 int dd=stackpos(d);
4166 emit_byte(0xdd);
4167 emit_byte(0xd0+dd);
4168 }
4169 else {
4170 emit_byte(0xd9);
4171 emit_byte(0xc0+ds); /* duplicate source on tos */
4172 tos_make(d); /* store to destination, pop if necessary */
4173 }
4174 }
4175 LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4176
4177 LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4178 {
4179 emit_byte(0xd9);
4180 emit_byte(0xa8+index);
4181 emit_long(base);
4182 }
4183 LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
4184
4185
4186 LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4187 {
4188 int ds;
4189
4190 if (d!=s) {
4191 usereg(s);
4192 ds=stackpos(s);
4193 emit_byte(0xd9);
4194 emit_byte(0xc0+ds); /* duplicate source */
4195 emit_byte(0xd9);
4196 emit_byte(0xfa); /* take square root */
4197 tos_make(d); /* store to destination */
4198 }
4199 else {
4200 make_tos(d);
4201 emit_byte(0xd9);
4202 emit_byte(0xfa); /* take square root */
4203 }
4204 }
4205 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4206
4207 LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4208 {
4209 int ds;
4210
4211 if (d!=s) {
4212 usereg(s);
4213 ds=stackpos(s);
4214 emit_byte(0xd9);
4215 emit_byte(0xc0+ds); /* duplicate source */
4216 emit_byte(0xd9);
4217 emit_byte(0xe1); /* take fabs */
4218 tos_make(d); /* store to destination */
4219 }
4220 else {
4221 make_tos(d);
4222 emit_byte(0xd9);
4223 emit_byte(0xe1); /* take fabs */
4224 }
4225 }
4226 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4227
4228 LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4229 {
4230 int ds;
4231
4232 if (d!=s) {
4233 usereg(s);
4234 ds=stackpos(s);
4235 emit_byte(0xd9);
4236 emit_byte(0xc0+ds); /* duplicate source */
4237 emit_byte(0xd9);
4238 emit_byte(0xfc); /* take frndint */
4239 tos_make(d); /* store to destination */
4240 }
4241 else {
4242 make_tos(d);
4243 emit_byte(0xd9);
4244 emit_byte(0xfc); /* take frndint */
4245 }
4246 }
4247 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4248
4249 LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4250 {
4251 int ds;
4252
4253 if (d!=s) {
4254 usereg(s);
4255 ds=stackpos(s);
4256 emit_byte(0xd9);
4257 emit_byte(0xc0+ds); /* duplicate source */
4258 emit_byte(0xd9);
4259 emit_byte(0xff); /* take cos */
4260 tos_make(d); /* store to destination */
4261 }
4262 else {
4263 make_tos(d);
4264 emit_byte(0xd9);
4265 emit_byte(0xff); /* take cos */
4266 }
4267 }
4268 LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4269
4270 LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4271 {
4272 int ds;
4273
4274 if (d!=s) {
4275 usereg(s);
4276 ds=stackpos(s);
4277 emit_byte(0xd9);
4278 emit_byte(0xc0+ds); /* duplicate source */
4279 emit_byte(0xd9);
4280 emit_byte(0xfe); /* take sin */
4281 tos_make(d); /* store to destination */
4282 }
4283 else {
4284 make_tos(d);
4285 emit_byte(0xd9);
4286 emit_byte(0xfe); /* take sin */
4287 }
4288 }
4289 LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4290
4291 double one=1;
4292 LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4293 {
4294 int ds;
4295
4296 usereg(s);
4297 ds=stackpos(s);
4298 emit_byte(0xd9);
4299 emit_byte(0xc0+ds); /* duplicate source */
4300
4301 emit_byte(0xd9);
4302 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4303 emit_byte(0xd9);
4304 emit_byte(0xfc); /* rndint */
4305 emit_byte(0xd9);
4306 emit_byte(0xc9); /* swap top two elements */
4307 emit_byte(0xd8);
4308 emit_byte(0xe1); /* subtract rounded from original */
4309 emit_byte(0xd9);
4310 emit_byte(0xf0); /* f2xm1 */
4311 emit_byte(0xdc);
4312 emit_byte(0x05);
4313 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4314 emit_byte(0xd9);
4315 emit_byte(0xfd); /* and scale it */
4316 emit_byte(0xdd);
4317 emit_byte(0xd9); /* take he rounded value off */
4318 tos_make(d); /* store to destination */
4319 }
4320 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4321
4322 LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4323 {
4324 int ds;
4325
4326 usereg(s);
4327 ds=stackpos(s);
4328 emit_byte(0xd9);
4329 emit_byte(0xc0+ds); /* duplicate source */
4330 emit_byte(0xd9);
4331 emit_byte(0xea); /* fldl2e */
4332 emit_byte(0xde);
4333 emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
4334
4335 emit_byte(0xd9);
4336 emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
4337 emit_byte(0xd9);
4338 emit_byte(0xfc); /* rndint */
4339 emit_byte(0xd9);
4340 emit_byte(0xc9); /* swap top two elements */
4341 emit_byte(0xd8);
4342 emit_byte(0xe1); /* subtract rounded from original */
4343 emit_byte(0xd9);
4344 emit_byte(0xf0); /* f2xm1 */
4345 emit_byte(0xdc);
4346 emit_byte(0x05);
4347 emit_long((uintptr)&one); /* Add '1' without using extra stack space */
4348 emit_byte(0xd9);
4349 emit_byte(0xfd); /* and scale it */
4350 emit_byte(0xdd);
4351 emit_byte(0xd9); /* take he rounded value off */
4352 tos_make(d); /* store to destination */
4353 }
4354 LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4355
4356 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4357 {
4358 int ds;
4359
4360 usereg(s);
4361 ds=stackpos(s);
4362 emit_byte(0xd9);
4363 emit_byte(0xc0+ds); /* duplicate source */
4364 emit_byte(0xd9);
4365 emit_byte(0xe8); /* push '1' */
4366 emit_byte(0xd9);
4367 emit_byte(0xc9); /* swap top two */
4368 emit_byte(0xd9);
4369 emit_byte(0xf1); /* take 1*log2(x) */
4370 tos_make(d); /* store to destination */
4371 }
4372 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4373
4374
4375 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4376 {
4377 int ds;
4378
4379 if (d!=s) {
4380 usereg(s);
4381 ds=stackpos(s);
4382 emit_byte(0xd9);
4383 emit_byte(0xc0+ds); /* duplicate source */
4384 emit_byte(0xd9);
4385 emit_byte(0xe0); /* take fchs */
4386 tos_make(d); /* store to destination */
4387 }
4388 else {
4389 make_tos(d);
4390 emit_byte(0xd9);
4391 emit_byte(0xe0); /* take fchs */
4392 }
4393 }
4394 LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4395
4396 LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4397 {
4398 int ds;
4399
4400 usereg(s);
4401 usereg(d);
4402
4403 if (live.spos[s]==live.tos) {
4404 /* Source is on top of stack */
4405 ds=stackpos(d);
4406 emit_byte(0xdc);
4407 emit_byte(0xc0+ds); /* add source to dest*/
4408 }
4409 else {
4410 make_tos(d);
4411 ds=stackpos(s);
4412
4413 emit_byte(0xd8);
4414 emit_byte(0xc0+ds); /* add source to dest*/
4415 }
4416 }
4417 LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4418
4419 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4420 {
4421 int ds;
4422
4423 usereg(s);
4424 usereg(d);
4425
4426 if (live.spos[s]==live.tos) {
4427 /* Source is on top of stack */
4428 ds=stackpos(d);
4429 emit_byte(0xdc);
4430 emit_byte(0xe8+ds); /* sub source from dest*/
4431 }
4432 else {
4433 make_tos(d);
4434 ds=stackpos(s);
4435
4436 emit_byte(0xd8);
4437 emit_byte(0xe0+ds); /* sub src from dest */
4438 }
4439 }
4440 LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4441
4442 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4443 {
4444 int ds;
4445
4446 usereg(s);
4447 usereg(d);
4448
4449 make_tos(d);
4450 ds=stackpos(s);
4451
4452 emit_byte(0xdd);
4453 emit_byte(0xe0+ds); /* cmp dest with source*/
4454 }
4455 LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4456
4457 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4458 {
4459 int ds;
4460
4461 usereg(s);
4462 usereg(d);
4463
4464 if (live.spos[s]==live.tos) {
4465 /* Source is on top of stack */
4466 ds=stackpos(d);
4467 emit_byte(0xdc);
4468 emit_byte(0xc8+ds); /* mul dest by source*/
4469 }
4470 else {
4471 make_tos(d);
4472 ds=stackpos(s);
4473
4474 emit_byte(0xd8);
4475 emit_byte(0xc8+ds); /* mul dest by source*/
4476 }
4477 }
4478 LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4479
4480 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4481 {
4482 int ds;
4483
4484 usereg(s);
4485 usereg(d);
4486
4487 if (live.spos[s]==live.tos) {
4488 /* Source is on top of stack */
4489 ds=stackpos(d);
4490 emit_byte(0xdc);
4491 emit_byte(0xf8+ds); /* div dest by source */
4492 }
4493 else {
4494 make_tos(d);
4495 ds=stackpos(s);
4496
4497 emit_byte(0xd8);
4498 emit_byte(0xf0+ds); /* div dest by source*/
4499 }
4500 }
4501 LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4502
4503 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4504 {
4505 int ds;
4506
4507 usereg(s);
4508 usereg(d);
4509
4510 make_tos2(d,s);
4511 ds=stackpos(s);
4512
4513 if (ds!=1) {
4514 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4515 abort();
4516 }
4517 emit_byte(0xd9);
4518 emit_byte(0xf8); /* take rem from dest by source */
4519 }
4520 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4521
4522 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4523 {
4524 int ds;
4525
4526 usereg(s);
4527 usereg(d);
4528
4529 make_tos2(d,s);
4530 ds=stackpos(s);
4531
4532 if (ds!=1) {
4533 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4534 abort();
4535 }
4536 emit_byte(0xd9);
4537 emit_byte(0xf5); /* take rem1 from dest by source */
4538 }
4539 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4540
4541
4542 LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4543 {
4544 make_tos(r);
4545 emit_byte(0xd9); /* ftst */
4546 emit_byte(0xe4);
4547 }
4548 LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4549
4550 /* %eax register is clobbered if target processor doesn't support fucomi */
4551 #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
4552 #define FFLAG_NREG EAX_INDEX
4553
4554 static __inline__ void raw_fflags_into_flags(int r)
4555 {
4556 int p;
4557
4558 usereg(r);
4559 p=stackpos(r);
4560
4561 emit_byte(0xd9);
4562 emit_byte(0xee); /* Push 0 */
4563 emit_byte(0xd9);
4564 emit_byte(0xc9+p); /* swap top two around */
4565 if (have_cmov) {
4566 // gb-- fucomi is for P6 cores only, not K6-2 then...
4567 emit_byte(0xdb);
4568 emit_byte(0xe9+p); /* fucomi them */
4569 }
4570 else {
4571 emit_byte(0xdd);
4572 emit_byte(0xe1+p); /* fucom them */
4573 emit_byte(0x9b);
4574 emit_byte(0xdf);
4575 emit_byte(0xe0); /* fstsw ax */
4576 raw_sahf(0); /* sahf */
4577 }
4578 emit_byte(0xdd);
4579 emit_byte(0xd9+p); /* store value back, and get rid of 0 */
4580 }