ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
(Generate patch)

Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z vs.
Revision 1.19 by cebix, 2004-01-12T15:29:29Z

# Line 3 | Line 3
3   *
4   *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5   *
6 < *  Adaptation for Basilisk II and improvements, copyright 2000-2002
6 > *  Adaptation for Basilisk II and improvements, copyright 2000-2004
7   *    Gwenole Beauchesne
8   *
9 < *  Basilisk II (C) 1997-2002 Christian Bauer
9 > *  Basilisk II (C) 1997-2004 Christian Bauer
10 > *
11 > *  Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
12   *  
13   *  This program is free software; you can redistribute it and/or modify
14   *  it under the terms of the GNU General Public License as published by
# Line 75 | Line 77 | uae_s8 always_used[]={4,-1};
77   uae_s8 can_byte[]={0,1,2,3,-1};
78   uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
79  
80 + #if USE_OPTIMIZED_CALLS
81 + /* Make sure interpretive core does not use cpuopti */
82 + uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
83 + #else
84   /* cpuopti mutate instruction handlers to assume registers are saved
85     by the caller */
86   uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
87 + #endif
88  
89   /* This *should* be the same as call_saved. But:
90     - We might not really know which registers are saved, and which aren't,
# Line 122 | Line 129 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
129   #define CLOBBER_BT   clobber_flags()
130   #define CLOBBER_BSF  clobber_flags()
131  
132 + /* FIXME: disabled until that's proofread.  */
133 + #if 0
134 +
135 + #if defined(__x86_64__)
136 + #define X86_TARGET_64BIT                1
137 + #endif
138 + #define X86_FLAT_REGISTERS              0
139 + #define X86_OPTIMIZE_ALU                1
140 + #define X86_OPTIMIZE_ROTSHI             1
141 + #include "codegen_x86.h"
142 +
143 + #define x86_emit_byte(B)                emit_byte(B)
144 + #define x86_emit_word(W)                emit_word(W)
145 + #define x86_emit_long(L)                emit_long(L)
146 + #define x86_get_target()                get_target()
147 + #define x86_emit_failure(MSG)   jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
148 +
149 + static void jit_fail(const char *msg, const char *file, int line, const char *function)
150 + {
151 +        fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
152 +                        function, file, line, msg);
153 +        abort();
154 + }
155 +
156 + LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
157 + {
158 +        PUSHLr(r);
159 + }
160 + LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
161 +
162 + LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
163 + {
164 +        POPLr(r);
165 + }
166 + LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
167 +
168 + LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
169 + {
170 +        BTLir(i, r);
171 + }
172 + LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
173 +
174 + LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
175 + {
176 +        BTLrr(b, r);
177 + }
178 + LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
179 +
180 + LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
181 + {
182 +        BTCLir(i, r);
183 + }
184 + LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
185 +
186 + LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
187 + {
188 +        BTCLrr(b, r);
189 + }
190 + LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
191 +
192 + LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
193 + {
194 +        BTRLir(i, r);
195 + }
196 + LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
197 +
198 + LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
199 + {
200 +        BTRLrr(b, r);
201 + }
202 + LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
203 +
204 + LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
205 + {
206 +        BTSLir(i, r);
207 + }
208 + LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
209 +
210 + LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
211 + {
212 +        BTSLrr(b, r);
213 + }
214 + LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
215 +
216 + LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
217 + {
218 +        SUBWir(i, d);
219 + }
220 + LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
221 +
222 + LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
223 + {
224 +        MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
225 + }
226 + LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
227 +
228 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
229 + {
230 +        MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
231 + }
232 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
233 +
234 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
235 + {
236 +        MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
237 + }
238 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
239 +
240 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
241 + {
242 +        MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
243 + }
244 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
245 +
246 + LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
247 + {
248 +        ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
249 + }
250 + LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
251 +
252 + LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
253 + {
254 +        ROLBir(i, r);
255 + }
256 + LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
257 +
258 + LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
259 + {
260 +        ROLWir(i, r);
261 + }
262 + LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
263 +
264 + LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
265 + {
266 +        ROLLir(i, r);
267 + }
268 + LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
269 +
270 + LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
271 + {
272 +        ROLLrr(r, d);
273 + }
274 + LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
275 +
276 + LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
277 + {
278 +        ROLWrr(r, d);
279 + }
280 + LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
281 +
282 + LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
283 + {
284 +        ROLBrr(r, d);
285 + }
286 + LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
287 +
288 + LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
289 + {
290 +        SHLLrr(r, d);
291 + }
292 + LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
293 +
294 + LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
295 + {
296 +        SHLWrr(r, d);
297 + }
298 + LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
299 +
300 + LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
301 + {
302 +        SHLBrr(r, d);
303 + }
304 + LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
305 +
306 + LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
307 + {
308 +        RORBir(i, r);
309 + }
310 + LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
311 +
312 + LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
313 + {
314 +        RORWir(i, r);
315 + }
316 + LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
317 +
318 + LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
319 + {
320 +        ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
321 + }
322 + LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
323 +
324 + LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
325 + {
326 +        RORLir(i, r);
327 + }
328 + LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
329 +
330 + LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
331 + {
332 +        RORLrr(r, d);
333 + }
334 + LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
335 +
336 + LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
337 + {
338 +        RORWrr(r, d);
339 + }
340 + LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
341 +
342 + LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
343 + {
344 +        RORBrr(r, d);
345 + }
346 + LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
347 +
348 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
349 + {
350 +        SHRLrr(r, d);
351 + }
352 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
353 +
354 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
355 + {
356 +        SHRWrr(r, d);
357 + }
358 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
359 +
360 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
361 + {
362 +        SHRBrr(r, d);
363 + }
364 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
365 +
366 + LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
367 + {
368 +        SARLrr(r, d);
369 + }
370 + LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
371 +
372 + LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
373 + {
374 +        SARWrr(r, d);
375 + }
376 + LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
377 +
378 + LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
379 + {
380 +        SARBrr(r, d);
381 + }
382 + LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
383 +
384 + LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
385 + {
386 +        SHLLir(i, r);
387 + }
388 + LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
389 +
390 + LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
391 + {
392 +        SHLWir(i, r);
393 + }
394 + LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
395 +
396 + LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
397 + {
398 +        SHLBir(i, r);
399 + }
400 + LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
401 +
402 + LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
403 + {
404 +        SHRLir(i, r);
405 + }
406 + LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
407 +
408 + LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
409 + {
410 +        SHRWir(i, r);
411 + }
412 + LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
413 +
414 + LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
415 + {
416 +        SHRBir(i, r);
417 + }
418 + LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
419 +
420 + LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
421 + {
422 +        SARLir(i, r);
423 + }
424 + LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
425 +
426 + LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
427 + {
428 +        SARWir(i, r);
429 + }
430 + LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
431 +
432 + LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
433 + {
434 +        SARBir(i, r);
435 + }
436 + LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
437 +
438 + LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
439 + {
440 +        SAHF();
441 + }
442 + LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
443 +
444 + LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
445 + {
446 +        CPUID();
447 + }
448 + LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
449 +
450 + LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
451 + {
452 +        LAHF();
453 + }
454 + LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
455 +
456 + LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
457 + {
458 +        SETCCir(cc, d);
459 + }
460 + LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
461 +
462 + LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
463 + {
464 +        SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
465 + }
466 + LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
467 +
468 + LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
469 + {
470 +        if (have_cmov)
471 +                CMOVLrr(cc, s, d);
472 +        else { /* replacement using branch and mov */
473 + #if defined(__x86_64__)
474 +                write_log("x86-64 implementations are bound to have CMOV!\n");
475 +                abort();
476 + #endif
477 +                JCCSii(cc^1, 2);
478 +                MOVLrr(s, d);
479 +        }
480 + }
481 + LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
482 +
483 + LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
484 + {
485 +        BSFLrr(s, d);
486 + }
487 + LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
488 +
489 + LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
490 + {
491 +        MOVSWLrr(s, d);
492 + }
493 + LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
494 +
495 + LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
496 + {
497 +        MOVSBLrr(s, d);
498 + }
499 + LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
500 +
501 + LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
502 + {
503 +        MOVZWLrr(s, d);
504 + }
505 + LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
506 +
507 + LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
508 + {
509 +        MOVZBLrr(s, d);
510 + }
511 + LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
512 +
513 + LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
514 + {
515 +        IMULLrr(s, d);
516 + }
517 + LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
518 +
519 + LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
520 + {
521 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
522 +        write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
523 +        abort();
524 +        }
525 +        IMULLr(s);
526 + }
527 + LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
528 +
529 + LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
530 + {
531 +        if (d!=MUL_NREG1 || s!=MUL_NREG2) {
532 +        write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
533 +        abort();
534 +        }
535 +        MULLr(s);
536 + }
537 + LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
538 +
539 + LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
540 + {
541 +        abort(); /* %^$&%^$%#^ x86! */
542 + }
543 + LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
544 +
545 + LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
546 + {
547 +        MOVBrr(s, d);
548 + }
549 + LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
550 +
551 + LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
552 + {
553 +        MOVWrr(s, d);
554 + }
555 + LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
556 +
557 + LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
558 + {
559 +        MOVLmr(0, baser, index, factor, d);
560 + }
561 + LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
562 +
563 + LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
564 + {
565 +        MOVWmr(0, baser, index, factor, d);
566 + }
567 + LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
568 +
569 + LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
570 + {
571 +        MOVBmr(0, baser, index, factor, d);
572 + }
573 + LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
574 +
575 + LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
576 + {
577 +        MOVLrm(s, 0, baser, index, factor);
578 + }
579 + LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
580 +
581 + LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
582 + {
583 +        MOVWrm(s, 0, baser, index, factor);
584 + }
585 + LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
586 +
587 + LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
588 + {
589 +        MOVBrm(s, 0, baser, index, factor);
590 + }
591 + LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
592 +
593 + LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
594 + {
595 +        MOVLrm(s, base, baser, index, factor);
596 + }
597 + LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
598 +
599 + LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
600 + {
601 +        MOVWrm(s, base, baser, index, factor);
602 + }
603 + LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
604 +
605 + LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
606 + {
607 +        MOVBrm(s, base, baser, index, factor);
608 + }
609 + LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
610 +
611 + LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
612 + {
613 +        MOVLmr(base, baser, index, factor, d);
614 + }
615 + LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
616 +
617 + LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
618 + {
619 +        MOVWmr(base, baser, index, factor, d);
620 + }
621 + LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
622 +
623 + LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
624 + {
625 +        MOVBmr(base, baser, index, factor, d);
626 + }
627 + LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
628 +
629 + LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
630 + {
631 +        MOVLmr(base, X86_NOREG, index, factor, d);
632 + }
633 + LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
634 +
635 + LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
636 + {
637 +        if (have_cmov)
638 +                CMOVLmr(cond, base, X86_NOREG, index, factor, d);
639 +        else { /* replacement using branch and mov */
640 + #if defined(__x86_64__)
641 +                write_log("x86-64 implementations are bound to have CMOV!\n");
642 +                abort();
643 + #endif
644 +                JCCSii(cond^1, 7);
645 +                MOVLmr(base, X86_NOREG, index, factor, d);
646 +        }
647 + }
648 + LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
649 +
650 + LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
651 + {
652 +        if (have_cmov)
653 +                CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
654 +        else { /* replacement using branch and mov */
655 + #if defined(__x86_64__)
656 +                write_log("x86-64 implementations are bound to have CMOV!\n");
657 +                abort();
658 + #endif
659 +                JCCSii(cond^1, 6);
660 +                MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
661 +        }
662 + }
663 + LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
664 +
665 + LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
666 + {
667 +        MOVLmr(offset, s, X86_NOREG, 1, d);
668 + }
669 + LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
670 +
671 + LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
672 + {
673 +        MOVWmr(offset, s, X86_NOREG, 1, d);
674 + }
675 + LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
676 +
677 + LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
678 + {
679 +        MOVBmr(offset, s, X86_NOREG, 1, d);
680 + }
681 + LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
682 +
683 + LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
684 + {
685 +        MOVLmr(offset, s, X86_NOREG, 1, d);
686 + }
687 + LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
688 +
689 + LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
690 + {
691 +        MOVWmr(offset, s, X86_NOREG, 1, d);
692 + }
693 + LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
694 +
695 + LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
696 + {
697 +        MOVBmr(offset, s, X86_NOREG, 1, d);
698 + }
699 + LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
700 +
701 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
702 + {
703 +        MOVLim(i, offset, d, X86_NOREG, 1);
704 + }
705 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
706 +
707 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
708 + {
709 +        MOVWim(i, offset, d, X86_NOREG, 1);
710 + }
711 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
712 +
713 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
714 + {
715 +        MOVBim(i, offset, d, X86_NOREG, 1);
716 + }
717 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
718 +
719 + LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
720 + {
721 +        MOVLrm(s, offset, d, X86_NOREG, 1);
722 + }
723 + LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
724 +
725 + LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
726 + {
727 +        MOVWrm(s, offset, d, X86_NOREG, 1);
728 + }
729 + LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
730 +
731 + LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
732 + {
733 +        MOVBrm(s, offset, d, X86_NOREG, 1);
734 + }
735 + LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
736 +
737 + LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
738 + {
739 +        LEALmr(offset, s, X86_NOREG, 1, d);
740 + }
741 + LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
742 +
743 + LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
744 + {
745 +        LEALmr(offset, s, index, factor, d);
746 + }
747 + LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
748 +
749 + LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
750 + {
751 +        LEALmr(0, s, index, factor, d);
752 + }
753 + LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
754 +
755 + LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
756 + {
757 +        MOVLrm(s, offset, d, X86_NOREG, 1);
758 + }
759 + LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
760 +
761 + LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
762 + {
763 +        MOVWrm(s, offset, d, X86_NOREG, 1);
764 + }
765 + LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
766 +
767 + LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
768 + {
769 +        MOVBrm(s, offset, d, X86_NOREG, 1);
770 + }
771 + LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
772 +
773 + LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
774 + {
775 +        BSWAPLr(r);
776 + }
777 + LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
778 +
779 + LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
780 + {
781 +        ROLWir(8, r);
782 + }
783 + LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
784 +
785 + LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
786 + {
787 +        MOVLrr(s, d);
788 + }
789 + LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
790 +
791 + LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
792 + {
793 +        MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
794 + }
795 + LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
796 +
797 + LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
798 + {
799 +        MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
800 + }
801 + LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
802 +
803 + LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
804 + {
805 +        MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
806 + }
807 + LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
808 +
809 + LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
810 + {
811 +        MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
812 + }
813 + LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
814 +
815 + LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
816 + {
817 +        MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
818 + }
819 + LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
820 +
821 + LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
822 + {
823 +        MOVLir(s, d);
824 + }
825 + LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
826 +
827 + LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
828 + {
829 +        MOVWir(s, d);
830 + }
831 + LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
832 +
833 + LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
834 + {
835 +        MOVBir(s, d);
836 + }
837 + LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
838 +
839 + LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
840 + {
841 +        ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
842 + }
843 + LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
844 +
845 + LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
846 + {
847 +        ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
848 + }
849 + LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
850 +
851 + LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
852 + {
853 +        ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
854 + }
855 + LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
856 +
857 + LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
858 + {
859 +        ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
860 + }
861 + LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
862 +
863 + LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
864 + {
865 +        TESTLir(i, d);
866 + }
867 + LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
868 +
869 + LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
870 + {
871 +        TESTLrr(s, d);
872 + }
873 + LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
874 +
875 + LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
876 + {
877 +        TESTWrr(s, d);
878 + }
879 + LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
880 +
881 + LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
882 + {
883 +        TESTBrr(s, d);
884 + }
885 + LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
886 +
887 + LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
888 + {
889 +        ANDLir(i, d);
890 + }
891 + LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
892 +
893 + LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
894 + {
895 +        ANDWir(i, d);
896 + }
897 + LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
898 +
899 + LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
900 + {
901 +        ANDLrr(s, d);
902 + }
903 + LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
904 +
905 + LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
906 + {
907 +        ANDWrr(s, d);
908 + }
909 + LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
910 +
911 + LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
912 + {
913 +        ANDBrr(s, d);
914 + }
915 + LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
916 +
917 + LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
918 + {
919 +        ORLir(i, d);
920 + }
921 + LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
922 +
923 + LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
924 + {
925 +        ORLrr(s, d);
926 + }
927 + LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
928 +
929 + LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
930 + {
931 +        ORWrr(s, d);
932 + }
933 + LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
934 +
935 + LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
936 + {
937 +        ORBrr(s, d);
938 + }
939 + LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
940 +
941 + LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
942 + {
943 +        ADCLrr(s, d);
944 + }
945 + LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
946 +
947 + LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
948 + {
949 +        ADCWrr(s, d);
950 + }
951 + LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
952 +
953 + LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
954 + {
955 +        ADCBrr(s, d);
956 + }
957 + LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
958 +
959 + LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
960 + {
961 +        ADDLrr(s, d);
962 + }
963 + LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
964 +
965 + LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
966 + {
967 +        ADDWrr(s, d);
968 + }
969 + LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
970 +
971 + LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
972 + {
973 +        ADDBrr(s, d);
974 + }
975 + LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
976 +
977 + LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
978 + {
979 +        SUBLir(i, d);
980 + }
981 + LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
982 +
983 + LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
984 + {
985 +        SUBBir(i, d);
986 + }
987 + LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
988 +
989 + LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
990 + {
991 +        ADDLir(i, d);
992 + }
993 + LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
994 +
995 + LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
996 + {
997 +        ADDWir(i, d);
998 + }
999 + LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1000 +
1001 + LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1002 + {
1003 +        ADDBir(i, d);
1004 + }
1005 + LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1006 +
1007 + LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1008 + {
1009 +        SBBLrr(s, d);
1010 + }
1011 + LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1012 +
1013 + LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1014 + {
1015 +        SBBWrr(s, d);
1016 + }
1017 + LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1018 +
1019 + LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1020 + {
1021 +        SBBBrr(s, d);
1022 + }
1023 + LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1024 +
1025 + LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1026 + {
1027 +        SUBLrr(s, d);
1028 + }
1029 + LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1030 +
1031 + LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1032 + {
1033 +        SUBWrr(s, d);
1034 + }
1035 + LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1036 +
1037 + LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1038 + {
1039 +        SUBBrr(s, d);
1040 + }
1041 + LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1042 +
1043 + LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1044 + {
1045 +        CMPLrr(s, d);
1046 + }
1047 + LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1048 +
1049 + LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1050 + {
1051 +        CMPLir(i, r);
1052 + }
1053 + LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1054 +
1055 + LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1056 + {
1057 +        CMPWrr(s, d);
1058 + }
1059 + LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1060 +
1061 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1062 + {
1063 +        CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1064 + }
1065 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1066 +
1067 + LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1068 + {
1069 +        CMPBir(i, d);
1070 + }
1071 + LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1072 +
1073 + LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1074 + {
1075 +        CMPBrr(s, d);
1076 + }
1077 + LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1078 +
1079 + LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1080 + {
1081 +        CMPLmr(offset, X86_NOREG, index, factor, d);
1082 + }
1083 + LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1084 +
1085 + LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1086 + {
1087 +        XORLrr(s, d);
1088 + }
1089 + LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1090 +
1091 + LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1092 + {
1093 +        XORWrr(s, d);
1094 + }
1095 + LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1096 +
1097 + LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1098 + {
1099 +        XORBrr(s, d);
1100 + }
1101 + LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1102 +
1103 + LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1104 + {
1105 +        SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1106 + }
1107 + LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1108 +
1109 + LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1110 + {
1111 +        CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1112 + }
1113 + LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1114 +
1115 + LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1116 + {
1117 +        XCHGLrr(r2, r1);
1118 + }
1119 + LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1120 +
1121 + LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1122 + {
1123 +        PUSHF();
1124 + }
1125 + LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1126 +
1127 + LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1128 + {
1129 +        POPF();
1130 + }
1131 + LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1132 +
1133 + #else
1134 +
1135   const bool optimize_accum               = true;
1136   const bool optimize_imm8                = true;
1137   const bool optimize_shift_once  = true;
# Line 1071 | Line 2081 | LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d
2081  
2082   LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2083   {
2084 +        Dif(!isbyte(offset)) abort();
2085      emit_byte(0x8b);
2086      emit_byte(0x40+8*d+s);
2087      emit_byte(offset);
# Line 1079 | Line 2090 | LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d,
2090  
2091   LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2092   {
2093 +        Dif(!isbyte(offset)) abort();
2094      emit_byte(0x66);
2095      emit_byte(0x8b);
2096      emit_byte(0x40+8*d+s);
# Line 1088 | Line 2100 | LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d,
2100  
2101   LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2102   {
2103 +        Dif(!isbyte(offset)) abort();
2104      emit_byte(0x8a);
2105      emit_byte(0x40+8*d+s);
2106      emit_byte(offset);
# Line 1121 | Line 2134 | LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d
2134  
2135   LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2136   {
2137 +        Dif(!isbyte(offset)) abort();
2138      emit_byte(0xc7);
2139      emit_byte(0x40+d);
2140      emit_byte(offset);
# Line 1130 | Line 2144 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d
2144  
2145   LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2146   {
2147 +        Dif(!isbyte(offset)) abort();
2148      emit_byte(0x66);
2149      emit_byte(0xc7);
2150      emit_byte(0x40+d);
# Line 1140 | Line 2155 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d
2155  
2156   LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2157   {
2158 +        Dif(!isbyte(offset)) abort();
2159      emit_byte(0xc6);
2160      emit_byte(0x40+d);
2161      emit_byte(offset);
# Line 1149 | Line 2165 | LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d
2165  
2166   LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2167   {
2168 +        Dif(!isbyte(offset)) abort();
2169      emit_byte(0x89);
2170      emit_byte(0x40+8*s+d);
2171      emit_byte(offset);
# Line 1157 | Line 2174 | LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d
2174  
2175   LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2176   {
2177 +        Dif(!isbyte(offset)) abort();
2178      emit_byte(0x66);
2179      emit_byte(0x89);
2180      emit_byte(0x40+8*s+d);
# Line 1166 | Line 2184 | LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d
2184  
2185   LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2186   {
2187 +        Dif(!isbyte(offset)) abort();
2188      emit_byte(0x88);
2189      emit_byte(0x40+8*s+d);
2190      emit_byte(offset);
# Line 1856 | Line 2875 | LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r
2875   LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
2876  
2877   /*************************************************************************
1859 * FIXME: string-related instructions                                    *
1860 *************************************************************************/
1861
1862 LOWFUNC(WRITE,NONE,0,raw_cld,(void))
1863 {
1864        emit_byte(0xfc);
1865 }
1866 LENDFUNC(WRITE,NONE,0,raw_cld,(void))
1867
1868 LOWFUNC(WRITE,NONE,0,raw_std,(void))
1869 {
1870        emit_byte(0xfd);
1871 }
1872 LENDFUNC(WRITE,NONE,0,raw_std,(void))
1873
1874 LOWFUNC(NONE,RMW,0,raw_movs_b,(void))
1875 {
1876        emit_byte(0xa4);
1877 }
1878 LENDFUNC(NONE,RMW,0,raw_movs_b,(void))
1879
1880 LOWFUNC(NONE,RMW,0,raw_movs_l,(void))
1881 {
1882        emit_byte(0xa5);
1883 }
1884 LENDFUNC(NONE,RMW,0,raw_movs_l,(void))
1885
1886 LOWFUNC(NONE,RMW,0,raw_rep,(void))
1887 {
1888        emit_byte(0xf3);
1889 }
1890 LENDFUNC(NONE,RMW,0,raw_rep,(void))
1891
1892 LOWFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1893 {
1894        raw_rep();
1895        raw_movs_b();
1896 }
1897 LENDFUNC(NONE,RMW,0,raw_rep_movsb,(void))
1898
1899 LOWFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1900 {
1901        raw_rep();
1902        raw_movs_l();
1903 }
1904 LENDFUNC(NONE,RMW,0,raw_rep_movsl,(void))
1905
1906 /*************************************************************************
2878   * FIXME: mem access modes probably wrong                                *
2879   *************************************************************************/
2880  
# Line 1919 | Line 2890 | LOWFUNC(WRITE,READ,0,raw_popfl,(void))
2890   }
2891   LENDFUNC(WRITE,READ,0,raw_popfl,(void))
2892  
2893 + #endif
2894 +
2895   /*************************************************************************
2896   * Unoptimizable stuff --- jump                                          *
2897   *************************************************************************/
# Line 2055 | Line 3028 | static __inline__ void raw_nop(void)
3028      emit_byte(0x90);
3029   }
3030  
3031 + static __inline__ void raw_emit_nop_filler(int nbytes)
3032 + {
3033 +  /* Source: GNU Binutils 2.12.90.0.15 */
3034 +  /* Various efficient no-op patterns for aligning code labels.
3035 +     Note: Don't try to assemble the instructions in the comments.
3036 +     0L and 0w are not legal.  */
3037 +  static const uae_u8 f32_1[] =
3038 +    {0x90};                                                                     /* nop                                  */
3039 +  static const uae_u8 f32_2[] =
3040 +    {0x89,0xf6};                                                        /* movl %esi,%esi               */
3041 +  static const uae_u8 f32_3[] =
3042 +    {0x8d,0x76,0x00};                                           /* leal 0(%esi),%esi    */
3043 +  static const uae_u8 f32_4[] =
3044 +    {0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3045 +  static const uae_u8 f32_5[] =
3046 +    {0x90,                                                                      /* nop                                  */
3047 +     0x8d,0x74,0x26,0x00};                                      /* leal 0(%esi,1),%esi  */
3048 +  static const uae_u8 f32_6[] =
3049 +    {0x8d,0xb6,0x00,0x00,0x00,0x00};            /* leal 0L(%esi),%esi   */
3050 +  static const uae_u8 f32_7[] =
3051 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3052 +  static const uae_u8 f32_8[] =
3053 +    {0x90,                                                                      /* nop                                  */
3054 +     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};       /* leal 0L(%esi,1),%esi */
3055 +  static const uae_u8 f32_9[] =
3056 +    {0x89,0xf6,                                                         /* movl %esi,%esi               */
3057 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3058 +  static const uae_u8 f32_10[] =
3059 +    {0x8d,0x76,0x00,                                            /* leal 0(%esi),%esi    */
3060 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3061 +  static const uae_u8 f32_11[] =
3062 +    {0x8d,0x74,0x26,0x00,                                       /* leal 0(%esi,1),%esi  */
3063 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3064 +  static const uae_u8 f32_12[] =
3065 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3066 +     0x8d,0xbf,0x00,0x00,0x00,0x00};            /* leal 0L(%edi),%edi   */
3067 +  static const uae_u8 f32_13[] =
3068 +    {0x8d,0xb6,0x00,0x00,0x00,0x00,                     /* leal 0L(%esi),%esi   */
3069 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3070 +  static const uae_u8 f32_14[] =
3071 +    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,        /* leal 0L(%esi,1),%esi */
3072 +     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};       /* leal 0L(%edi,1),%edi */
3073 +  static const uae_u8 f32_15[] =
3074 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3075 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3076 +  static const uae_u8 f32_16[] =
3077 +    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,        /* jmp .+15; lotsa nops */
3078 +     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
3079 +  static const uae_u8 *const f32_patt[] = {
3080 +    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
3081 +    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
3082 +  };
3083 +
3084 +  int nloops = nbytes / 16;
3085 +  while (nloops-- > 0)
3086 +        emit_block(f32_16, sizeof(f32_16));
3087 +
3088 +  nbytes %= 16;
3089 +  if (nbytes)
3090 +        emit_block(f32_patt[nbytes - 1], nbytes);
3091 + }
3092 +
3093  
3094   /*************************************************************************
3095   * Flag handling, to and fro UAE flag register                           *
# Line 2143 | Line 3178 | static __inline__ void raw_load_flagx(ua
3178          raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
3179   }
3180  
3181 + #define NATIVE_FLAG_Z 0x40
3182 + static __inline__ void raw_flags_set_zero(int f, int r, int t)
3183 + {
3184 +        // FIXME: this is really suboptimal
3185 +        raw_pushfl();
3186 +        raw_pop_l_r(f);
3187 +        raw_and_l_ri(f,~NATIVE_FLAG_Z);
3188 +        raw_test_l_rr(r,r);
3189 +        raw_mov_l_ri(r,0);
3190 +        raw_mov_l_ri(t,NATIVE_FLAG_Z);
3191 +        raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
3192 +        raw_or_l(f,r);
3193 +        raw_push_l_r(f);
3194 +        raw_popfl();
3195 + }
3196  
3197   static __inline__ void raw_inc_sp(int off)
3198   {
# Line 2429 | Line 3479 | enum {
3479    X86_PROCESSOR_K6,
3480    X86_PROCESSOR_ATHLON,
3481    X86_PROCESSOR_PENTIUM4,
3482 +  X86_PROCESSOR_K8,
3483    X86_PROCESSOR_max
3484   };
3485  
# Line 2439 | Line 3490 | static const char * x86_processor_string
3490    "PentiumPro",
3491    "K6",
3492    "Athlon",
3493 <  "Pentium4"
3493 >  "Pentium4",
3494 >  "K8"
3495   };
3496  
3497   static struct ptt {
# Line 2456 | Line 3508 | x86_alignments[X86_PROCESSOR_max] = {
3508    { 16, 15, 16,  7, 16 },
3509    { 32,  7, 32,  7, 32 },
3510    { 16,  7, 16,  7, 16 },
3511 <  {  0,  0,  0,  0,  0 }
3511 >  {  0,  0,  0,  0,  0 },
3512 >  { 16,  7, 16,  7, 16 }
3513   };
3514  
3515   static void
# Line 2520 | Line 3573 | raw_init_cpu(void)
3573    struct cpuinfo_x86 *c = &cpuinfo;
3574  
3575    /* Defaults */
3576 +  c->x86_processor = X86_PROCESSOR_max;
3577    c->x86_vendor = X86_VENDOR_UNKNOWN;
3578    c->cpuid_level = -1;                          /* CPUID not detected */
3579    c->x86_model = c->x86_mask = 0;       /* So far unknown... */
# Line 2555 | Line 3609 | raw_init_cpu(void)
3609          c->x86 = 4;
3610    }
3611  
3612 +  /* AMD-defined flags: level 0x80000001 */
3613 +  uae_u32 xlvl;
3614 +  cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
3615 +  if ( (xlvl & 0xffff0000) == 0x80000000 ) {
3616 +        if ( xlvl >= 0x80000001 ) {
3617 +          uae_u32 features;
3618 +          cpuid(0x80000001, NULL, NULL, NULL, &features);
3619 +          if (features & (1 << 29)) {
3620 +                /* Assume x86-64 if long mode is supported */
3621 +                c->x86_processor = X86_PROCESSOR_K8;
3622 +          }
3623 +        }
3624 +  }
3625 +          
3626    /* Canonicalize processor ID */
2559  c->x86_processor = X86_PROCESSOR_max;
3627    switch (c->x86) {
3628    case 3:
3629          c->x86_processor = X86_PROCESSOR_I386;
# Line 2578 | Line 3645 | raw_init_cpu(void)
3645          break;
3646    case 15:
3647          if (c->x86_vendor == X86_VENDOR_INTEL) {
3648 <          /*  Assume any BranID >= 8 and family == 15 yields a Pentium 4 */
3648 >          /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
3649            if (c->x86_brand_id >= 8)
3650                  c->x86_processor = X86_PROCESSOR_PENTIUM4;
3651          }
3652 +        if (c->x86_vendor == X86_VENDOR_AMD) {
3653 +          /* Assume an Athlon processor if family == 15 and it was not
3654 +             detected as an x86-64 so far */
3655 +          if (c->x86_processor == X86_PROCESSOR_max)
3656 +                c->x86_processor = X86_PROCESSOR_ATHLON;
3657 +        }
3658          break;
3659    }
3660    if (c->x86_processor == X86_PROCESSOR_max) {
# Line 2589 | Line 3662 | raw_init_cpu(void)
3662          fprintf(stderr, "  Family  : %d\n", c->x86);
3663          fprintf(stderr, "  Model   : %d\n", c->x86_model);
3664          fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
3665 +        fprintf(stderr, "  Vendor  : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
3666          if (c->x86_brand_id)
3667            fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
3668          abort();
3669    }
3670  
3671    /* Have CMOV support? */
3672 <  have_cmov = (c->x86_hwcap & (1 << 15)) && true;
3672 >  have_cmov = c->x86_hwcap & (1 << 15);
3673  
3674    /* Can the host CPU suffer from partial register stalls? */
3675    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
# Line 2618 | Line 3692 | raw_init_cpu(void)
3692                          x86_processor_string_table[c->x86_processor]);
3693   }
3694  
3695 + static bool target_check_bsf(void)
3696 + {
3697 +        bool mismatch = false;
3698 +        for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3699 +        for (int g_CF = 0; g_CF <= 1; g_CF++) {
3700 +        for (int g_OF = 0; g_OF <= 1; g_OF++) {
3701 +        for (int g_SF = 0; g_SF <= 1; g_SF++) {
3702 +                for (int value = -1; value <= 1; value++) {
3703 +                        int flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3704 +                        int tmp = value;
3705 +                        __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3706 +                                                                  : "+r" (flags), "+r" (tmp) : : "cc");
3707 +                        int OF = (flags >> 11) & 1;
3708 +                        int SF = (flags >>  7) & 1;
3709 +                        int ZF = (flags >>  6) & 1;
3710 +                        int CF = flags & 1;
3711 +                        tmp = (value == 0);
3712 +                        if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3713 +                                mismatch = true;
3714 +                }
3715 +        }}}}
3716 +        if (mismatch)
3717 +                write_log("Target CPU defines all flags on BSF instruction\n");
3718 +        return !mismatch;
3719 + }
3720 +
3721  
3722   /*************************************************************************
3723   * FPU stuff                                                             *

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines