ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.1
Committed: 2002-09-17T16:04:06Z (21 years, 9 months ago) by gbeauche
Branch: MAIN
Log Message:
Import JIT compiler

File Contents

# Content
1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
2 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
3 #endif
4
5 #define USE_MATCH 0
6
7 /* kludge for Brian, so he can compile under MSVC++ */
8 #define USE_NORMAL_CALLING_CONVENTION 0
9
10 #ifndef WIN32
11 #include <sys/types.h>
12 #include <sys/mman.h>
13 #endif
14
15 #include <stdlib.h>
16 #include <fcntl.h>
17 #include <errno.h>
18
19 #include "sysdeps.h"
20 #include "cpu_emulation.h"
21 #include "main.h"
22 #include "prefs.h"
23 #include "user_strings.h"
24
25 #include "m68k.h"
26 #include "memory.h"
27 #include "readcpu.h"
28 #include "newcpu.h"
29 #include "comptbl.h"
30 #include "compiler/compemu.h"
31 #include "fpu/fpu.h"
32 #include "fpu/flags.h"
33
34 #define DEBUG 1
35 #include "debug.h"
36
37 #ifdef ENABLE_MON
38 #include "mon.h"
39 #endif
40
41 #ifndef WIN32
42 #define PROFILE_COMPILE_TIME 1
43 #endif
44
#ifdef WIN32
#undef write_log
/* On Windows builds, silently discard all log output. */
#define write_log dummy_write_log
static void dummy_write_log(const char *, ...) { }
#endif
50
51 #if JIT_DEBUG
52 #undef abort
53 #define abort() do { \
54 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
55 exit(EXIT_FAILURE); \
56 } while (0)
57 #endif
58
59 #if PROFILE_COMPILE_TIME
60 #include <time.h>
61 static uae_u32 compile_count = 0;
62 static clock_t compile_time = 0;
63 static clock_t emul_start_time = 0;
64 static clock_t emul_end_time = 0;
65 #endif
66
67 compop_func *compfunctbl[65536];
68 compop_func *nfcompfunctbl[65536];
69 cpuop_func *nfcpufunctbl[65536];
70 uae_u8* comp_pc_p;
71
72 // gb-- Extra data for Basilisk II/JIT
73 #if JIT_DEBUG
74 static bool JITDebug = false; // Enable runtime disassemblers through mon?
75 #else
76 const bool JITDebug = false; // Don't use JIT debug mode at all
77 #endif
78
79 const uae_u32 MIN_CACHE_SIZE = 2048; // Minimal translation cache size (2048 KB)
80 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
81 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed yet
82 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
83 static bool avoid_fpu = true; // Flag: compile FPU instructions ?
84 static bool have_cmov = false; // target has CMOV instructions ?
85 static bool have_rat_stall = true; // target has partial register stalls ?
86 static int zero_fd = -1;
87 static int optcount[10] = {
88 10, // How often a block has to be executed before it is translated
89 0, // How often to use naive translation
90 0, 0, 0, 0,
91 -1, -1, -1, -1
92 };
93
94 struct op_properties {
95 uae_u8 use_flags;
96 uae_u8 set_flags;
97 uae_u8 is_addx;
98 uae_u8 cflow;
99 };
100 static op_properties prop[65536];
101
102 // gb-- Control Flow Predicates
103
104 static inline int end_block(uae_u32 opcode)
105 {
106 return (prop[opcode].cflow & fl_end_block);
107 }
108
109 static inline bool may_trap(uae_u32 opcode)
110 {
111 return (prop[opcode].cflow & fl_trap);
112 }
113
114 uae_u8* start_pc_p;
115 uae_u32 start_pc;
116 uae_u32 current_block_pc_p;
117 uae_u32 current_block_start_target;
118 uae_u32 needed_flags;
119 static uae_u32 next_pc_p;
120 static uae_u32 taken_pc_p;
121 static int branch_cc;
122 static int redo_current_block;
123
124 int segvcount=0;
125 int soft_flush_count=0;
126 int hard_flush_count=0;
127 int checksum_count=0;
128 static uae_u8* current_compile_p=NULL;
129 static uae_u8* max_compile_start;
130 static uae_u8* compiled_code=NULL;
131 static uae_s32 reg_alloc_run;
132
133 void* pushall_call_handler=NULL;
134 static void* popall_do_nothing=NULL;
135 static void* popall_exec_nostats=NULL;
136 static void* popall_execute_normal=NULL;
137 static void* popall_cache_miss=NULL;
138 static void* popall_recompile_block=NULL;
139 static void* popall_check_checksum=NULL;
140
141 extern uae_u32 oink;
142 extern unsigned long foink3;
143 extern unsigned long foink;
144
145 /* The 68k only ever executes from even addresses. So right now, we
146 * waste half the entries in this array
147 * UPDATE: We now use those entries to store the start of the linked
148 * lists that we maintain for each hash result.
149 */
150 cacheline cache_tags[TAGSIZE];
151 int letit=0;
152 blockinfo* hold_bi[MAX_HOLD_BI];
153 blockinfo* active;
154 blockinfo* dormant;
155
156 /* 68040 */
157 extern struct cputbl op_smalltbl_0_nf[];
158 extern struct comptbl op_smalltbl_0_comp_nf[];
159 extern struct comptbl op_smalltbl_0_comp_ff[];
160
161 /* 68020 + 68881 */
162 extern struct cputbl op_smalltbl_1_nf[];
163
164 /* 68020 */
165 extern struct cputbl op_smalltbl_2_nf[];
166
167 /* 68010 */
168 extern struct cputbl op_smalltbl_3_nf[];
169
170 /* 68000 */
171 extern struct cputbl op_smalltbl_4_nf[];
172
173 /* 68000 slow but compatible. */
174 extern struct cputbl op_smalltbl_5_nf[];
175
176 static void flush_icache_hard(int n);
177 static void flush_icache_lazy(int n);
178 static void flush_icache_none(int n);
179 void (*flush_icache)(int n) = flush_icache_none;
180
181
182
183 bigstate live;
184 smallstate empty_ss;
185 smallstate default_ss;
186 static int optlev;
187
188 static int writereg(int r, int size);
189 static void unlock2(int r);
190 static void setlock(int r);
191 static int readreg_specific(int r, int size, int spec);
192 static int writereg_specific(int r, int size, int spec);
193 static void prepare_for_call_1(void);
194 static void prepare_for_call_2(void);
195 static void align_target(uae_u32 a);
196
197 static uae_s32 nextused[VREGS];
198
199 uae_u32 m68k_pc_offset;
200
/* Some arithmetic operations can be optimized away if the operands
202 * are known to be constant. But that's only a good idea when the
203 * side effects they would have on the flags are not important. This
204 * variable indicates whether we need the side effects or not
205 */
206 uae_u32 needflags=0;
207
208 /* Flag handling is complicated.
209 *
210 * x86 instructions create flags, which quite often are exactly what we
211 * want. So at times, the "68k" flags are actually in the x86 flags.
212 *
213 * Then again, sometimes we do x86 instructions that clobber the x86
214 * flags, but don't represent a corresponding m68k instruction. In that
215 * case, we have to save them.
216 *
217 * We used to save them to the stack, but now store them back directly
218 * into the regflags.cznv of the traditional emulation. Thus some odd
219 * names.
220 *
221 * So flags can be in either of two places (used to be three; boy were
222 * things complicated back then!); And either place can contain either
223 * valid flags or invalid trash (and on the stack, there was also the
224 * option of "nothing at all", now gone). A couple of variables keep
225 * track of the respective states.
226 *
227 * To make things worse, we might or might not be interested in the flags.
228 * by default, we are, but a call to dont_care_flags can change that
229 * until the next call to live_flags. If we are not, pretty much whatever
230 * is in the register and/or the native flags is seen as valid.
231 */
232
233 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
234 {
235 return cache_tags[cl+1].bi;
236 }
237
238 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
239 {
240 blockinfo* bi=get_blockinfo(cacheline(addr));
241
242 while (bi) {
243 if (bi->pc_p==addr)
244 return bi;
245 bi=bi->next_same_cl;
246 }
247 return NULL;
248 }
249
250
251 /*******************************************************************
252 * All sorts of list related functions for all of the lists *
253 *******************************************************************/
254
255 static __inline__ void remove_from_cl_list(blockinfo* bi)
256 {
257 uae_u32 cl=cacheline(bi->pc_p);
258
259 if (bi->prev_same_cl_p)
260 *(bi->prev_same_cl_p)=bi->next_same_cl;
261 if (bi->next_same_cl)
262 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
263 if (cache_tags[cl+1].bi)
264 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
265 else
266 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
267 }
268
269 static __inline__ void remove_from_list(blockinfo* bi)
270 {
271 if (bi->prev_p)
272 *(bi->prev_p)=bi->next;
273 if (bi->next)
274 bi->next->prev_p=bi->prev_p;
275 }
276
277 static __inline__ void remove_from_lists(blockinfo* bi)
278 {
279 remove_from_list(bi);
280 remove_from_cl_list(bi);
281 }
282
/* Push a block onto the head of its cache line's chain and make its
   handler the one dispatched for that line. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
    uae_u32 cl=cacheline(bi->pc_p);

    /* Link in front of the current head (if any). */
    if (cache_tags[cl+1].bi)
        cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
    bi->next_same_cl=cache_tags[cl+1].bi;

    cache_tags[cl+1].bi=bi;
    bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

    /* The newest block's handler takes over dispatch for this line. */
    cache_tags[cl].handler=bi->handler_to_use;
}
296
297 static __inline__ void raise_in_cl_list(blockinfo* bi)
298 {
299 remove_from_cl_list(bi);
300 add_to_cl_list(bi);
301 }
302
303 static __inline__ void add_to_active(blockinfo* bi)
304 {
305 if (active)
306 active->prev_p=&(bi->next);
307 bi->next=active;
308
309 active=bi;
310 bi->prev_p=&active;
311 }
312
313 static __inline__ void add_to_dormant(blockinfo* bi)
314 {
315 if (dormant)
316 dormant->prev_p=&(bi->next);
317 bi->next=dormant;
318
319 dormant=bi;
320 bi->prev_p=&dormant;
321 }
322
323 static __inline__ void remove_dep(dependency* d)
324 {
325 if (d->prev_p)
326 *(d->prev_p)=d->next;
327 if (d->next)
328 d->next->prev_p=d->prev_p;
329 d->prev_p=NULL;
330 d->next=NULL;
331 }
332
333 /* This block's code is about to be thrown away, so it no longer
334 depends on anything else */
335 static __inline__ void remove_deps(blockinfo* bi)
336 {
337 remove_dep(&(bi->dep[0]));
338 remove_dep(&(bi->dep[1]));
339 }
340
/* Re-patch an emitted jump so it targets handler `a`.  jmp_off points at
   the 32-bit displacement field, which is relative to the address just
   past that 4-byte operand (x86 rel32 encoding). */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
    *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
345
346 /********************************************************************
347 * Soft flush handling support functions *
348 ********************************************************************/
349
/* Change a block's direct handler and re-patch every dependent jump
   that currently targets it. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
    //write_log("bi is %p\n",bi);
    if (dh!=bi->direct_handler_to_use) {
        dependency* x=bi->deplist;
        //write_log("bi->deplist=%p\n",bi->deplist);
        while (x) {
            //write_log("x is %p\n",x);
            //write_log("x->next is %p\n",x->next);
            //write_log("x->prev_p is %p\n",x->prev_p);

            /* Only dependencies with an emitted jump need patching. */
            if (x->jmp_off) {
                adjust_jmpdep(x,dh);
            }
            x=x->next;
        }
        bi->direct_handler_to_use=dh;
    }
}
369
/* Reset a block to the untranslated state: restore the execution-count
   trigger, point all handlers back at the generic compile path, and drop
   the block's outgoing dependencies. */
static __inline__ void invalidate_block(blockinfo* bi)
{
    int i;

    bi->optlevel=0;
    bi->count=optcount[0]-1;   /* executions remaining before translation */
    bi->handler=NULL;
    bi->handler_to_use=(cpuop_func *)popall_execute_normal;
    bi->direct_handler=NULL;
    set_dhtu(bi,bi->direct_pen);
    bi->needed_flags=0xff;     /* conservatively assume all flags needed */
    bi->status=BI_INVALID;
    for (i=0;i<2;i++) {
        bi->dep[i].jmp_off=NULL;
        bi->dep[i].target=NULL;
    }
    remove_deps(bi);
}
388
/* Record that block bi's i-th exit (i in {0,1}) jumps to the block at
   68k address `target`, so the jump can be re-patched when the target's
   handler changes.  jmpaddr points at the emitted 32-bit displacement. */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
    blockinfo* tbi=get_blockinfo_addr((void*)target);

    Dif(!tbi) {
        write_log("Could not create jmpdep!\n");
        abort();
    }
    bi->dep[i].jmp_off=jmpaddr;
    bi->dep[i].source=bi;
    bi->dep[i].target=tbi;
    /* Push this record onto the target's dependency list. */
    bi->dep[i].next=tbi->deplist;
    if (bi->dep[i].next)
        bi->dep[i].next->prev_p=&(bi->dep[i].next);
    bi->dep[i].prev_p=&(tbi->deplist);
    tbi->deplist=&(bi->dep[i]);
}
406
/* Flag a block for recompilation: fall back to the interpretive handlers
   and update the cache-line dispatch entry if this block owns it. */
static __inline__ void block_need_recompile(blockinfo * bi)
{
    uae_u32 cl = cacheline(bi->pc_p);

    set_dhtu(bi, bi->direct_pen);
    bi->direct_handler = bi->direct_pen;

    bi->handler_to_use = (cpuop_func *)popall_execute_normal;
    bi->handler = (cpuop_func *)popall_execute_normal;
    /* Only the chain head's handler is installed in the dispatch tag. */
    if (bi == cache_tags[cl + 1].bi)
        cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
    bi->status = BI_NEED_RECOMP;
}
420
/* Recursively mark every block that jumps into bi for recompilation,
   so their direct jumps get re-generated against the new code. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
    dependency *x = bi->deplist;

    while (x) {
        dependency *next = x->next;  /* This disappears when we mark for
                                      * recompilation and thus remove the
                                      * blocks from the lists */
        if (x->jmp_off) {
            blockinfo *cbi = x->source;

            Dif(cbi->status == BI_INVALID) {
                // write_log("invalid block in dependency list\n"); // FIXME?
                // abort();
            }
            if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
                block_need_recompile(cbi);
                mark_callers_recompile(cbi);
            }
            else if (cbi->status == BI_COMPILING) {
                /* The caller is the block being compiled right now;
                   signal the compiler loop to redo it. */
                redo_current_block = 1;
            }
            else if (cbi->status == BI_NEED_RECOMP) {
                /* nothing */
            }
            else {
                //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
            }
        }
        x = next;
    }
}
453
/* Look up the blockinfo for addr, creating one from the hold_bi[] stash
   if none exists yet.  Aborts if the stash is empty (callers are expected
   to have called alloc_blockinfos() beforehand).
   Note: the `setstate` parameter is currently unused. */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
    blockinfo* bi=get_blockinfo_addr(addr);
    int i;

    if (!bi) {
        /* Take the first available pre-allocated blockinfo. */
        for (i=0;i<MAX_HOLD_BI && !bi;i++) {
            if (hold_bi[i]) {
                uae_u32 cl=cacheline(addr);

                bi=hold_bi[i];
                hold_bi[i]=NULL;
                bi->pc_p=(uae_u8 *)addr;
                invalidate_block(bi);
                add_to_active(bi);
                add_to_cl_list(bi);

            }
        }
    }
    if (!bi) {
        write_log("Looking for blockinfo, can't find free one\n");
        abort();
    }
    return bi;
}
480
481 static void prepare_block(blockinfo* bi);
482
/* Management of blockinfos.
484
485 A blockinfo struct is allocated whenever a new block has to be
486 compiled. If the list of free blockinfos is empty, we allocate a new
487 pool of blockinfos and link the newly created blockinfos altogether
488 into the list of free blockinfos. Otherwise, we simply pop a structure
489 of the free list.
490
491 Blockinfo are lazily deallocated, i.e. chained altogether in the
492 list of free blockinfos whenvever a translation cache flush (hard or
493 soft) request occurs.
494 */
495
496 #if USE_SEPARATE_BIA
497 const int BLOCKINFO_POOL_SIZE = 128;
498 struct blockinfo_pool {
499 blockinfo bi[BLOCKINFO_POOL_SIZE];
500 blockinfo_pool *next;
501 };
502 static blockinfo_pool * blockinfo_pools = 0;
503 static blockinfo * free_blockinfos = 0;
504 #endif
505
506 static __inline__ blockinfo *alloc_blockinfo(void)
507 {
508 #if USE_SEPARATE_BIA
509 if (!free_blockinfos) {
510 // There is no blockinfo struct left, allocate a new
511 // pool and link the chunks into the free list
512 blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
513 for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
514 bi->next = free_blockinfos;
515 free_blockinfos = bi;
516 }
517 bi_pool->next = blockinfo_pools;
518 blockinfo_pools = bi_pool;
519 }
520 blockinfo *bi = free_blockinfos;
521 free_blockinfos = bi->next;
522 #else
523 blockinfo *bi = (blockinfo*)current_compile_p;
524 current_compile_p += sizeof(blockinfo);
525 #endif
526 return bi;
527 }
528
/* Return a blockinfo to the free list.  A no-op when blockinfos are
   carved straight out of the translation cache (cache flush reclaims
   them wholesale in that configuration). */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_SEPARATE_BIA
    bi->next = free_blockinfos;
    free_blockinfos = bi;
#endif
}
536
/* Release every blockinfo pool and log allocation statistics. */
static void free_blockinfo_pools(void)
{
#if USE_SEPARATE_BIA
    int pool_count = 0;
    for (blockinfo_pool *p = blockinfo_pools; p != NULL; ) {
        blockinfo_pool *doomed = p;
        p = p->next;
        free(doomed);
        pool_count++;
    }

    uae_u32 total_bytes = pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
    write_log("### Blockinfo allocation statistics\n");
    write_log("Number of blockinfo pools : %d\n", pool_count);
    write_log("Total number of blockinfos : %d (%d KB)\n",
              pool_count * BLOCKINFO_POOL_SIZE,
              total_bytes / 1024);
    write_log("\n");
#endif
}
558
/* Top up the hold_bi[] stash of prepared blockinfos.
   NOTE(review): returns as soon as a non-empty slot is found — this
   appears to assume slots are always consumed from index 0 upward;
   confirm against get_blockinfo_addr_new(). */
static __inline__ void alloc_blockinfos(void)
{
    int i;
    blockinfo* bi;

    for (i=0;i<MAX_HOLD_BI;i++) {
        if (hold_bi[i])
            return;
        bi=hold_bi[i]=alloc_blockinfo();
        prepare_block(bi);
    }
}
571
572 /********************************************************************
573 * Functions to emit data into memory, and other general support *
574 ********************************************************************/
575
576 static uae_u8* target;
577
/* Placeholder for one-time emitter initialization (currently nothing to do). */
static void emit_init(void)
{
}
581
582 static __inline__ void emit_byte(uae_u8 x)
583 {
584 *target++=x;
585 }
586
/* Append a 16-bit value at the current code-generation point.
   NOTE(review): relies on the host tolerating unaligned, type-punned
   stores — fine on x86 (the only codegen target here), not portable. */
static __inline__ void emit_word(uae_u16 x)
{
    *((uae_u16*)target)=x;
    target+=2;
}
592
/* Append a 32-bit value at the current code-generation point.
   Same unaligned-store caveat as emit_word(). */
static __inline__ void emit_long(uae_u32 x)
{
    *((uae_u32*)target)=x;
    target+=4;
}
598
599 static __inline__ uae_u32 reverse32(uae_u32 v)
600 {
601 #if 1
602 // gb-- We have specialized byteswapping functions, just use them
603 return do_byteswap_32(v);
604 #else
605 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
606 #endif
607 }
608
609 /********************************************************************
610 * Getting the information about the target CPU *
611 ********************************************************************/
612
613 #include "codegen_x86.cpp"
614
615 void set_target(uae_u8* t)
616 {
617 target=t;
618 }
619
/* Current code-emission position (internal accessor). */
static __inline__ uae_u8* get_target_noopt(void)
{
    return target;
}
624
625 __inline__ uae_u8* get_target(void)
626 {
627 return get_target_noopt();
628 }
629
630
631 /********************************************************************
632 * Flags status handling. EMIT TIME! *
633 ********************************************************************/
634
635 static void bt_l_ri_noclobber(R4 r, IMM i);
636
/* Ensure the emulated 68k flags are live in the native x86 flags,
   reloading them from regflags.cznv ("the stack") if necessary.
   Aborts if no valid copy of the flags exists anywhere. */
static void make_flags_live_internal(void)
{
    if (live.flags_in_flags==VALID)
        return;
    Dif (live.flags_on_stack==TRASH) {
        write_log("Want flags, got something on stack, but it is TRASH\n");
        abort();
    }
    if (live.flags_on_stack==VALID) {
        int tmp;
        /* Load the saved flags into a register and push them into EFLAGS. */
        tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
        raw_reg_to_flags(tmp);
        unlock2(tmp);

        live.flags_in_flags=VALID;
        return;
    }
    write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
              live.flags_in_flags,live.flags_on_stack);
    abort();
}
658
/* Save the native flags into regflags.cznv ("the stack") so host code
   may clobber EFLAGS.  Skipped entirely when the flags are known to be
   unneeded (dont_care_flags). */
static void flags_to_stack(void)
{
    if (live.flags_on_stack==VALID)
        return;
    if (!live.flags_are_important) {
        /* Nobody will read them: just pretend the saved copy is valid. */
        live.flags_on_stack=VALID;
        return;
    }
    Dif (live.flags_in_flags!=VALID)
        abort();
    else {
        int tmp;
        tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
        raw_flags_to_reg(tmp);
        unlock2(tmp);
    }
    live.flags_on_stack=VALID;
}
677
/* About to emit host code that trashes EFLAGS: save the flags first if
   the native copy is the only valid one, then mark it trashed. */
static __inline__ void clobber_flags(void)
{
    if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
        flags_to_stack();
    live.flags_in_flags=TRASH;
}
684
685 /* Prepare for leaving the compiled stuff */
686 static __inline__ void flush_flags(void)
687 {
688 flags_to_stack();
689 return;
690 }
691
692 int touchcnt;
693
694 /********************************************************************
695 * register allocation per block logging *
696 ********************************************************************/
697
698 static uae_s8 vstate[VREGS];
699 static uae_s8 vwritten[VREGS];
700 static uae_s8 nstate[N_REGS];
701
702 #define L_UNKNOWN -127
703 #define L_UNAVAIL -1
704 #define L_NEEDED -2
705 #define L_UNNEEDED -3
706
707 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
708 {
709 int i;
710
711 for (i = 0; i < VREGS; i++)
712 s->virt[i] = vstate[i];
713 for (i = 0; i < N_REGS; i++)
714 s->nat[i] = nstate[i];
715 }
716
/* Compare the state logged for this block against a caller's recorded
   smallstate; returns 1 when the mismatch means callers should be
   recompiled to exploit the new register layout. */
static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
{
    int i;
    int reverse = 0;

    for (i = 0; i < VREGS; i++) {
        /* A vreg we need that the caller thought unneeded forces a redo. */
        if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
            return 1;
        if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
            reverse++;
    }
    for (i = 0; i < N_REGS; i++) {
        if (nstate[i] >= 0 && nstate[i] != s->nat[i])
            return 1;
        if (nstate[i] < 0 && s->nat[i] >= 0)
            reverse++;
    }
    /* Enough wasted matches: recompiling callers might pay off. */
    if (reverse >= 2 && USE_MATCH)
        return 1; /* In this case, it might be worth recompiling the
                   * callers */
    return 0;
}
739
740 static __inline__ void log_startblock(void)
741 {
742 int i;
743
744 for (i = 0; i < VREGS; i++) {
745 vstate[i] = L_UNKNOWN;
746 vwritten[i] = 0;
747 }
748 for (i = 0; i < N_REGS; i++)
749 nstate[i] = L_UNKNOWN;
750 }
751
752 /* Using an n-reg for a temp variable */
753 static __inline__ void log_isused(int n)
754 {
755 if (nstate[n] == L_UNKNOWN)
756 nstate[n] = L_UNAVAIL;
757 }
758
759 static __inline__ void log_visused(int r)
760 {
761 if (vstate[r] == L_UNKNOWN)
762 vstate[r] = L_NEEDED;
763 }
764
/* Load v-reg r into native reg n, routing the two flag pseudo-registers
   through their dedicated accessors. */
static __inline__ void do_load_reg(int n, int r)
{
    if (r == FLAGTMP)
        raw_load_flagreg(n, r);
    else if (r == FLAGX)
        raw_load_flagx(n, r);
    else
        raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
}
774
/* Unconditionally load v-reg r's memory backing into native reg n
   (no flag-register special-casing, unlike do_load_reg). */
static __inline__ void check_load_reg(int n, int r)
{
    raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
}
779
780 static __inline__ void log_vwrite(int r)
781 {
782 vwritten[r] = 1;
783 }
784
785 /* Using an n-reg to hold a v-reg */
786 static __inline__ void log_isreg(int n, int r)
787 {
788 static int count = 0;
789
790 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
791 nstate[n] = r;
792 else {
793 do_load_reg(n, r);
794 if (nstate[n] == L_UNKNOWN)
795 nstate[n] = L_UNAVAIL;
796 }
797 if (vstate[r] == L_UNKNOWN)
798 vstate[r] = L_NEEDED;
799 }
800
801 static __inline__ void log_clobberreg(int r)
802 {
803 if (vstate[r] == L_UNKNOWN)
804 vstate[r] = L_UNNEEDED;
805 }
806
807 /* This ends all possibility of clever register allocation */
808
809 static __inline__ void log_flush(void)
810 {
811 int i;
812
813 for (i = 0; i < VREGS; i++)
814 if (vstate[i] == L_UNKNOWN)
815 vstate[i] = L_NEEDED;
816 for (i = 0; i < N_REGS; i++)
817 if (nstate[i] == L_UNKNOWN)
818 nstate[i] = L_UNAVAIL;
819 }
820
/* Debug dump of the register-usage log.  NOTE: the early `return`
   deliberately disables the dump; remove it to re-enable the output. */
static __inline__ void log_dump(void)
{
    int i;

    return;

    write_log("----------------------\n");
    for (i = 0; i < N_REGS; i++) {
        switch (nstate[i]) {
        case L_UNKNOWN:
            write_log("Nat %d : UNKNOWN\n", i);
            break;
        case L_UNAVAIL:
            write_log("Nat %d : UNAVAIL\n", i);
            break;
        default:
            write_log("Nat %d : %d\n", i, nstate[i]);
            break;
        }
    }
    for (i = 0; i < VREGS; i++) {
        if (vstate[i] == L_UNNEEDED)
            write_log("Virt %d: UNNEEDED\n", i);
    }
}
846
847 /********************************************************************
848 * register status handling. EMIT TIME! *
849 ********************************************************************/
850
851 static __inline__ void set_status(int r, int status)
852 {
853 if (status == ISCONST)
854 log_clobberreg(r);
855 live.state[r].status=status;
856 }
857
858 static __inline__ int isinreg(int r)
859 {
860 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
861 }
862
863 static __inline__ void adjust_nreg(int r, uae_u32 val)
864 {
865 if (!val)
866 return;
867 raw_lea_l_brr(r,r,val);
868 }
869
/* Write v-reg r's dirty native-register contents back to its memory
   backing, first materializing any pending constant offset. */
static void tomem(int r)
{
    int rr=live.state[r].realreg;

    if (isinreg(r)) {
        /* Fold the offset into the register so the stored value is right;
           only safe when this nreg holds nothing else and is unlocked. */
        if (live.state[r].val && live.nat[rr].nholds==1
            && !live.nat[rr].locked) {
            // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
            //           live.state[r].val,r,rr,target);
            adjust_nreg(rr,live.state[r].val);
            live.state[r].val=0;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
        }
    }

    if (live.state[r].status==DIRTY) {
        /* Store only the dirty low part (1, 2 or 4 bytes). */
        switch (live.state[r].dirtysize) {
        case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
        case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
        case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
        default: abort();
        }
        log_vwrite(r);
        set_status(r,CLEAN);
        live.state[r].dirtysize=0;
    }
}
898
899 static __inline__ int isconst(int r)
900 {
901 return live.state[r].status==ISCONST;
902 }
903
904 int is_const(int r)
905 {
906 return isconst(r);
907 }
908
/* Store a constant-valued v-reg directly to its memory backing with an
   immediate move, then mark it as purely in memory. */
static __inline__ void writeback_const(int r)
{
    if (!isconst(r))
        return;
    Dif (live.state[r].needflush==NF_HANDLER) {
        write_log("Trying to write back constant NF_HANDLER!\n");
        abort();
    }

    raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
    log_vwrite(r);
    live.state[r].val=0;
    set_status(r,INMEM);
}
923
924 static __inline__ void tomem_c(int r)
925 {
926 if (isconst(r)) {
927 writeback_const(r);
928 }
929 else
930 tomem(r);
931 }
932
/* Remove v-reg r from its native register: write it back to memory,
   then drop it from the nreg's holds[] set (swap-with-last removal). */
static void evict(int r)
{
    int rr;

    if (!isinreg(r))
        return;
    tomem(r);
    rr=live.state[r].realreg;

    Dif (live.nat[rr].locked &&
         live.nat[rr].nholds==1) {
        write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
        abort();
    }

    live.nat[rr].nholds--;
    /* If r was not the last entry, move the last entry into r's slot. */
    if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
        int topreg=live.nat[rr].holds[live.nat[rr].nholds];
        int thisind=live.state[r].realind;

        live.nat[rr].holds[thisind]=topreg;
        live.state[topreg].realind=thisind;
    }
    live.state[r].realreg=-1;
    set_status(r,INMEM);
}
959
/* Evict every v-reg held by native register r, leaving it empty.
   Iterates downward because each evict() shrinks nholds. */
static __inline__ void free_nreg(int r)
{
    int i=live.nat[r].nholds;

    while (i) {
        int vr;

        --i;
        vr=live.nat[r].holds[i];
        evict(vr);
    }
    Dif (live.nat[r].nholds!=0) {
        write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
        abort();
    }
}
976
977 /* Use with care! */
978 static __inline__ void isclean(int r)
979 {
980 if (!isinreg(r))
981 return;
982 live.state[r].validsize=4;
983 live.state[r].dirtysize=0;
984 live.state[r].val=0;
985 set_status(r,CLEAN);
986 }
987
988 static __inline__ void disassociate(int r)
989 {
990 isclean(r);
991 evict(r);
992 }
993
994 static __inline__ void set_const(int r, uae_u32 val)
995 {
996 disassociate(r);
997 live.state[r].val=val;
998 set_status(r,ISCONST);
999 }
1000
1001 static __inline__ uae_u32 get_offset(int r)
1002 {
1003 return live.state[r].val;
1004 }
1005
/* Allocate a native register for v-reg r.
   size       - how many low bytes the caller will access (1, 2 or 4)
   willclobber- nonzero if the caller overwrites r without reading it
   hint       - preferred native register, or -1 for no preference
   Picks the least-recently-touched eligible register (empty registers
   and the hint are strongly preferred), spills its current contents,
   loads/initializes r as required, and returns the chosen register. */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness=0; /* to shut up gcc */
    bestreg=-1;
    when=2000000000;

    /* Scan all native regs for the cheapest eligible candidate. */
    for (i=N_REGS;i--;) {
        badness=live.nat[i].touched;
        if (live.nat[i].nholds==0)
            badness=0;
        if (i==hint)
            badness-=200000000;
        if (!live.nat[i].locked && badness<when) {
            /* Sub-long accesses need a reg with byte/word addressing. */
            if ((size==1 && live.nat[i].canbyte) ||
                (size==2 && live.nat[i].canword) ||
                (size==4)) {
                bestreg=i;
                when=badness;
                if (live.nat[i].nholds==0 && hint<0)
                    break;
                if (i==hint)
                    break;
            }
        }
    }
    Dif (bestreg==-1)
        abort();

    if (live.nat[bestreg].nholds>0) {
        free_nreg(bestreg);
    }
    if (isinreg(r)) {
        int rr=live.state[r].realreg;
        /* This will happen if we read a partially dirty register at a
           bigger size */
        Dif (willclobber || live.state[r].validsize>=size)
            abort();
        Dif (live.nat[rr].nholds!=1)
            abort();
        if (size==4 && live.state[r].validsize==2) {
            /* Merge the valid low word in rr with the high word from
               memory: byteswap tricks isolate the halves, then LEA adds
               them together into rr. */
            log_isused(bestreg);
            log_visused(r);
            raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
            raw_bswap_32(bestreg);
            raw_zero_extend_16_rr(rr,rr);
            raw_zero_extend_16_rr(bestreg,bestreg);
            raw_bswap_32(bestreg);
            raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
            live.state[r].validsize=4;
            live.nat[rr].touched=touchcnt++;
            return rr;
        }
        if (live.state[r].validsize==1) {
            /* Nothing yet */
        }
        evict(r);
    }

    if (!willclobber) {
        /* Caller reads r: its current value must end up in bestreg. */
        if (live.state[r].status!=UNDEF) {
            if (isconst(r)) {
                raw_mov_l_ri(bestreg,live.state[r].val);
                live.state[r].val=0;
                live.state[r].dirtysize=4;
                set_status(r,DIRTY);
                log_isused(bestreg);
            }
            else {
                log_isreg(bestreg, r); /* This will also load it! */
                live.state[r].dirtysize=0;
                set_status(r,CLEAN);
            }
        }
        else {
            live.state[r].val=0;
            live.state[r].dirtysize=0;
            set_status(r,CLEAN);
            log_isused(bestreg);
        }
        live.state[r].validsize=4;
    }
    else { /* this is the easiest way, but not optimal. FIXME! */
        /* Now it's trickier, but hopefully still OK */
        if (!isconst(r) || size==4) {
            live.state[r].validsize=size;
            live.state[r].dirtysize=size;
            live.state[r].val=0;
            set_status(r,DIRTY);
            if (size == 4) {
                log_clobberreg(r);
                log_isused(bestreg);
            }
            else {
                log_visused(r);
                log_isused(bestreg);
            }
        }
        else {
            /* Partial clobber of a constant: materialize it first so the
               untouched bytes stay correct. */
            if (live.state[r].status!=UNDEF)
                raw_mov_l_ri(bestreg,live.state[r].val);
            live.state[r].val=0;
            live.state[r].validsize=4;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
            log_isused(bestreg);
        }
    }
    /* Register r as held by bestreg. */
    live.state[r].realreg=bestreg;
    live.state[r].realind=live.nat[bestreg].nholds;
    live.nat[bestreg].touched=touchcnt++;
    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
    live.nat[bestreg].nholds++;

    return bestreg;
}
1124
1125 static int alloc_reg(int r, int size, int willclobber)
1126 {
1127 return alloc_reg_hinted(r,size,willclobber,-1);
1128 }
1129
/* Release one lock level on native register r (must be locked). */
static void unlock2(int r)
{
    Dif (!live.nat[r].locked)
        abort();
    live.nat[r].locked--;
}
1136
/* Take a lock on native register r so the allocator won't repurpose it. */
static void setlock(int r)
{
    live.nat[r].locked++;
}
1141
1142
/* Move the entire contents of native register s into native register d:
   emits the copy and transfers every held v-reg's bookkeeping. */
static void mov_nregs(int d, int s)
{
    int ns=live.nat[s].nholds;
    int nd=live.nat[d].nholds;
    int i;

    if (s==d)
        return;

    /* Spill whatever d currently holds. */
    if (nd>0)
        free_nreg(d);

    log_isused(d);
    raw_mov_l_rr(d,s);

    /* Re-home every v-reg that lived in s. */
    for (i=0;i<live.nat[s].nholds;i++) {
        int vs=live.nat[s].holds[i];

        live.state[vs].realreg=d;
        live.state[vs].realind=i;
        live.nat[d].holds[i]=vs;
    }
    live.nat[d].nholds=live.nat[s].nholds;

    live.nat[s].nholds=0;
}
1169
1170
/* Ensure v-reg r is the ONLY v-reg held by its native register, so the
   caller may modify that register freely.  Either evicts the co-tenants
   (when they are all clean and r is only partially read) or moves r into
   a fresh register, copying/compensating the value as needed. */
static __inline__ void make_exclusive(int r, int size, int spec)
{
    int clobber;
    reg_status oldstate;
    int rr=live.state[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!isinreg(r))
        return;
    if (live.nat[rr].nholds==1)
        return;
    /* Count co-tenants that would need a writeback if evicted. */
    for (i=0;i<live.nat[rr].nholds;i++) {
        int vr=live.nat[rr].holds[i];
        if (vr!=r &&
            (live.state[vr].status==DIRTY || live.state[vr].val))
            ndirt++;
    }
    if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
        /* Everything else is clean, so let's keep this register */
        for (i=0;i<live.nat[rr].nholds;i++) {
            int vr=live.nat[rr].holds[i];
            if (vr!=r) {
                evict(vr);
                i--; /* Try that index again! */
            }
        }
        Dif (live.nat[rr].nholds!=1) {
            write_log("natreg %d holds %d vregs, %d not exclusive\n",
                      rr,live.nat[rr].nholds,r);
            abort();
        }
        return;
    }

    /* We have to split the register */
    oldstate=live.state[r];

    setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    disassociate(r);
    /* Get a new register, that we will clobber completely */
    if (oldstate.status==DIRTY) {
        /* If dirtysize is <4, we need a register that can handle the
           eventual smaller memory store! Thanks to Quake68k for exposing
           this detail ;-) */
        nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
    }
    else {
        nr=alloc_reg_hinted(r,4,1,spec);
    }
    nind=live.state[r].realind;
    live.state[r]=oldstate;   /* Keep all the old state info */
    live.state[r].realreg=nr;
    live.state[r].realind=nind;

    if (size<live.state[r].validsize) {
        /* Caller still needs the old value: copy it over. */
        if (live.state[r].val) {
            /* Might as well compensate for the offset now */
            raw_lea_l_brr(nr,rr,oldstate.val);
            live.state[r].val=0;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
        }
        else
            raw_mov_l_rr(nr,rr); /* Make another copy */
    }
    unlock2(rr);
}
1242
1243 static __inline__ void add_offset(int r, uae_u32 off)
1244 {
1245 live.state[r].val+=off;
1246 }
1247
/* Materialize the pending constant offset of virtual register r by emitting
 * the actual add on its hard register, leaving state[r].val == 0.
 * - spec : hard-register hint passed to the allocator, or -1.
 * No-op for constants (the offset lives in the constant itself) and for
 * registers with no pending offset. */
static __inline__ void remove_offset(int r, int spec)
{
    reg_status oldstate;
    int rr;

    if (isconst(r))
	return;
    if (live.state[r].val==0)
	return;
    /* A partially-valid cached copy cannot absorb a 32-bit add; flush it */
    if (isinreg(r) && live.state[r].validsize<4)
	evict(r);

    if (!isinreg(r))
	alloc_reg_hinted(r,4,0,spec);

    Dif (live.state[r].validsize!=4) {
	write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
	abort();
    }
    make_exclusive(r,0,-1);
    /* make_exclusive might have done the job already */
    if (live.state[r].val==0)
	return;

    rr=live.state[r].realreg;

    if (live.nat[rr].nholds==1) {
	//write_log("RemovingB offset %x from reg %d (%d) at %p\n",
	//	live.state[r].val,r,rr,target);
	adjust_nreg(rr,live.state[r].val);
	live.state[r].dirtysize=4;
	live.state[r].val=0;
	set_status(r,DIRTY);
	return;
    }
    write_log("Failed in remove_offset\n");
    abort();
}
1286
/* Flush the pending constant offsets of every virtual register
   (e.g. before a point where register state must be canonical). */
static __inline__ void remove_all_offsets(void)
{
    int i;

    for (i=0;i<VREGS;i++)
	remove_offset(i,-1);
}
1294
/* Acquire a hard register holding (at least `size` valid bytes of) virtual
 * register r for reading.
 * - spec       : required hard register, or -1 for any
 * - can_offset : if 0, any pending constant offset is materialized first
 * Returns the hard register, locked; caller must unlock2() it.
 * NOTE(review): when spec>=0 the byte/word capability checks are skipped —
 * presumably all `spec` targets can address the needed subregister; confirm
 * against the raw_* backends. */
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
	remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
	n=live.state[r].realreg;
	switch(size) {
	 case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	 case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	 case 4:
	    answer=n;
	    break;
	 default: abort();
	}
	/* Cached copy exists but cannot serve this access size: flush it */
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}
1342
1343
1344
/* Read access, any hard register. */
static int readreg(int r, int size)
{
    return readreg_general(r,size,-1,0);
}

/* Read access in a specific hard register (e.g. CL for shift counts). */
static int readreg_specific(int r, int size, int spec)
{
    return readreg_general(r,size,spec,0);
}

/* Read access that tolerates a pending constant offset (caller must apply
   it via get_offset). */
static int readreg_offset(int r, int size)
{
    return readreg_general(r,size,-1,1);
}
1359
1360 /* writereg_general(r, size, spec)
1361 *
1362 * INPUT
1363 * - r : mid-layer register
1364 * - size : requested size (1/2/4)
1365 * - spec : -1 if find or make a register free, otherwise specifies
1366 * the physical register to use in any case
1367 *
1368 * OUTPUT
1369 * - hard (physical, x86 here) register allocated to virtual register r
1370 */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    /* A sub-32-bit write only touches the low bytes, so any pending
       constant offset must be folded in first */
    if (size<4) {
	remove_offset(r,spec);
    }

    /* Writing will modify the hard register; make sure no other vreg
       shares it */
    make_exclusive(r,size,spec);
    if (isinreg(r)) {
	int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
	int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	n=live.state[r].realreg;

	Dif (live.nat[n].nholds!=1)
	    abort();
	switch(size) {
	 case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	 case 2:
	    if (live.nat[n].canword || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	 case 4:
	    live.state[r].dirtysize=ndsize;
	    live.state[r].validsize=nvsize;
	    answer=n;
	    break;
	 default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
	mov_nregs(spec,answer);
	answer=spec;
    }
    /* An undefined register becomes fully valid once written */
    if (live.state[r].status==UNDEF)
	live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
	/* Full write: any pending offset is overwritten */
	live.state[r].val=0;
    }
    else {
	Dif (live.state[r].val) {
	    write_log("Problem with val\n");
	    abort();
	}
    }
    set_status(r,DIRTY);
    return answer;
}
1441
/* Write access, any hard register. */
static int writereg(int r, int size)
{
    return writereg_general(r,size,-1);
}

/* Write access in a specific hard register. */
static int writereg_specific(int r, int size, int spec)
{
    return writereg_general(r,size,spec);
}
1451
/* Acquire a hard register for a read-modify-write access on virtual
 * register r: `rsize` bytes are read, `wsize` bytes are written
 * (wsize>=rsize required). The register is made exclusive, marked dirty,
 * and returned locked; caller must unlock2(). */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    remove_offset(r,spec);
    make_exclusive(r,0,spec);

    Dif (wsize<rsize) {
	write_log("Cannot handle wsize<rsize in rmw_general()\n");
	abort();
    }
    if (isinreg(r) && live.state[r].validsize>=rsize) {
	n=live.state[r].realreg;
	Dif (live.nat[n].nholds!=1)
	    abort();

	switch(rsize) {
	 case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	 case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	 case 4:
	    answer=n;
	    break;
	 default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    if (wsize>live.state[r].dirtysize)
	live.state[r].dirtysize=wsize;
    if (wsize>live.state[r].validsize)
	live.state[r].validsize=wsize;
    set_status(r,DIRTY);

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;

    /* remove_offset above must have cleared any pending offset */
    Dif (live.state[r].val) {
	write_log("Problem with val(rmw)\n");
	abort();
    }
    return answer;
}
1517
/* Read-modify-write access, any hard register. */
static int rmw(int r, int wsize, int rsize)
{
    return rmw_general(r,wsize,rsize,-1);
}

/* Read-modify-write access in a specific hard register. */
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
    return rmw_general(r,wsize,rsize,spec);
}
1527
1528
1529 /* needed for restoring the carry flag on non-P6 cores */
1530 static void bt_l_ri_noclobber(R4 r, IMM i)
1531 {
1532 int size=4;
1533 if (i<16)
1534 size=2;
1535 r=readreg(r,size);
1536 raw_bt_l_ri(r,i);
1537 unlock2(r);
1538 }
1539
1540 /********************************************************************
1541 * FPU register status handling. EMIT TIME! *
1542 ********************************************************************/
1543
/* Write back a dirty FPU virtual register to its memory slot, keeping the
   cached copy (status becomes CLEAN). */
static void f_tomem(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=CLEAN;
    }
}
1555
/* Write back a dirty FPU virtual register and drop the cached copy
   (status becomes INMEM); uses the popping store variant. */
static void f_tomem_drop(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=INMEM;
    }
}
1567
1568
/* True iff FPU virtual register r currently has a hard-register copy. */
static __inline__ int f_isinreg(int r)
{
    return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
}
1573
/* Remove FPU virtual register r from its hard register, writing it back to
   memory if dirty, and repair the hard register's holds[] bookkeeping. */
static void f_evict(int r)
{
    int rr;

    if (!f_isinreg(r))
	return;
    rr=live.fate[r].realreg;
    /* Last holder may use the cheaper popping store */
    if (live.fat[rr].nholds==1)
	f_tomem_drop(r);
    else
	f_tomem(r);

    Dif (live.fat[rr].locked &&
	 live.fat[rr].nholds==1) {
	write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
	abort();
    }

    live.fat[rr].nholds--;
    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
	/* Fill the hole with the former top entry */
	int topreg=live.fat[rr].holds[live.fat[rr].nholds];
	int thisind=live.fate[r].realind;
	live.fat[rr].holds[thisind]=topreg;
	live.fate[topreg].realind=thisind;
    }
    live.fate[r].status=INMEM;
    live.fate[r].realreg=-1;
}
1602
/* Evict every FPU virtual register held by hard register r, leaving it
   empty. Iterates from the top so f_evict's hole-filling never disturbs
   entries not yet visited. */
static __inline__ void f_free_nreg(int r)
{
    int i=live.fat[r].nholds;

    while (i) {
	int vr;

	--i;
	vr=live.fat[r].holds[i];
	f_evict(vr);
    }
    Dif (live.fat[r].nholds!=0) {
	write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
	abort();
    }
}
1619
1620
1621 /* Use with care! */
1622 static __inline__ void f_isclean(int r)
1623 {
1624 if (!f_isinreg(r))
1625 return;
1626 live.fate[r].status=CLEAN;
1627 }
1628
/* Detach FPU virtual register r from its hard register, discarding any
   dirty data (the clean-then-evict sequence skips the write-back). */
static __inline__ void f_disassociate(int r)
{
    f_isclean(r);
    f_evict(r);
}
1634
1635
1636
/* Allocate an FPU hard register for virtual register r.
 * - willclobber : nonzero if the caller will overwrite the value entirely,
 *   in which case the old contents need not be loaded from memory.
 * Selection prefers empty registers, otherwise the least-recently-touched
 * unlocked one (its holders are spilled). Returns the hard register index. */
static int f_alloc_reg(int r, int willclobber)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness;
    bestreg=-1;
    when=2000000000;
    for (i=N_FREGS;i--;) {
	badness=live.fat[i].touched;
	if (live.fat[i].nholds==0)
	    badness=0;

	if (!live.fat[i].locked && badness<when) {
	    bestreg=i;
	    when=badness;
	    if (live.fat[i].nholds==0)
		break;  /* empty register: cannot do better */
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.fat[bestreg].nholds>0) {
	f_free_nreg(bestreg);
    }
    if (f_isinreg(r)) {
	f_evict(r);
    }

    if (!willclobber) {
	/* Load the current value from memory (unless it was never defined) */
	if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
	    raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
#else
	    raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
#endif
	}
	live.fate[r].status=CLEAN;
    }
    else {
	live.fate[r].status=DIRTY;
    }
    live.fate[r].realreg=bestreg;
    live.fate[r].realind=live.fat[bestreg].nholds;
    live.fat[bestreg].touched=touchcnt++;
    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
    live.fat[bestreg].nholds++;

    return bestreg;
}
1688
/* Release one lock level on FPU hard register r (aborts if not locked). */
static void f_unlock(int r)
{
    Dif (!live.fat[r].locked)
	abort();
    live.fat[r].locked--;
}

/* Pin FPU hard register r so the allocator will not reassign it. */
static void f_setlock(int r)
{
    live.fat[r].locked++;
}
1700
/* Acquire an FPU hard register holding virtual register r for reading.
   Returned register is locked; caller must f_unlock(). */
static __inline__ int f_readreg(int r)
{
    int n;
    int answer=-1;

    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
	answer=n;
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0)
	answer=f_alloc_reg(r,0);

    live.fat[answer].locked++;
    live.fat[answer].touched=touchcnt++;
    return answer;
}
1719
/* FPU counterpart of make_exclusive: give virtual register r an FPU hard
 * register of its own.
 * - clobber : nonzero if the caller will overwrite the value, so no copy of
 *   the old contents is needed. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
	return;
    if (live.fat[rr].nholds==1)
	return;
    /* Count co-residents with unsaved modifications */
    for (i=0;i<live.fat[rr].nholds;i++) {
	int vr=live.fat[rr].holds[i];
	if (vr!=r && live.fate[vr].status==DIRTY)
	    ndirt++;
    }
    if (!ndirt && !live.fat[rr].locked) {
	/* Everything else is clean, so let's keep this register */
	for (i=0;i<live.fat[rr].nholds;i++) {
	    int vr=live.fat[rr].holds[i];
	    if (vr!=r) {
		f_evict(vr);
		i--; /* Try that index again! (f_evict compacts holds[]) */
	    }
	}
	Dif (live.fat[rr].nholds!=1) {
	    write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
	    for (i=0;i<live.fat[rr].nholds;i++) {
		write_log(" %d(%d,%d)",live.fat[rr].holds[i],
			  live.fate[live.fat[rr].holds[i]].realreg,
			  live.fate[live.fat[rr].holds[i]].realind);
	    }
	    write_log("\n");
	    abort();
	}
	return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
	raw_fmov_rr(nr,rr);  /* Make another copy */
    live.fate[r]=oldstate;   /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
1776
1777
/* Acquire an exclusive FPU hard register for writing virtual register r
   (old contents are discarded). Returned register is locked and dirty. */
static __inline__ int f_writereg(int r)
{
    int n;
    int answer=-1;

    f_make_exclusive(r,1);
    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
	answer=n;
    }
    if (answer<0) {
	answer=f_alloc_reg(r,1);
    }
    live.fate[r].status=DIRTY;
    live.fat[answer].locked++;
    live.fat[answer].touched=touchcnt++;
    return answer;
}
1796
/* Acquire an exclusive FPU hard register for a read-modify-write access on
   virtual register r (old contents preserved). Returned register is locked
   and marked dirty. */
static int f_rmw(int r)
{
    int n;

    f_make_exclusive(r,0);
    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
    }
    else
	n=f_alloc_reg(r,0);
    live.fate[r].status=DIRTY;
    live.fat[n].locked++;
    live.fat[n].touched=touchcnt++;
    return n;
}
1812
/* Transfer the FPU condition codes (from FP_RESULT) into the emulated CPU
 * flags. `tmp` is a scratch virtual register used only when the backend
 * needs to clobber a specific hard register (FFLAG_NREG) to do the
 * transfer. NOTE: the MIDFUNC caller also calls clobber_flags() before
 * invoking this — the second call here is redundant but harmless. */
static void fflags_into_flags_internal(uae_u32 tmp)
{
    int r;

    clobber_flags();
    r=f_readreg(FP_RESULT);
    if (FFLAG_NREG_CLOBBER_CONDITION) {
	/* Pin tmp into the hard register the transfer clobbers, then
	   forget its (meaningless) contents afterwards */
	int tmp2=tmp;
	tmp=writereg_specific(tmp,4,FFLAG_NREG);
	raw_fflags_into_flags(r);
	unlock2(tmp);
	forget_about(tmp2);
    }
    else
	raw_fflags_into_flags(r);
    f_unlock(r);
}
1830
1831
1832
1833
1834 /********************************************************************
1835 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
1836 ********************************************************************/
1837
1838 /*
1839 * RULES FOR HANDLING REGISTERS:
1840 *
1841 * * In the function headers, order the parameters
1842 * - 1st registers written to
1843 * - 2nd read/modify/write registers
1844 * - 3rd registers read from
1845 * * Before calling raw_*, you must call readreg, writereg or rmw for
1846 * each register
1847 * * The order for this is
1848 * - 1st call remove_offset for all registers written to with size<4
1849 * - 2nd call readreg for all registers read without offset
1850 * - 3rd call rmw for all rmw registers
1851 * - 4th call readreg_offset for all registers that can handle offsets
1852 * - 5th call get_offset for all the registers from the previous step
1853 * - 6th call writereg for all written-to registers
1854 * - 7th call raw_*
1855 * - 8th unlock2 all registers that were locked
1856 */
1857
/* Mark the native flags as holding the live 68k flag state; the stack copy
   is now stale and must be preserved until saved. */
MIDFUNC(0,live_flags,(void))
{
    live.flags_on_stack=TRASH;
    live.flags_in_flags=VALID;
    live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))

/* Tell the allocator the current flag values need not be preserved. */
MIDFUNC(0,dont_care_flags,(void))
{
    live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))


/* Copy the native carry flag into the emulated X flag's memory slot. */
MIDFUNC(0,duplicate_carry,(void))
{
    evict(FLAGX);
    make_flags_live_internal();
    COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
    log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))

/* Load the native carry flag from the emulated X flag. */
MIDFUNC(0,restore_carry,(void))
{
    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
	bt_l_ri_noclobber(FLAGX,0);
    }
    else { /* Avoid the stall the above creates.
	      This is slow on non-P6, though.
	   */
	COMPCALL(rol_b_ri(FLAGX,8));
	isclean(FLAGX);
    }
}
MENDFUNC(0,restore_carry,(void))

/* Bracket a sequence whose emitted instructions must preserve flags. */
MIDFUNC(0,start_needflags,(void))
{
    needflags=1;
}
MENDFUNC(0,start_needflags,(void))

MIDFUNC(0,end_needflags,(void))
{
    needflags=0;
}
MENDFUNC(0,end_needflags,(void))

/* Force the emulated flag state into the native flags register. */
MIDFUNC(0,make_flags_live,(void))
{
    make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))

/* Transfer FPU condition codes into the CPU flags; tmp is scratch. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
    clobber_flags();
    fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
1920
1921
/* Bit-test family (BT/BTC/BTR/BTS). The _ri variants use a word-sized
   access when the bit index fits in 16 bits; the complement/reset/set
   variants are read-modify-write on the tested register. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=readreg(r,size);
    raw_bt_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */

MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
    CLOBBER_BT;
    r=readreg(r,4);
    b=readreg(b,4);
    raw_bt_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */

MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btc_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btc_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))


MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btr_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btr_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))


MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_bts_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))

MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_bts_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2015
/* Load a 32-bit value from absolute address s into register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,4);
    raw_mov_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))


/* Indirect call through register r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
    r=readreg(r,4);
    raw_call_r(r);
    unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */

/* Memory-destination immediate ops: d is an absolute address, s a constant. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
    CLOBBER_SUB;
    raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))

MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))

MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))

MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2061
/* Rotate-left by immediate, byte/word/long. A zero rotate is a no-op
   unless the flags are needed. */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,1,1);
    raw_rol_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))

MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,2,2);
    raw_rol_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))

MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,4,4);
    raw_rol_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2094
2095 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2096 {
2097 if (isconst(r)) {
2098 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2099 return;
2100 }
2101 CLOBBER_ROL;
2102 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2103 d=rmw(d,4,4);
2104 Dif (r!=1) {
2105 write_log("Illegal register %d in raw_rol_b\n",r);
2106 abort();
2107 }
2108 raw_rol_l_rr(d,r) ;
2109 unlock2(r);
2110 unlock2(d);
2111 }
2112 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2113
/* Rotate-left by register count, word/byte; the count must be in CL. */
MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_rol_b\n",r);
	abort();
    }
    raw_rol_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))

MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_rol_b\n",r);
	abort();
    }
    raw_rol_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2154
2155
2156 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2157 {
2158 if (isconst(r)) {
2159 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2160 return;
2161 }
2162 CLOBBER_SHLL;
2163 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2164 d=rmw(d,4,4);
2165 Dif (r!=1) {
2166 write_log("Illegal register %d in raw_rol_b\n",r);
2167 abort();
2168 }
2169 raw_shll_l_rr(d,r) ;
2170 unlock2(r);
2171 unlock2(d);
2172 }
2173 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2174
/* Logical shift left by register count, word/byte; count must be in CL. */
MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHLL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shll_b\n",r);
	abort();
    }
    raw_shll_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))

MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHLL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shll_b\n",r);
	abort();
    }
    raw_shll_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2215
2216
/* Rotate-right family. NOTE(review): parameters are declared R1/R2/R4
   (read-only) but the bodies use rmw() — they do modify the register;
   presumably the declarations predate the RW* annotations. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,1,1);
    raw_ror_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))

MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,2,2);
    raw_ror_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))

MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,4,4);
    raw_ror_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))

MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,4,4);
    raw_ror_l_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))

MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    raw_ror_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))

MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    raw_ror_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2295
2296 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2297 {
2298 if (isconst(r)) {
2299 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2300 return;
2301 }
2302 CLOBBER_SHRL;
2303 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2304 d=rmw(d,4,4);
2305 Dif (r!=1) {
2306 write_log("Illegal register %d in raw_rol_b\n",r);
2307 abort();
2308 }
2309 raw_shrl_l_rr(d,r) ;
2310 unlock2(r);
2311 unlock2(d);
2312 }
2313 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2314
/* Logical shift right by register count, word/byte; count must be in CL. */
MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHRL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shrl_b\n",r);
	abort();
    }
    raw_shrl_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))

MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHRL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shrl_b\n",r);
	abort();
    }
    raw_shrl_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2355
2356
2357
/* Shift-by-immediate family (logical left/right, arithmetic right).
   Zero shifts are skipped unless flags are needed; the 32-bit logical
   forms also constant-fold when the operand is a known constant. */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val<<=i;
	return;
    }
    CLOBBER_SHLL;
    r=rmw(r,4,4);
    raw_shll_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,2,2);
    raw_shll_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,1,1);
    raw_shll_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))

MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val>>=i;
	return;
    }
    CLOBBER_SHRL;
    r=rmw(r,4,4);
    raw_shrl_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,2,2);
    raw_shrl_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,1,1);
    raw_shrl_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))

MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,4,4);
    raw_shra_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,2,2);
    raw_shra_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,1,1);
    raw_shra_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2464
2465 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2466 {
2467 if (isconst(r)) {
2468 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2469 return;
2470 }
2471 CLOBBER_SHRA;
2472 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2473 d=rmw(d,4,4);
2474 Dif (r!=1) {
2475 write_log("Illegal register %d in raw_rol_b\n",r);
2476 abort();
2477 }
2478 raw_shra_l_rr(d,r) ;
2479 unlock2(r);
2480 unlock2(d);
2481 }
2482 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2483
/* Arithmetic shift right by register count, word/byte; count must be in CL. */
MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shra_b\n",r);
	abort();
    }
    raw_shra_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))

MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shra_b\n",r);
	abort();
    }
    raw_shra_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2524
2525
/* Set byte on condition cc, register and memory destinations. */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
    CLOBBER_SETCC;
    d=writereg(d,1);
    raw_setcc(d,cc);
    unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))

MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
    CLOBBER_SETCC;
    raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))

/* Conditional move: d = s (register or memory source) if cc holds. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
    if (d==s)
	return;
    CLOBBER_CMOV;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_cmov_l_rr(d,s,cc);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))

MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
    CLOBBER_CMOV;
    d=rmw(d,4,4);
    raw_cmov_l_rm(d,s,cc);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))

/* Bit scan forward: d = index of lowest set bit in s. */
MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
{
    CLOBBER_BSF;
    s=readreg(s,4);
    d=writereg(d,4);
    raw_bsf_l_rr(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))

/* Multiplies. The 64-bit forms need the fixed EDX:EAX register pair
   (MUL_NREG1/MUL_NREG2). */
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_imul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))

MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_imul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))

MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_mul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))

MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_mul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2618
/* Sign/zero extension. Constant sources are folded at compile time.
   When source and destination are the same vreg, a single rmw lock is
   used — locking the same register twice with different sizes would
   corrupt the allocator state. */
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s16)live.state[s].val);
	return;
    }

    CLOBBER_SE16;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_sign_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))

MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_SE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_sign_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))


MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u16)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE16;
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_zero_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
2711
2712 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
2713 {
2714 int isrmw;
2715 if (isconst(s)) {
2716 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
2717 return;
2718 }
2719
2720 isrmw=(s==d);
2721 CLOBBER_ZE8;
2722 if (!isrmw) {
2723 s=readreg(s,1);
2724 d=writereg(d,4);
2725 }
2726 else { /* If we try to lock this twice, with different sizes, we
2727 are int trouble! */
2728 s=d=rmw(s,4,1);
2729 }
2730
2731 raw_zero_extend_8_rr(d,s);
2732
2733 if (!isrmw) {
2734 unlock2(d);
2735 unlock2(s);
2736 }
2737 else {
2738 unlock2(s);
2739 }
2740 }
2741 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
2742
/* Byte register-to-register move: d = (uint8)s. Forwards to the immediate
   variant when s is a known constant. */
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=writereg(d,1);
	raw_mov_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))

/* Word register-to-register move: d = (uint16)s; same constant fast path. */
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=writereg(d,2);
	raw_mov_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
2778
2779
/* Indexed loads: d = value at address baser + index*factor
   (long/word/byte variants). */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_mov_l_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,2);

	raw_mov_w_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,1);

	raw_mov_b_rrm_indexed(d,baser,index,factor);

	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
2822
2823
/* Indexed stores: write s to address baser + index*factor
   (long/word/byte variants). */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,4);

	/* Debug-only sanity check: the allocator must not have mapped the
	   source onto the same native register as the address operands. */
	Dif (baser==s || index==s)
		abort();


	raw_mov_l_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,2);

	raw_mov_w_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg(baser,4);
	index=readreg(index,4);

	raw_mov_b_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2869
2870
/* Based indexed stores: write s to address base + baser + index*factor.
   Pending constant offsets tracked for baser/index are folded into the
   immediate base before emitting (readreg_offset + get_offset). */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,4);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,2);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2930
2931
2932
/* Read a long from base+baser+factor*index */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,4);
	raw_mov_l_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))


/* Word variant. The partial write only updates the low 16 bits of d, so
   any pending offset on d must be materialized first (remove_offset). */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,2);
	raw_mov_w_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))


/* Byte variant; same remove_offset requirement as the word variant. */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,1);
	raw_mov_b_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2991
/* Read a long from base+factor*index */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
	int indexreg=index;

	/* Constant index: the whole address is known, use the plain load. */
	if (isconst(index)) {
		COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
		return;
	}

	CLOBBER_MOV;
	index=readreg_offset(index,4);
	base+=get_offset(indexreg)*factor;	/* fold pending offset into base */
	d=writereg(d,4);

	raw_mov_l_rm_indexed(d,base,index,factor);
	unlock2(index);
	unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3012
3013
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,4);

	raw_mov_l_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,2);

	raw_mov_w_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,1);

	raw_mov_b_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3064
/* read the long at the address contained in s+offset and store in d
   ("b" variant: s may carry a pending constant offset, which is folded
   into the immediate displacement instead of being materialized) */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
	int sreg=s;
	if (isconst(s)) {
		COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,4);

	raw_mov_l_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
	int sreg=s;
	if (isconst(s)) {
		COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	remove_offset(d,-1);	/* partial write: flush d's pending offset first */
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,2);

	raw_mov_w_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
	int sreg=s;
	if (isconst(s)) {
		COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	remove_offset(d,-1);	/* partial write: flush d's pending offset first */
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,1);

	raw_mov_b_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3123
/* Store immediate i to the address contained in d+offset
   (long/word/byte variants). Constant d collapses to a plain
   memory-immediate store. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
	int dreg=d;
	if (isconst(d)) {
		COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
		return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);	/* fold pending offset into displacement */
	raw_mov_l_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))

MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
	int dreg=d;
	if (isconst(d)) {
		COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
		return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_w_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))

MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
	int dreg=d;
	if (isconst(d)) {
		COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
		return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_b_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3171
/* Warning! OFFSET is byte sized only! */
/* Store register s to the address contained in d+offset
   (long/word/byte variants). Each constant operand is routed to the
   corresponding simpler mid-level function. */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
	if (isconst(d)) {
		COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,4);
	d=readreg(d,4);

	raw_mov_l_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))

MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
	if (isconst(d)) {
		COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=readreg(d,4);
	raw_mov_w_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))

MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
	if (isconst(d)) {
		COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=readreg(d,4);
	raw_mov_b_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3233
/* d = s + offset (address computation, no flags). Constant s folds to an
   immediate; with USE_OFFSET, d==s just records the offset as pending
   bookkeeping (add_offset) instead of emitting code. */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
		return;
	}
#if USE_OFFSET
	if (d==s) {
		add_offset(d,offset);
		return;
	}
#endif
	CLOBBER_LEA;
	s=readreg(s,4);
	d=writereg(d,4);
	raw_lea_l_brr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))

/* d = s + index*factor + offset; the offset==0 case is delegated to the
   shorter-encoding indexed form below. */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
	if (!offset) {
		COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
		return;
	}
	CLOBBER_LEA;
	s=readreg(s,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_lea_l_brr_indexed(d,s,index,factor,offset);
	unlock2(d);
	unlock2(index);
	unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

/* d = s + index*factor */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
	CLOBBER_LEA;
	s=readreg(s,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_lea_l_rr_indexed(d,s,index,factor);
	unlock2(d);
	unlock2(index);
	unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3286
/* write s to the long at the address contained in d+offset
   ("b" variant: d's pending constant offset is folded into the
   displacement via readreg_offset/get_offset) */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
	int dreg=d;
	if (isconst(d)) {
		COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,4);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);

	raw_mov_l_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))

/* write s to the word at the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
	int dreg=d;

	if (isconst(d)) {
		COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_w_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))

/* write s to the byte at the address contained in d+offset */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
	int dreg=d;
	if (isconst(d)) {
		COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_b_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3344
/* Byte-swap all four bytes of r; constants are folded via reverse32. */
MIDFUNC(1,bswap_32,(RW4 r))
{
	int reg=r;

	if (isconst(r)) {
		uae_u32 oldv=live.state[r].val;
		live.state[r].val=reverse32(oldv);
		return;
	}

	CLOBBER_SW32;
	r=rmw(r,4,4);
	raw_bswap_32(r);
	unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))

/* Swap the two low bytes of r; the high word is preserved (see the
   constant-fold expression, which keeps oldv&0xffff0000). */
MIDFUNC(1,bswap_16,(RW2 r))
{
	if (isconst(r)) {
		uae_u32 oldv=live.state[r].val;
		live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
			(oldv&0xffff0000);
		return;
	}

	CLOBBER_SW16;
	r=rmw(r,2,2);

	raw_bswap_16(r);
	unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3378
3379
3380
/* Long register move, d = s. No move instruction is emitted: d is simply
   re-associated ("aliased") to the native register that already holds s,
   and the allocator's bookkeeping (live.state / live.nat) is updated so
   that the native register now holds both mid-layer registers. d is
   marked DIRTY so it gets written back to memory eventually. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
	int olds;

	if (d==s) { /* How pointless! */
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_l_ri)(d,live.state[s].val);
		return;
	}
	olds=s;
	disassociate(d);		/* drop d's old native-register binding */
	s=readreg_offset(s,4);
	live.state[d].realreg=s;
	live.state[d].realind=live.nat[s].nholds;
	live.state[d].val=live.state[olds].val;	/* inherit s's pending offset */
	live.state[d].validsize=4;
	live.state[d].dirtysize=4;
	set_status(d,DIRTY);

	live.nat[s].holds[live.nat[s].nholds]=d;
	live.nat[s].nholds++;
	log_clobberreg(d);
	/* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
	   d,s,live.state[d].realind,live.nat[s].nholds); */
	unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3410
/* Stores to / loads from an absolute (immediate) memory address, for
   long, word and byte sizes. Constant sources collapse to
   memory-immediate stores. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(mov_l_mi)(d,live.state[s].val);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);

	raw_mov_l_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))


MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,2);

	raw_mov_w_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,2);

	raw_mov_w_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);

	raw_mov_b_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,1);

	raw_mov_b_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3474
/* Load immediate into register. The long form emits no code at all: the
   constant is just recorded in the register state (constant
   propagation). Word/byte forms are partial writes, so they go through a
   real native register. */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
	set_const(d,s);
	return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,2);

	raw_mov_w_ri(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,1);

	raw_mov_b_ri(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3501
3502
/* Add immediate s directly to memory at immediate address d
   (long/word/byte variants); no register allocation needed. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
	CLOBBER_ADD;
	raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
	CLOBBER_ADD;
	raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
	CLOBBER_ADD;
	raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3523
3524
/* Flag-setting test of d against immediate mask i (d & i, result
   discarded, flags kept). */
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
	CLOBBER_TEST;
	d=readreg(d,4);

	raw_test_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3534
3535 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3536 {
3537 CLOBBER_TEST;
3538 d=readreg(d,4);
3539 s=readreg(s,4);
3540
3541 raw_test_l_rr(d,s);;
3542 unlock2(d);
3543 unlock2(s);
3544 }
3545 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3546
/* Word and byte variants of the flag-setting register test. */
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
	CLOBBER_TEST;
	d=readreg(d,2);
	s=readreg(s,2);

	raw_test_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
	CLOBBER_TEST;
	d=readreg(d,1);
	s=readreg(s,1);

	raw_test_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3570
3571
/* Bitwise AND family. The immediate long form constant-folds when d is a
   known constant and the emulated flags are not needed. */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
	if (isconst(d) && !needflags) {
		live.state[d].val &= i;
		return;
	}

	CLOBBER_AND;
	d=rmw(d,4,4);

	raw_and_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

MIDFUNC(2,and_l,(RW4 d, R4 s))
{
	CLOBBER_AND;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_and_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

MIDFUNC(2,and_w,(RW2 d, R2 s))
{
	CLOBBER_AND;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_and_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

MIDFUNC(2,and_b,(RW1 d, R1 s))
{
	CLOBBER_AND;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_and_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
3622
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* d |= [s] (OR with immediate memory address). */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
	CLOBBER_OR;
	d=rmw(d,4,4);

	raw_or_l_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))

/* d |= i; constant-folds when d is a known constant and flags are not needed. */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
	if (isconst(d) && !needflags) {
		live.state[d].val|=i;
		return;
	}
	CLOBBER_OR;
	d=rmw(d,4,4);

	raw_or_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))

/* d |= s; fully constant-folds when both operands are known constants. */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
	if (isconst(d) && isconst(s) && !needflags) {
		live.state[d].val|=live.state[s].val;
		return;
	}
	CLOBBER_OR;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_or_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))

MIDFUNC(2,or_w,(RW2 d, R2 s))
{
	CLOBBER_OR;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_or_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

MIDFUNC(2,or_b,(RW1 d, R1 s))
{
	CLOBBER_OR;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_or_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
3687
/* Add-with-carry family: d += s + C. Consumes the current carry flag, so
   no constant folding is possible here. */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
	CLOBBER_ADC;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_adc_l(d,s);

	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
	CLOBBER_ADC;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_adc_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
	CLOBBER_ADC;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_adc_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
3724
/* Register add family: d += s. A constant source is routed to the
   immediate variant, which can fold or defer the addition. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(add_l_ri)(d,live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_add_l(d,s);

	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

MIDFUNC(2,add_w,(RW2 d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_add_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

MIDFUNC(2,add_b,(RW1 d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_add_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
3776
/* d -= i. When flags are not needed the long form avoids emitting code:
   constants fold directly, and otherwise the subtraction is recorded as
   a pending offset (add_offset with -i) to be materialized later. */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(d) && !needflags) {
		live.state[d].val-=i;
		return;
	}
#if USE_OFFSET
	if (!needflags) {
		add_offset(d,-i);
		return;
	}
#endif

	CLOBBER_SUB;
	d=rmw(d,4,4);

	raw_sub_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

/* Word/byte forms are partial-width, so no offset trick is applied. */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_SUB;
	d=rmw(d,2,2);

	raw_sub_w_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_SUB;
	d=rmw(d,1,1);

	raw_sub_b_ri(d,i);

	unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
3826
/* d += i; mirror image of sub_l_ri, with the same constant-fold and
   pending-offset fast paths when flags are not needed. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(d) && !needflags) {
		live.state[d].val+=i;
		return;
	}
#if USE_OFFSET
	if (!needflags) {
		add_offset(d,i);
		return;
	}
#endif
	CLOBBER_ADD;
	d=rmw(d,4,4);
	raw_add_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_ADD;
	d=rmw(d,2,2);

	raw_add_w_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_ADD;
	d=rmw(d,1,1);

	raw_add_b_ri(d,i);

	unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
3874
/* Subtract-with-borrow family: d -= s + C. Consumes the carry flag, so
   no folding/fast paths. */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
	CLOBBER_SBB;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_sbb_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
	CLOBBER_SBB;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_sbb_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
	CLOBBER_SBB;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_sbb_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
3910
/* Register subtract family: d -= s; constant sources go through the
   immediate variants. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(sub_l_ri)(d,live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_sub_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_sub_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_sub_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
3961
/* Compare family: set flags from d - s (or d - i); operands are
   read-only. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
	CLOBBER_CMP;
	s=readreg(s,4);
	d=readreg(d,4);

	raw_cmp_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
	CLOBBER_CMP;
	r=readreg(r,4);

	raw_cmp_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
	CLOBBER_CMP;
	s=readreg(s,2);
	d=readreg(d,2);

	raw_cmp_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
	CLOBBER_CMP;
	s=readreg(s,1);
	d=readreg(d,1);

	raw_cmp_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4007
4008
/* Bitwise XOR family: d ^= s. */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
	CLOBBER_XOR;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_xor_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
	CLOBBER_XOR;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_xor_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
	CLOBBER_XOR;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_xor_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4044
/* Indirect call through register r with one input (in1, isize bytes) and
   one output (out1, osize bytes). The input is pinned to the platform
   argument register REG_PAR1; after the call, the native result register
   REG_RESULT is re-associated with out1 in the allocator state. Flags
   and all pending offsets are invalidated around the call. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
	clobber_flags();
	remove_all_offsets();
	if (osize==4) {
		/* Full-width result: the old value of out1 is dead unless it
		   aliases an operand of the call. */
		if (out1!=in1 && out1!=r) {
			COMPCALL(forget_about)(out1);
		}
	}
	else {
		tomem_c(out1);	/* partial-width result: preserve the rest of out1 */
	}

	in1=readreg_specific(in1,isize,REG_PAR1);
	r=readreg(r,4);
	prepare_for_call_1(); /* This should ensure that there won't be
				 any need for swapping nregs in prepare_for_call_2
			      */
#if USE_NORMAL_CALLING_CONVENTION
	raw_push_l_r(in1);
#endif
	unlock2(in1);
	unlock2(r);

	prepare_for_call_2();
	raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
	raw_inc_sp(4);
#endif


	/* Bind out1 to the native result register. */
	live.nat[REG_RESULT].holds[0]=out1;
	live.nat[REG_RESULT].nholds=1;
	live.nat[REG_RESULT].touched=touchcnt++;

	live.state[out1].realreg=REG_RESULT;
	live.state[out1].realind=0;
	live.state[out1].val=0;
	live.state[out1].validsize=osize;
	live.state[out1].dirtysize=osize;
	set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4089
/* Indirect call through register r with two inputs and no result. The
   inputs are pinned to REG_PAR1/REG_PAR2; flags and pending offsets are
   invalidated around the call. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
	clobber_flags();
	remove_all_offsets();
	in1=readreg_specific(in1,isize1,REG_PAR1);
	in2=readreg_specific(in2,isize2,REG_PAR2);
	r=readreg(r,4);
	prepare_for_call_1(); /* This should ensure that there won't be
				 any need for swapping nregs in prepare_for_call_2
			      */
#if USE_NORMAL_CALLING_CONVENTION
	raw_push_l_r(in2);
	raw_push_l_r(in1);
#endif
	unlock2(r);
	unlock2(in1);
	unlock2(in2);
	prepare_for_call_2();
	raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
	raw_inc_sp(8);
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4114
/* forget_about() takes a mid-layer register */
/* Drop all knowledge about vreg r: detach it from any native register
 * and mark its value undefined (nothing will be written back). */
MIDFUNC(1,forget_about,(W4 r))
{
	if (isinreg(r))
		disassociate(r);
	live.state[r].val=0;
	set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))
4124
/* Emit a single native no-op instruction. */
MIDFUNC(0,nop,(void))
{
	raw_nop();
}
MENDFUNC(0,nop,(void))
4130
4131
/* FP counterpart of forget_about(): drop all knowledge about FP vreg r. */
MIDFUNC(1,f_forget_about,(FW r))
{
	if (f_isinreg(r))
		f_disassociate(r);
	live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
4139
/* The following six functions load a well-known FPU constant into FP
 * vreg r. Pattern is identical throughout: allocate r for writing,
 * emit the raw constant-load, release the native register. */

/* r = pi */
MIDFUNC(1,fmov_pi,(FW r))
{
	r=f_writereg(r);
	raw_fmov_pi(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

/* r = log10(2) */
MIDFUNC(1,fmov_log10_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log10_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

/* r = log2(e) */
MIDFUNC(1,fmov_log2_e,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log2_e(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

/* r = ln(2) */
MIDFUNC(1,fmov_loge_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_loge_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

/* r = 1.0 */
MIDFUNC(1,fmov_1,(FW r))
{
	r=f_writereg(r);
	raw_fmov_1(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

/* r = 0.0 */
MIDFUNC(1,fmov_0,(FW r))
{
	r=f_writereg(r);
	raw_fmov_0(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))
4187
/* FP moves between vregs and memory. Naming: _rm = load from memory,
 * _mr = store to memory; plain = double, "s" = single, "i" = integer,
 * "ext" = 80-bit extended. Loads allocate the vreg for writing; stores
 * only read it. */

MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovi_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovi_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovs_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovs_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_ext_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_ext_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4251
/* FP register-to-register move. With USE_F_ALIAS, no code is emitted:
 * d simply becomes another (dirty) alias for s's native register. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	if (d==s) { /* How pointless! */
		return;
	}
#if USE_F_ALIAS
	/* Detach d from its old home, then append it to the list of vregs
	   held by s's native register. */
	f_disassociate(d);
	s=f_readreg(s);
	live.fate[d].realreg=s;
	live.fate[d].realind=live.fat[s].nholds;
	live.fate[d].status=DIRTY;
	live.fat[s].holds[live.fat[s].nholds]=d;
	live.fat[s].nholds++;
	f_unlock(s);
#else
	/* Non-aliasing fallback: emit an actual copy. */
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fmov_rr(d,s);
	f_unlock(s);
	f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4275
/* Load the FPU control word from memory at base+index
 * (used to switch rounding/precision modes). */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	index=readreg(index,4);

	raw_fldcw_m_indexed(index,base);
	unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4284
/* Compare FP vreg r against zero, setting the FPU condition codes. */
MIDFUNC(1,ftst_r,(FR r))
{
	r=f_readreg(r);
	raw_ftst_r(r);
	f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))
4292
/* Declare the FPU result (and hence FP condition flags) dead, so the
 * allocator need not preserve FP_RESULT across what follows. */
MIDFUNC(0,dont_care_fflags,(void))
{
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4298
/* Unary FP operations, d = op(s). Pattern is identical throughout:
 * read s, allocate d for writing, emit the raw op, release both.
 * Note s must be read before d is allocated, in case they alias. */

MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsqrt_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fabs_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))

MIDFUNC(2,fsin_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsin_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))

MIDFUNC(2,fcos_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fcos_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))

/* d = 2^s */
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_ftwotox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

/* d = e^s */
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fetox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))

/* d = s rounded to integer (current rounding mode) */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_frndint_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))

/* d = log2(s) */
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_flog2_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))

MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fneg_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4388
/* Binary FP operations, d = d op s. The destination is read-modify-write
 * (f_rmw), so its previous value is loaded before the op. fcmp_rr is the
 * exception: it only reads both operands and sets condition codes. */

MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fadd_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))

MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fsub_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))

/* Compare d with s; both operands are read-only. */
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
	d=f_readreg(d);
	s=f_readreg(s);
	raw_fcmp_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))

MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fdiv_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

MIDFUNC(2,frem_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))

MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem1_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))

MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fmul_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4458
4459 /********************************************************************
4460 * Support functions exposed to gencomp. CREATE time *
4461 ********************************************************************/
4462
4463 int kill_rodent(int r)
4464 {
4465 return KILLTHERAT &&
4466 have_rat_stall &&
4467 (live.state[r].status==INMEM ||
4468 live.state[r].status==CLEAN ||
4469 live.state[r].status==ISCONST ||
4470 live.state[r].dirtysize==4);
4471 }
4472
/* Return the known constant value of vreg r.
 * It is a (debug-checked) error to call this on a non-constant vreg. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}
4481
/* Flush the accumulated 68k PC displacement: fold m68k_pc_offset into
 * the PC_P vreg and the compile-time PC, then reset the offset. */
void sync_m68k_pc(void)
{
	if (m68k_pc_offset) {
		add_l_ri(PC_P,m68k_pc_offset);
		comp_pc_p+=m68k_pc_offset;
		m68k_pc_offset=0;
	}
}
4490
4491 /********************************************************************
4492 * Scratch registers management *
4493 ********************************************************************/
4494
/* Backing store for scratch vregs (those beyond the ones mapped to
 * actual 68k registers); see init_comp() for how it is wired up. */
struct scratch_t {
	uae_u32 regs[VREGS];
	fpu_register fregs[VFREGS];
};

static scratch_t scratch;
4501
4502 /********************************************************************
4503 * Support functions exposed to newcpu *
4504 ********************************************************************/
4505
/* Render a boolean as "on"/"off" for the log output below. */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
4510
/* Map a 68k opcode to its index in the compiler function tables.
 * When opcode words are stored byte-swapped in memory
 * (HAVE_GET_WORD_UNSWAPPED), swap the two low bytes; otherwise
 * the opcode is used as-is. */
static __inline__ unsigned int cft_map (unsigned int f)
{
#ifdef HAVE_GET_WORD_UNSWAPPED
	unsigned int lo = f & 255;
	unsigned int hi = (f >> 8) & 255;
	return hi | (lo << 8);
#else
	return f;
#endif
}
4519
/* One-time JIT initialization: open /dev/zero (for the translation
 * cache mmap), read the relevant prefs, probe the target CPU and
 * build the compiler dispatch tables. Safe to call more than once. */
void compiler_init(void)
{
	static bool initialized = false;
	if (initialized)
		return;

#ifndef WIN32
	// Open /dev/zero (backing object for the translation cache mapping
	// performed later in alloc_cache())
	zero_fd = open("/dev/zero", O_RDWR);
	if (zero_fd < 0) {
		char str[200];
		sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
		ErrorAlert(str);
		QuitEmulator();
	}
#endif

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	// NOTE(review): JITDebug is declared only under #if JIT_DEBUG at the
	// top of this file -- confirm this unconditional use compiles when
	// JIT_DEBUG is 0, or guard/log differently.
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}
4582
4583 void compiler_exit(void)
4584 {
4585 #if PROFILE_COMPILE_TIME
4586 emul_end_time = clock();
4587 #endif
4588
4589 // Deallocate translation cache
4590 if (compiled_code) {
4591 #ifndef WIN32
4592 munmap((caddr_t)compiled_code, cache_size);
4593 #else
4594 free(compiled_code);
4595 #endif
4596 compiled_code = 0;
4597 }
4598
4599 // Deallocate blockinfo pools
4600 free_blockinfo_pools();
4601
4602 #ifndef WIN32
4603 // Close /dev/zero
4604 if (zero_fd > 0)
4605 close(zero_fd);
4606 #endif
4607
4608 #if PROFILE_COMPILE_TIME
4609 write_log("### Compile Block statistics\n");
4610 write_log("Number of calls to compile_block : %d\n", compile_count);
4611 uae_u32 emul_time = emul_end_time - emul_start_time;
4612 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
4613 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
4614 100.0*double(compile_time)/double(emul_time));
4615 write_log("\n");
4616 #endif
4617 }
4618
4619 bool compiler_use_jit(void)
4620 {
4621 // Check for the "jit" prefs item
4622 if (!PrefsFindBool("jit"))
4623 return false;
4624
4625 // Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
4626 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
4627 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
4628 return false;
4629 }
4630
4631 // FIXME: there are currently problems with JIT compilation and anything below a 68040
4632 if (CPUType < 4) {
4633 write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
4634 return false;
4635 }
4636
4637 return true;
4638 }
4639
/* Reset the register allocator state at the start of a new block:
 * map the first 16 vregs to the 68k register file, the first 8 FP
 * vregs to the 68k FPU registers, wire up PC/flag vregs, and mark
 * all native registers free (applying the per-target can_byte /
 * can_word / always_used tables). */
void init_comp(void)
{
	int i;
	uae_s8* cb=can_byte;
	uae_s8* cw=can_word;
	uae_s8* au=always_used;

	/* Start with every vreg unassigned and disposable. */
	for (i=0;i<VREGS;i++) {
		live.state[i].realreg=-1;
		live.state[i].needflush=NF_SCRATCH;
		live.state[i].val=0;
		set_status(i,UNDEF);
	}

	for (i=0;i<VFREGS;i++) {
		live.fate[i].status=UNDEF;
		live.fate[i].realreg=-1;
		live.fate[i].needflush=NF_SCRATCH;
	}

	for (i=0;i<VREGS;i++) {
		if (i<16) { /* First 16 registers map to 68k registers */
			live.state[i].mem=((uae_u32*)&regs)+i;
			live.state[i].needflush=NF_TOMEM;
			set_status(i,INMEM);
		}
		else
			live.state[i].mem=scratch.regs+i;
	}
	/* PC_P starts out as a known constant: the current compile PC. */
	live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
	live.state[PC_P].needflush=NF_TOMEM;
	set_const(PC_P,(uae_u32)comp_pc_p);

	live.state[FLAGX].mem=&(regflags.x);
	live.state[FLAGX].needflush=NF_TOMEM;
	set_status(FLAGX,INMEM);

	live.state[FLAGTMP].mem=&(regflags.cznv);
	live.state[FLAGTMP].needflush=NF_TOMEM;
	set_status(FLAGTMP,INMEM);

	live.state[NEXT_HANDLER].needflush=NF_HANDLER;
	set_status(NEXT_HANDLER,UNDEF);

	for (i=0;i<VFREGS;i++) {
		if (i<8) { /* First 8 registers map to 68k FPU registers */
			live.fate[i].mem=(uae_u32*)fpu_register_address(i);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else if (i==FP_RESULT) {
			live.fate[i].mem=(uae_u32*)(&fpu.result);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else
			live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
	}

	/* Native integer registers: apply the target's capability tables
	   (can_byte/can_word/always_used are -1 terminated sorted lists). */
	for (i=0;i<N_REGS;i++) {
		live.nat[i].touched=0;
		live.nat[i].nholds=0;
		live.nat[i].locked=0;
		if (*cb==i) {
			live.nat[i].canbyte=1; cb++;
		} else live.nat[i].canbyte=0;
		if (*cw==i) {
			live.nat[i].canword=1; cw++;
		} else live.nat[i].canword=0;
		if (*au==i) {
			live.nat[i].locked=1; au++;
		}
	}

	for (i=0;i<N_FREGS;i++) {
		live.fat[i].touched=0;
		live.fat[i].nholds=0;
		live.fat[i].locked=0;
	}

	touchcnt=1;
	m68k_pc_offset=0;
	live.flags_in_flags=TRASH;
	live.flags_on_stack=VALID;
	live.flags_are_important=1;

	raw_fp_init();
}
4729
4730 /* Only do this if you really mean it! The next call should be to init!*/
4731 void flush(int save_regs)
4732 {
4733 int fi,i;
4734
4735 log_flush();
4736 flush_flags(); /* low level */
4737 sync_m68k_pc(); /* mid level */
4738
4739 if (save_regs) {
4740 for (i=0;i<VFREGS;i++) {
4741 if (live.fate[i].needflush==NF_SCRATCH ||
4742 live.fate[i].status==CLEAN) {
4743 f_disassociate(i);
4744 }
4745 }
4746 for (i=0;i<VREGS;i++) {
4747 if (live.state[i].needflush==NF_TOMEM) {
4748 switch(live.state[i].status) {
4749 case INMEM:
4750 if (live.state[i].val) {
4751 raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
4752 log_vwrite(i);
4753 live.state[i].val=0;
4754 }
4755 break;
4756 case CLEAN:
4757 case DIRTY:
4758 remove_offset(i,-1); tomem(i); break;
4759 case ISCONST:
4760 if (i!=PC_P)
4761 writeback_const(i);
4762 break;
4763 default: break;
4764 }
4765 Dif (live.state[i].val && i!=PC_P) {
4766 write_log("Register %d still has val %x\n",
4767 i,live.state[i].val);
4768 }
4769 }
4770 }
4771 for (i=0;i<VFREGS;i++) {
4772 if (live.fate[i].needflush==NF_TOMEM &&
4773 live.fate[i].status==DIRTY) {
4774 f_evict(i);
4775 }
4776 }
4777 raw_fp_cleanup_drop();
4778 }
4779 if (needflags) {
4780 write_log("Warning! flush with needflags=1!\n");
4781 }
4782 }
4783
4784 static void flush_keepflags(void)
4785 {
4786 int fi,i;
4787
4788 for (i=0;i<VFREGS;i++) {
4789 if (live.fate[i].needflush==NF_SCRATCH ||
4790 live.fate[i].status==CLEAN) {
4791 f_disassociate(i);
4792 }
4793 }
4794 for (i=0;i<VREGS;i++) {
4795 if (live.state[i].needflush==NF_TOMEM) {
4796 switch(live.state[i].status) {
4797 case INMEM:
4798 /* Can't adjust the offset here --- that needs "add" */
4799 break;
4800 case CLEAN:
4801 case DIRTY:
4802 remove_offset(i,-1); tomem(i); break;
4803 case ISCONST:
4804 if (i!=PC_P)
4805 writeback_const(i);
4806 break;
4807 default: break;
4808 }
4809 }
4810 }
4811 for (i=0;i<VFREGS;i++) {
4812 if (live.fate[i].needflush==NF_TOMEM &&
4813 live.fate[i].status==DIRTY) {
4814 f_evict(i);
4815 }
4816 }
4817 raw_fp_cleanup_drop();
4818 }
4819
/* Release all scratch vregs (integer and FP) so their native registers
 * become available again; also sanity-check for stray locks. */
void freescratch(void)
{
	int i;
	/* NOTE(review): native register 4 is exempt from the lock warning --
	   presumably ESP on x86 (always_used); confirm against the target
	   register map. */
	for (i=0;i<N_REGS;i++)
		if (live.nat[i].locked && i!=4)
			write_log("Warning! %d is locked\n",i);

	for (i=0;i<VREGS;i++)
		if (live.state[i].needflush==NF_SCRATCH) {
			forget_about(i);
		}

	for (i=0;i<VFREGS;i++)
		if (live.fate[i].needflush==NF_SCRATCH) {
			f_forget_about(i);
		}
}
4837
4838 /********************************************************************
4839 * Support functions, internal *
4840 ********************************************************************/
4841
4842
/* Pad the code buffer with NOPs (0x90 on x86) until the emit pointer
 * is aligned to a bytes (a must be a power of two). */
static void align_target(uae_u32 a)
{
	/* Fill with NOPs --- makes debugging with gdb easier */
	while ((uae_u32)target&(a-1))
		*target++=0x90;
}
4849
4850 static __inline__ int isinrom(uintptr addr)
4851 {
4852 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
4853 }
4854
/* Write back all dirty registers that live in call-clobbered native
 * registers, and evict every FP register (the x87 stack does not
 * survive a C call). Associations are kept for the integer registers. */
static void flush_all(void)
{
	int i;

	log_flush();
	for (i=0;i<VREGS;i++)
		if (live.state[i].status==DIRTY) {
			if (!call_saved[live.state[i].realreg]) {
				tomem(i);
			}
		}
	for (i=0;i<VFREGS;i++)
		if (f_isinreg(i))
			f_evict(i);
	raw_fp_cleanup_drop();
}
4871
/* Make sure all registers that will get clobbered by a call are
   save and sound in memory */
static void prepare_for_call_1(void)
{
	flush_all();	/* If there are registers that don't get clobbered,
				 * we should be a bit more selective here */
}
4879
/* We will call a C routine in a moment. That will clobber all registers,
   so we need to disassociate everything */
static void prepare_for_call_2(void)
{
	int i;
	/* Free every call-clobbered native integer register... */
	for (i=0;i<N_REGS;i++)
		if (!call_saved[i] && live.nat[i].nholds>0)
			free_nreg(i);

	/* ...and every native FP register. */
	for (i=0;i<N_FREGS;i++)
		if (live.fat[i].nholds>0)
			f_free_nreg(i);

	live.flags_in_flags=TRASH;  /* Note: We assume we already rescued the
				       flags at the very start of the call_r
				       functions! */
}
4897
4898 /********************************************************************
4899 * Memory access and related functions, CREATE time *
4900 ********************************************************************/
4901
/* Record a conditional branch for the epilogue of the current block:
 * cond selects between the taken and not-taken continuation PCs. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	next_pc_p=not_taken;
	taken_pc_p=taken;
	branch_cc=cond;
}
4908
4909
4910 static uae_u32 get_handler_address(uae_u32 addr)
4911 {
4912 uae_u32 cl=cacheline(addr);
4913 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
4914 return (uae_u32)&(bi->direct_handler_to_use);
4915 }
4916
4917 static uae_u32 get_handler(uae_u32 addr)
4918 {
4919 uae_u32 cl=cacheline(addr);
4920 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
4921 return (uae_u32)bi->direct_handler_to_use;
4922 }
4923
/* Emit code loading the (indirect) block handler for addr into reg. */
static void load_handler(int reg, uae_u32 addr)
{
	mov_l_rm(reg,get_handler_address(addr));
}
4928
4929 /* This version assumes that it is writing *real* memory, and *will* fail
4930 * if that assumption is wrong! No branches, no second chances, just
4931 * straight go-for-it attitude */
4932
/* Emit a direct (no memory-bank dispatch) store of size bytes from
 * vreg source to the 68k address in vreg address, byte-swapping for
 * big-endian layout. tmp is a scratch vreg; if clobber is set, source
 * itself may be trashed instead. The offset parameter is unused in
 * this direct-memory version. */
static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
{
	int f=tmp;

	if (clobber)
		f=source;
	switch(size) {
	 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
	 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
	 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
	}
	forget_about(tmp);
	forget_about(f);
}
4947
/* Public store entry points. The numeric offsets (20/16/12) are the
 * historical memory-bank table offsets; writemem_real() ignores them.
 * The _clobber variants may destroy the source vreg. */

void writebyte(int address, int source, int tmp)
{
	writemem_real(address,source,20,1,tmp,0);
}

static __inline__ void writeword_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,16,2,tmp,clobber);
}

void writeword_clobber(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,1);
}

void writeword(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,0);
}

static __inline__ void writelong_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,12,4,tmp,clobber);
}

void writelong_clobber(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,1);
}

void writelong(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,0);
}
4984
4985
4986
4987 /* This version assumes that it is reading *real* memory, and *will* fail
4988 * if that assumption is wrong! No branches, no second chances, just
4989 * straight go-for-it attitude */
4990
4991 static void readmem_real(int address, int dest, int offset, int size, int tmp)
4992 {
4993 int f=tmp;
4994
4995 if (size==4 && address!=dest)
4996 f=dest;
4997
4998 switch(size) {
4999 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5000 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5001 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5002 }
5003 forget_about(tmp);
5004 }
5005
/* Public load entry points. The numeric offsets (8/4/0) are the
 * historical memory-bank table offsets; readmem_real() ignores them. */

void readbyte(int address, int dest, int tmp)
{
	readmem_real(address,dest,8,1,tmp);
}

void readword(int address, int dest, int tmp)
{
	readmem_real(address,dest,4,2,tmp);
}

void readlong(int address, int dest, int tmp)
{
	readmem_real(address,dest,0,4,tmp);
}
5020
5021 void get_n_addr(int address, int dest, int tmp)
5022 {
5023 // a is the register containing the virtual address
5024 // after the offset had been fetched
5025 int a=tmp;
5026
5027 // f is the register that will contain the offset
5028 int f=tmp;
5029
5030 // a == f == tmp if (address == dest)
5031 if (address!=dest) {
5032 a=address;
5033 f=dest;
5034 }
5035
5036 #if REAL_ADDRESSING
5037 mov_l_rr(dest, address);
5038 #elif DIRECT_ADDRESSING
5039 lea_l_brr(dest,address,MEMBaseDiff);
5040 #endif
5041 forget_about(tmp);
5042 }
5043
/* Address translation for jump targets. */
void get_n_addr_jmp(int address, int dest, int tmp)
{
	/* For this, we need to get the same address as the rest of UAE
	   would --- otherwise we end up translating everything twice */
	get_n_addr(address,dest,tmp);
}
5050
5051
5052 /* base is a register, but dp is an actual value.
5053 target is a register, as is tmp */
/* Compute a 68020-style extended effective address into vreg target.
 * dp is the extension word: bit 8 selects the full format (with
 * base/index suppression, base/outer displacements and optional memory
 * indirection); otherwise the brief 68000 format (d8,An,Rn.size*scale)
 * is used. Extra displacement words are fetched from the instruction
 * stream, advancing m68k_pc_offset. */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
	int reg = (dp >> 12) & 15;       /* index register number */
	int regd_shift=(dp >> 9) & 3;    /* index scale: 1<<regd_shift */

	if (dp & 0x100) {
		/* Full extension word format. */
		int ignorebase=(dp&0x80);    /* BS: base register suppressed */
		int ignorereg=(dp&0x40);     /* IS: index operand suppressed */
		int addbase=0;               /* base displacement */
		int outer=0;                 /* outer displacement */

		/* Fetch base displacement (word or long). */
		if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

		/* Fetch outer displacement (word or long). */
		if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x4) == 0) {  /* add regd *before* the get_long */
			/* Pre-indexed (or no indirection): index participates in
			   the address that may be fetched from memory. */
			if (!ignorereg) {
				if ((dp & 0x800) == 0)   /* .W index: sign-extend */
					sign_extend_16_rr(target,reg);
				else
					mov_l_rr(target,reg);
				shll_l_ri(target,regd_shift);
			}
			else
				mov_l_ri(target,0);

			/* target is now regd */
			if (!ignorebase)
				add_l(target,base);
			add_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);  /* memory indirect */
		} else { /* do the getlong first, then add regd */
			/* Post-indexed: index is added after the indirection. */
			if (!ignorebase) {
				mov_l_rr(target,base);
				add_l_ri(target,addbase);
			}
			else
				mov_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);

			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(tmp,reg);
				else
					mov_l_rr(tmp,reg);
				shll_l_ri(tmp,regd_shift);
				/* tmp is now regd */
				add_l(target,tmp);
			}
		}
		add_l_ri(target,outer);
	}
	else { /* 68000 version */
		/* Brief format: base + scaled index + 8-bit displacement. */
		if ((dp & 0x800) == 0) { /* Sign extend */
			sign_extend_16_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
		else {
			lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
	}
	forget_about(tmp);
}
5119
5120
5121
5122
5123
5124 void set_cache_state(int enabled)
5125 {
5126 if (enabled!=letit)
5127 flush_icache_hard(77);
5128 letit=enabled;
5129 }
5130
/* Report whether the translation cache is currently enabled. */
int get_cache_state(void)
{
	return letit;
}
5135
5136 uae_u32 get_jitted_size(void)
5137 {
5138 if (compiled_code)
5139 return current_compile_p-compiled_code;
5140 return 0;
5141 }
5142
/* (Re)allocate the translation cache. cache_size is in KB; on mmap /
 * malloc failure the requested size is halved until allocation
 * succeeds or the size reaches zero. Any existing cache is flushed
 * and released first. */
void alloc_cache(void)
{
	if (compiled_code) {
		flush_icache_hard(6);
#ifndef WIN32
		munmap((caddr_t)compiled_code, cache_size*1024);
#else
		free(compiled_code);
#endif
		compiled_code = 0;
	}

	if (cache_size == 0)
		return;

	while (!compiled_code && cache_size) {
#ifndef WIN32
		/* PROT_EXEC is required: this buffer holds generated code. */
		compiled_code = (uae_u8 *)mmap(0, cache_size * 1024,
			PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, zero_fd, 0);
		if (compiled_code == (uae_u8 *)MAP_FAILED) {
#else
		compiled_code = (uae_u8 *)malloc(cache_size * 1024);
		if (compiled_code == 0) {
#endif
			compiled_code = 0;
			cache_size /= 2;
		}
	}

	if (compiled_code) {
		write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
		max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
		current_compile_p = compiled_code;
		current_cache_size = 0;
	}
}
5179
5180
5181
5182 extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5183
/* Compute a pair of checksums (additive and xor) over the 68k source
 * of block bi, word-aligned and rounded down from min_pcp. Used to
 * detect self-modifying code. A zero/zero result means "don't match". */
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
	uae_u32 k1=0;
	uae_u32 k2=0;
	uae_s32 len=bi->len;
	uae_u32 tmp=bi->min_pcp;
	uae_u32* pos;

	/* Align the start down to a 4-byte boundary, extending len to
	   still cover the same bytes. */
	len+=(tmp&3);
	tmp&=(~3);
	pos=(uae_u32*)tmp;

	if (len<0 || len>MAX_CHECKSUM_LEN) {
		*c1=0;
		*c2=0;
	}
	else {
		while (len>0) {
			k1+=*pos;
			k2^=*pos;
			pos++;
			len-=4;
		}
		*c1=k1;
		*c2=k2;
	}
}
5211
5212 static void show_checksum(blockinfo* bi)
5213 {
5214 uae_u32 k1=0;
5215 uae_u32 k2=0;
5216 uae_s32 len=bi->len;
5217 uae_u32 tmp=(uae_u32)bi->pc_p;
5218 uae_u32* pos;
5219
5220 len+=(tmp&3);
5221 tmp&=(~3);
5222 pos=(uae_u32*)tmp;
5223
5224 if (len<0 || len>MAX_CHECKSUM_LEN) {
5225 return;
5226 }
5227 else {
5228 while (len>0) {
5229 write_log("%08x ",*pos);
5230 pos++;
5231 len-=4;
5232 }
5233 write_log(" bla\n");
5234 }
5235 }
5236
5237
/* If the block for the current PC exists but is not first in its
 * cacheline list, promote it and report a miss (1). Returns 0 when
 * there is no block or it is already front-most. */
int check_for_cache_miss(void)
{
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);

	if (bi) {
		int cl=cacheline(regs.pc_p);
		if (bi!=cache_tags[cl+1].bi) {
			raise_in_cl_list(bi);
			return 1;
		}
	}
	return 0;
}
5251
5252
/* Entry point jumped to when a block's execution countdown expired. */
static void recompile_block(void)
{
	/* An existing block's countdown code has expired. We need to make
	   sure that execute_normal doesn't refuse to recompile due to a
	   perceived cache miss... */
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);

	Dif (!bi)
		abort();
	raise_in_cl_list(bi);
	execute_normal();
	return;
}
/* Entry point jumped to when the cacheline handler did not match the
 * current PC: either compile a brand-new block, or promote the real
 * block to the front of its cacheline list. */
static void cache_miss(void)
{
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);
	uae_u32 cl=cacheline(regs.pc_p);
	blockinfo* bi2=get_blockinfo(cl);

	if (!bi) {
		execute_normal(); /* Compile this block now */
		return;
	}
	Dif (!bi2 || bi==bi2) {
		write_log("Unexplained cache miss %p %p\n",bi,bi2);
		abort();
	}
	raise_in_cl_list(bi);
	return;
}
5283
5284 static int called_check_checksum(blockinfo* bi);
5285
/* Verify that the 68k source of block bi is unchanged. On success the
 * block (and, recursively, its dependents) is reactivated; on failure
 * it is invalidated for recompilation. Returns non-zero if the block
 * is still valid. */
static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32     c1,c2;
	int         isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1;  /* This block is in a checked state */

	checksum_count++;
	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1;  /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);
	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		bi->status=BI_CHECKING;
		isgood=called_check_checksum(bi);
	}
	if (isgood) {
		/*	write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
			c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}
5328
5329 static int called_check_checksum(blockinfo* bi)
5330 {
5331 dependency* x=bi->deplist;
5332 int isgood=1;
5333 int i;
5334
5335 for (i=0;i<2 && isgood;i++) {
5336 if (bi->dep[i].jmp_off) {
5337 isgood=block_check_checksum(bi->dep[i].target);
5338 }
5339 }
5340 return isgood;
5341 }
5342
/* Entry point jumped to when a dormant block is hit: re-validate it,
 * or fall back to cache-miss / recompilation handling. */
static void check_checksum(void)
{
	blockinfo*  bi=get_blockinfo_addr(regs.pc_p);
	uae_u32     cl=cacheline(regs.pc_p);
	blockinfo*  bi2=get_blockinfo(cl);

	/* These are not the droids you are looking for... */
	if (!bi) {
		/* Whoever is the primary target is in a dormant state, but
		   calling it was accidental, and we should just compile this
		   new block */
		execute_normal();
		return;
	}
	if (bi!=bi2) {
		/* The block was hit accidentally, but it does exist. Cache miss */
		cache_miss();
		return;
	}

	if (!block_check_checksum(bi))
		execute_normal();
}
5366
/* Bring the live register state into agreement with the entry state
 * recorded for block bi (bi->env): first honour the block's promises
 * (drop vregs it declared unneeded), flush everything, then satisfy
 * its demands by preloading the expected vregs into the expected
 * native registers. */
static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);
	}
	if (bi->status==BI_ACTIVE ||
	    bi->status==BI_FINALIZING) {  /* Deal with the *promises* the
					     block makes (about not using
					     certain vregs) */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	/* Second pass: only unlock once all loads are in place. */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}
5405
/* Static buffer for the generated register save/restore stubs
 * (filled in by create_popalls() below). */
static uae_u8 popallspace[1024]; /* That should be enough space */
5407
5408 static __inline__ void create_popalls(void)
5409 {
5410 int i,r;
5411
5412 current_compile_p=popallspace;
5413 set_target(current_compile_p);
5414 #if USE_PUSH_POP
5415 /* If we can't use gcc inline assembly, we need to pop some
5416 registers before jumping back to the various get-out routines.
5417 This generates the code for it.
5418 */
5419 popall_do_nothing=current_compile_p;
5420 for (i=0;i<N_REGS;i++) {
5421 if (need_to_preserve[i])
5422 raw_pop_l_r(i);
5423 }
5424 raw_jmp((uae_u32)do_nothing);
5425 align_target(32);
5426
5427 popall_execute_normal=get_target();
5428 for (i=0;i<N_REGS;i++) {
5429 if (need_to_preserve[i])
5430 raw_pop_l_r(i);
5431 }
5432 raw_jmp((uae_u32)execute_normal);
5433 align_target(32);
5434
5435 popall_cache_miss=get_target();
5436 for (i=0;i<N_REGS;i++) {
5437 if (need_to_preserve[i])
5438 raw_pop_l_r(i);
5439 }
5440 raw_jmp((uae_u32)cache_miss);
5441 align_target(32);
5442
5443 popall_recompile_block=get_target();
5444 for (i=0;i<N_REGS;i++) {
5445 if (need_to_preserve[i])
5446 raw_pop_l_r(i);
5447 }
5448 raw_jmp((uae_u32)recompile_block);
5449 align_target(32);
5450
5451 popall_exec_nostats=get_target();
5452 for (i=0;i<N_REGS;i++) {
5453 if (need_to_preserve[i])
5454 raw_pop_l_r(i);
5455 }
5456 raw_jmp((uae_u32)exec_nostats);
5457 align_target(32);
5458
5459 popall_check_checksum=get_target();
5460 for (i=0;i<N_REGS;i++) {
5461 if (need_to_preserve[i])
5462 raw_pop_l_r(i);
5463 }
5464 raw_jmp((uae_u32)check_checksum);
5465 align_target(32);
5466
5467 current_compile_p=get_target();
5468 #else
5469 popall_exec_nostats=(void *)exec_nostats;
5470 popall_execute_normal=(void *)execute_normal;
5471 popall_cache_miss=(void *)cache_miss;
5472 popall_recompile_block=(void *)recompile_block;
5473 popall_do_nothing=(void *)do_nothing;
5474 popall_check_checksum=(void *)check_checksum;
5475 pushall_call_handler=get_target();
5476 #endif
5477
5478 /* And now, the code to do the matching pushes and then jump
5479 into a handler routine */
5480 pushall_call_handler=get_target();
5481 #if USE_PUSH_POP
5482 for (i=N_REGS;i--;) {
5483 if (need_to_preserve[i])
5484 raw_push_l_r(i);
5485 }
5486 #endif
5487 r=REG_PC_TMP;
5488 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5489 raw_and_l_ri(r,TAGMASK);
5490 raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5491 }
5492
5493 static __inline__ void reset_lists(void)
5494 {
5495 int i;
5496
5497 for (i=0;i<MAX_HOLD_BI;i++)
5498 hold_bi[i]=NULL;
5499 active=NULL;
5500 dormant=NULL;
5501 }
5502
/* Initialize a freshly allocated blockinfo: emit its two small
   trampolines (compile-me and check-me) and reset all bookkeeping
   fields to the "invalid" state. */
static void prepare_block(blockinfo* bi)
{
    int i;

    set_target(current_compile_p);
    align_target(32);
    /* Trampoline used while the block is invalid: publish the block's
       68k PC in regs.pc_p, then go compile it via execute_normal. */
    bi->direct_pen=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
    raw_jmp((uae_u32)popall_execute_normal);

    align_target(32);
    /* Trampoline used while the block needs checksum verification. */
    bi->direct_pcc=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
    raw_jmp((uae_u32)popall_check_checksum);

    align_target(32);
    current_compile_p=get_target();

    /* No dependencies yet; both jump-dependency slots are free. */
    bi->deplist=NULL;
    for (i=0;i<2;i++) {
	bi->dep[i].prev_p=NULL;
	bi->dep[i].next=NULL;
    }
    bi->env=default_ss;
    bi->status=BI_INVALID;
    bi->havestate=0;
    //bi->env=empty_ss;
}
5533
5534 void build_comp(void)
5535 {
5536 int i;
5537 int jumpcount=0;
5538 unsigned long opcode;
5539 struct comptbl* tbl=op_smalltbl_0_comp_ff;
5540 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
5541 int count;
5542 int cpu_level = 0; // 68000 (default)
5543 if (CPUType == 4)
5544 cpu_level = 4; // 68040 with FPU
5545 else {
5546 if (FPUType)
5547 cpu_level = 3; // 68020 with FPU
5548 else if (CPUType >= 2)
5549 cpu_level = 2; // 68020
5550 else if (CPUType == 1)
5551 cpu_level = 1;
5552 }
5553 struct cputbl *nfctbl = (
5554 cpu_level == 4 ? op_smalltbl_0_nf
5555 : cpu_level == 3 ? op_smalltbl_1_nf
5556 : cpu_level == 2 ? op_smalltbl_2_nf
5557 : cpu_level == 1 ? op_smalltbl_3_nf
5558 : op_smalltbl_4_nf);
5559
5560 write_log ("<JIT compiler> : building compiler function tables\n");
5561
5562 for (opcode = 0; opcode < 65536; opcode++) {
5563 nfcpufunctbl[opcode] = op_illg_1;
5564 compfunctbl[opcode] = NULL;
5565 nfcompfunctbl[opcode] = NULL;
5566 prop[opcode].use_flags = 0x1f;
5567 prop[opcode].set_flags = 0x1f;
5568 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
5569 }
5570
5571 for (i = 0; tbl[i].opcode < 65536; i++) {
5572 int cflow = table68k[tbl[i].opcode].cflow;
5573 prop[cft_map(tbl[i].opcode)].cflow = cflow;
5574
5575 int uses_fpu = tbl[i].specific & 32;
5576 if (uses_fpu && avoid_fpu)
5577 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
5578 else
5579 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
5580 }
5581
5582 for (i = 0; nftbl[i].opcode < 65536; i++) {
5583 int uses_fpu = tbl[i].specific & 32;
5584 if (uses_fpu && avoid_fpu)
5585 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
5586 else
5587 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
5588
5589 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
5590 }
5591
5592 for (i = 0; nfctbl[i].handler; i++) {
5593 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
5594 }
5595
5596 for (opcode = 0; opcode < 65536; opcode++) {
5597 compop_func *f;
5598 compop_func *nff;
5599 cpuop_func *nfcf;
5600 int isaddx,cflow;
5601
5602 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
5603 continue;
5604
5605 if (table68k[opcode].handler != -1) {
5606 f = compfunctbl[cft_map(table68k[opcode].handler)];
5607 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
5608 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
5609 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
5610 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
5611 prop[cft_map(opcode)].cflow = cflow;
5612 prop[cft_map(opcode)].is_addx = isaddx;
5613 compfunctbl[cft_map(opcode)] = f;
5614 nfcompfunctbl[cft_map(opcode)] = nff;
5615 Dif (nfcf == op_illg_1)
5616 abort();
5617 nfcpufunctbl[cft_map(opcode)] = nfcf;
5618 }
5619 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
5620 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
5621 }
5622 for (i = 0; nfctbl[i].handler != NULL; i++) {
5623 if (nfctbl[i].specific)
5624 nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
5625 }
5626
5627 count=0;
5628 for (opcode = 0; opcode < 65536; opcode++) {
5629 if (compfunctbl[cft_map(opcode)])
5630 count++;
5631 }
5632 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
5633
5634 /* Initialise state */
5635 create_popalls();
5636 alloc_cache();
5637 reset_lists();
5638
5639 for (i=0;i<TAGSIZE;i+=2) {
5640 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
5641 cache_tags[i+1].bi=NULL;
5642 }
5643
5644 #if 0
5645 for (i=0;i<N_REGS;i++) {
5646 empty_ss.nat[i].holds=-1;
5647 empty_ss.nat[i].validsize=0;
5648 empty_ss.nat[i].dirtysize=0;
5649 }
5650 #endif
5651 for (i=0;i<VREGS;i++) {
5652 empty_ss.virt[i]=L_NEEDED;
5653 }
5654 for (i=0;i<N_REGS;i++) {
5655 empty_ss.nat[i]=L_UNKNOWN;
5656 }
5657 default_ss=empty_ss;
5658 }
5659
5660
/* Cache-flush strategy "none": deliberately keep all compiled blocks.
   The parameter n (flush reason/strength) is ignored. */
static void flush_icache_none(int n)
{
	/* Nothing to do. */
}
5665
5666 static void flush_icache_hard(int n)
5667 {
5668 uae_u32 i;
5669 blockinfo* bi, *dbi;
5670
5671 hard_flush_count++;
5672 #if 0
5673 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
5674 n,regs.pc,regs.pc_p,current_cache_size/1024);
5675 current_cache_size = 0;
5676 #endif
5677 bi=active;
5678 while(bi) {
5679 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5680 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5681 dbi=bi; bi=bi->next;
5682 free_blockinfo(dbi);
5683 }
5684 bi=dormant;
5685 while(bi) {
5686 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5687 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5688 dbi=bi; bi=bi->next;
5689 free_blockinfo(dbi);
5690 }
5691
5692 reset_lists();
5693 if (!compiled_code)
5694 return;
5695 current_compile_p=compiled_code;
5696 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
5697 }
5698
5699
5700 /* "Soft flushing" --- instead of actually throwing everything away,
5701 we simply mark everything as "needs to be checked".
5702 */
5703
5704 static inline void flush_icache_lazy(int n)
5705 {
5706 uae_u32 i;
5707 blockinfo* bi;
5708 blockinfo* bi2;
5709
5710 soft_flush_count++;
5711 if (!active)
5712 return;
5713
5714 bi=active;
5715 while (bi) {
5716 uae_u32 cl=cacheline(bi->pc_p);
5717 if (bi->status==BI_INVALID ||
5718 bi->status==BI_NEED_RECOMP) {
5719 if (bi==cache_tags[cl+1].bi)
5720 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
5721 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
5722 set_dhtu(bi,bi->direct_pen);
5723 bi->status=BI_INVALID;
5724 }
5725 else {
5726 if (bi==cache_tags[cl+1].bi)
5727 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
5728 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
5729 set_dhtu(bi,bi->direct_pcc);
5730 bi->status=BI_NEED_CHECK;
5731 }
5732 bi2=bi;
5733 bi=bi->next;
5734 }
5735 /* bi2 is now the last entry in the active list */
5736 bi2->next=dormant;
5737 if (dormant)
5738 dormant->prev_p=&(bi2->next);
5739
5740 dormant=active;
5741 active->prev_p=&dormant;
5742 active=NULL;
5743 }
5744
/* Unrecoverable internal JIT error: abort immediately (with JIT_DEBUG,
   abort is redefined at the top of this file to log file/line first). */
static void catastrophe(void)
{
    abort();
}
5749
/* Per-instruction translation outcome flag used by compile_block():
   set to 1 before each opcode and cleared when a compile handler runs;
   presumably also set by individual compile handlers that bail out
   mid-translation (handlers are defined elsewhere — verify).  When
   still set, compile_block() emits a call to the interpretive handler
   instead of inline code. */
int failure;
5751
5752 #define TARGET_M68K 0
5753 #define TARGET_POWERPC 1
5754 #define TARGET_X86 2
5755 #if defined(i386) || defined(__i386__)
5756 #define TARGET_NATIVE TARGET_X86
5757 #endif
5758 #if defined(powerpc) || defined(__powerpc__)
5759 #define TARGET_NATIVE TARGET_POWERPC
5760 #endif
5761
5762 #ifdef ENABLE_MON
5763 static uae_u32 mon_read_byte_jit(uae_u32 addr)
5764 {
5765 uae_u8 *m = (uae_u8 *)addr;
5766 return (uae_u32)(*m);
5767 }
5768
5769 static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
5770 {
5771 uae_u8 *m = (uae_u8 *)addr;
5772 *m = b;
5773 }
5774 #endif
5775
5776 void disasm_block(int target, uint8 * start, size_t length)
5777 {
5778 if (!JITDebug)
5779 return;
5780
5781 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
5782 char disasm_str[200];
5783 sprintf(disasm_str, "%s $%x $%x",
5784 target == TARGET_M68K ? "d68" :
5785 target == TARGET_X86 ? "d86" :
5786 target == TARGET_POWERPC ? "d" : "x",
5787 start, start + length - 1);
5788
5789 uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
5790 void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
5791
5792 mon_read_byte = mon_read_byte_jit;
5793 mon_write_byte = mon_write_byte_jit;
5794
5795 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
5796 mon(4, arg);
5797
5798 mon_read_byte = old_mon_read_byte;
5799 mon_write_byte = old_mon_write_byte;
5800 #endif
5801 }
5802
/* Convenience wrapper: disassemble generated host code with the
   native-architecture disassembler. */
static inline void disasm_native_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_NATIVE, start, length);
}
5807
/* Convenience wrapper: disassemble a 68k source block. */
static inline void disasm_m68k_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_M68K, start, length);
}
5812
5813 #ifdef HAVE_GET_WORD_UNSWAPPED
5814 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
5815 #else
5816 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
5817 #endif
5818
5819 #if JIT_DEBUG
/* Debug breadcrumbs written by generated code (see compile_block):
   the 68k PC and host address of the most recently entered block. */
static uae_u8 *last_regs_pc_p = 0;
static uae_u8 *last_compiled_block_addr = 0;

/* Dump JIT debugging state: key host addresses, the emulated CPU
   state, and the 68k/native locations of the last executed block.
   No-op unless runtime JIT debugging is enabled. */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;

	write_log("### Host addresses\n");
	write_log("MEM_BASE    : %x\n", MEMBaseDiff);
	write_log("PC_P        : %p\n", &regs.pc_p);
	write_log("SPCFLAGS    : %p\n", &regs.spcflags);
	write_log("D0-D7       : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7       : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");

	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");

	write_log("### Block in Mac address space\n");
	write_log("M68K block   : %p\n",
			  (void *)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
			  (void *)get_virtual_address(last_compiled_block_addr),
			  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
5848 #endif
5849
/* The heart of the JIT: translate the just-interpreted run of 68k
   instructions (pc_hist[0..blocklen-1]) into native code.  Computes
   flag liveness over the block, emits either inline compiled code or
   calls to the interpretive handlers per instruction, chains the
   block to its successors when a branch was recorded, checksums it,
   and finally activates it in the cache. */
static void compile_block(cpu_history* pc_hist, int blocklen)
{
    if (letit && compiled_code) {
#if PROFILE_COMPILE_TIME
	compile_count++;
	clock_t start_time = clock();
#endif
#if JIT_DEBUG
	/* NOTE(review): this local shadows the disasm_block() function. */
	bool disasm_block = false;
#endif

	/* OK, here we need to 'compile' a block */
	int i;
	int r;
	int was_comp=0;			/* >0 while emitting truly compiled code */
	uae_u8 liveflags[MAXRUN+1];	/* per-instruction live flag mask */
	uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
	uae_u32 min_pcp=max_pcp;	/* 68k address range covered (for checksum) */
	uae_u32 cl=cacheline(pc_hist[0].location);
	void* specflags=(void*)&regs.spcflags;
	blockinfo* bi=NULL;
	blockinfo* bi2;
	int extra_len=0;

	redo_current_block=0;
	/* Out of translation-cache space: flush everything first. */
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	alloc_blockinfos();

	bi=get_blockinfo_addr_new(pc_hist[0].location,0);
	bi2=get_blockinfo(cl);

	optlev=bi->optlevel;
	if (bi->status!=BI_INVALID) {
	    Dif (bi!=bi2) {
		/* I don't think it can happen anymore. Shouldn't, in
		   any case. So let's make sure... */
		write_log("WOOOWOO count=%d, ol=%d %p %p\n",
		       bi->count,bi->optlevel,bi->handler_to_use,
		       cache_tags[cl].handler);
		abort();
	    }

	    Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
		write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
		/* What the heck? We are not supposed to be here! */
		abort();
	    }
	}
	/* Countdown expired: promote the block to the next optimization
	   level that has a non-zero recompile budget. */
	if (bi->count==-1) {
	    optlev++;
	    while (!optcount[optlev])
		optlev++;
	    bi->count=optcount[optlev]-1;
	}
	current_block_pc_p=(uae_u32)pc_hist[0].location;

	remove_deps(bi); /* We are about to create new code */
	bi->optlevel=optlev;
	bi->pc_p=(uae_u8*)pc_hist[0].location;

	/* Backward pass: compute which CPU flags each instruction must
	   actually produce, so flag generation can be skipped. */
	liveflags[blocklen]=0x1f; /* All flags needed afterwards */
	i=blocklen;
	while (i--) {
	    uae_u16* currpcp=pc_hist[i].location;
	    uae_u32 op=DO_GET_OPCODE(currpcp);

	    if ((uae_u32)currpcp<min_pcp)
		min_pcp=(uae_u32)currpcp;
	    if ((uae_u32)currpcp>max_pcp)
		max_pcp=(uae_u32)currpcp;

	    liveflags[i]=((liveflags[i+1]&
			   (~prop[op].set_flags))|
			  prop[op].use_flags);
	    if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
		liveflags[i]&= ~FLAG_Z;
	}

	bi->needed_flags=liveflags[0];

	align_target(32);
	was_comp=0;

	bi->direct_handler=(cpuop_func *)get_target();
	set_dhtu(bi,bi->direct_handler);
	bi->status=BI_COMPILING;
	current_block_start_target=(uae_u32)get_target();

	log_startblock();

	if (bi->count>=0) { /* Need to generate countdown code */
	    raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	    raw_sub_l_mi((uae_u32)&(bi->count),1);
	    raw_jl((uae_u32)popall_recompile_block);
	}
	if (optlev==0) { /* No need to actually translate */
	    /* Execute normally without keeping stats */
	    raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	    raw_jmp((uae_u32)popall_exec_nostats);
	}
	else {
	    reg_alloc_run=0;
	    next_pc_p=0;
	    taken_pc_p=0;
	    branch_cc=0;

	    comp_pc_p=(uae_u8*)pc_hist[0].location;
	    init_comp();
	    was_comp=1;

#if JIT_DEBUG
	    if (JITDebug) {
		/* Record entry breadcrumbs for compiler_dumpstate(). */
		raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
		raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
	    }
#endif

	    /* Main translation loop: one iteration per 68k instruction,
	       stopping early if the translation cache fills up. */
	    for (i=0;i<blocklen &&
		     get_target_noopt()<max_compile_start;i++) {
		cpuop_func **cputbl;
		compop_func **comptbl;
		uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
		needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
		/* If no later code needs this instruction's flags, use the
		   faster no-flags handler tables. */
		if (!needed_flags) {
		    cputbl=nfcpufunctbl;
		    comptbl=nfcompfunctbl;
		}
		else {
		    cputbl=cpufunctbl;
		    comptbl=compfunctbl;
		}

		failure = 1; // gb-- defaults to failure state
		if (comptbl[opcode] && optlev>1) {
		    failure=0;
		    if (!was_comp) {
			comp_pc_p=(uae_u8*)pc_hist[i].location;
			init_comp();
		    }
		    was_comp++;

		    comptbl[opcode](opcode);
		    freescratch();
		    if (!(liveflags[i+1] & FLAG_CZNV)) {
			/* We can forget about flags */
			dont_care_flags();
		    }
#if INDIVIDUAL_INST
		    flush(1);
		    nop();
		    flush(1);
		    was_comp=0;
#endif
		}

		if (failure) {
		    /* Could not compile inline: flush register state and
		       emit a direct call to the interpretive handler. */
		    if (was_comp) {
			flush(1);
			was_comp=0;
		    }
		    raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
#if USE_NORMAL_CALLING_CONVENTION
		    raw_push_l_r(REG_PAR1);
#endif
		    raw_mov_l_mi((uae_u32)&regs.pc_p,
				 (uae_u32)pc_hist[i].location);
		    raw_call((uae_u32)cputbl[opcode]);
		    //raw_add_l_mi((uae_u32)&oink,1); // FIXME
#if USE_NORMAL_CALLING_CONVENTION
		    raw_inc_sp(4);
#endif
		    if (needed_flags) {
			//raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode+65536);
		    }
		    else {
			//raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode);
		    }

		    if (i < blocklen - 1) {
			/* Mid-block interpretive call: if it raised any
			   special flags, bail out of the block now. */
			uae_s8* branchadd;

			raw_mov_l_rm(0,(uae_u32)specflags);
			raw_test_l_rr(0,0);
			raw_jz_b_oponly();
			branchadd=(uae_s8 *)get_target();
			emit_byte(0);
			raw_jmp((uae_u32)popall_do_nothing);
			/* Backpatch the 8-bit jz displacement. */
			*branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
		    }
		}
	    }
#if 1 /* This isn't completely kosher yet; It really needs to be
	 be integrated into a general inter-block-dependency scheme */
	    if (next_pc_p && taken_pc_p &&
		was_comp && taken_pc_p==current_block_pc_p) {
		/* Tight self-loop: see whether flags can be dropped across
		   the loop edge. */
		blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
		blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
		uae_u8 x=bi1->needed_flags;

		if (x==0xff || 1) { /* To be on the safe side */
		    uae_u16* next=(uae_u16*)next_pc_p;
		    uae_u32 op=DO_GET_OPCODE(next);

		    x=0x1f;
		    x&=(~prop[op].set_flags);
		    x|=prop[op].use_flags;
		}

		x|=bi2->needed_flags;
		if (!(x & FLAG_CZNV)) {
		    /* We can forget about flags */
		    dont_care_flags();
		    extra_len+=2; /* The next instruction now is part of this
				     block */
		}

	    }
#endif
	    log_flush();

	    if (next_pc_p) { /* A branch was registered */
		uae_u32 t1=next_pc_p;
		uae_u32 t2=taken_pc_p;
		int cc=branch_cc;

		uae_u32* branchadd;
		uae_u32* tba;
		bigstate tmp;
		blockinfo* tbi;

		if (taken_pc_p<next_pc_p) {
		    /* backward branch. Optimize for the "taken" case ---
		       which means the raw_jcc should fall through when
		       the 68k branch is taken. */
		    t1=taken_pc_p;
		    t2=next_pc_p;
		    cc=branch_cc^1;
		}

		tmp=live; /* ouch! This is big... */
		raw_jcc_l_oponly(cc);
		branchadd=(uae_u32*)get_target();
		emit_long(0);

		/* predicted outcome */
		tbi=get_blockinfo_addr_new((void*)t1,1);
		match_states(tbi);
		/* Only chain directly when no special flags are pending. */
		raw_cmp_l_mi((uae_u32)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t1)-((uae_u32)tba+4));
		raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
		raw_jmp((uae_u32)popall_do_nothing);
		create_jmpdep(bi,0,tba,t1);

		align_target(16);
		/* not-predicted outcome */
		*branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
		live=tmp; /* Ouch again */
		tbi=get_blockinfo_addr_new((void*)t2,1);
		match_states(tbi);

		//flush(1); /* Can only get here if was_comp==1 */
		raw_cmp_l_mi((uae_u32)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t2)-((uae_u32)tba+4));
		raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
		raw_jmp((uae_u32)popall_do_nothing);
		create_jmpdep(bi,1,tba,t2);
	    }
	    else
	    {
		if (was_comp) {
		    flush(1);
		}

		/* Let's find out where next_handler is... */
		if (was_comp && isinreg(PC_P)) {
		    /* PC still lives in a register: dispatch through the
		       cache-tag table, falling back to do_nothing if any
		       special flags are pending. */
		    r=live.state[PC_P].realreg;
		    raw_and_l_ri(r,TAGMASK);
		    int r2 = (r==0) ? 1 : 0;
		    raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
		    raw_jmp_r(r2);
		}
		else if (was_comp && isconst(PC_P)) {
		    /* PC is a known constant: chain directly to that
		       block's handler. */
		    uae_u32 v=live.state[PC_P].val;
		    uae_u32* tba;
		    blockinfo* tbi;

		    tbi=get_blockinfo_addr_new((void*)v,1);
		    match_states(tbi);

		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_jcc_l_oponly(4);
		    tba=(uae_u32*)get_target();
		    emit_long(get_handler(v)-((uae_u32)tba+4));
		    raw_mov_l_mi((uae_u32)&regs.pc_p,v);
		    raw_jmp((uae_u32)popall_do_nothing);
		    create_jmpdep(bi,0,tba,v);
		}
		else {
		    /* Generic case: reload PC from memory and dispatch
		       through the cache-tag table. */
		    r=REG_PC_TMP;
		    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
		    raw_and_l_ri(r,TAGMASK);
		    int r2 = (r==0) ? 1 : 0;
		    raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
		    raw_jmp_r(r2);
		}
	    }
	}

#if USE_MATCH
	if (callers_need_recompile(&live,&(bi->env))) {
	    mark_callers_recompile(bi);
	}

	big_to_small_state(&live,&(bi->env));
#endif

	/* Work out the 68k byte range this block depends on, for the
	   checksum. */
	if (next_pc_p+extra_len>=max_pcp &&
	    next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
	    max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
	else
	    max_pcp+=LONGEST_68K_INST;
	bi->len=max_pcp-min_pcp;
	bi->min_pcp=min_pcp;

	remove_from_list(bi);
	if (isinrom(min_pcp) && isinrom(max_pcp)) {
	    add_to_dormant(bi); /* No need to checksum it on cache flush.
				   Please don't start changing ROMs in
				   flight! */
	}
	else {
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
	    add_to_active(bi);
	}

	current_cache_size += get_target() - (uae_u8 *)current_compile_p;

#if JIT_DEBUG
	if (JITDebug)
	    bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;

	if (JITDebug && disasm_block) {
	    uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
	    D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
	    uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
	    disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
	    D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
	    disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
	    getchar();
	}
#endif

	log_dump();
	align_target(32);

	/* This is the non-direct handler */
	bi->handler=
	    bi->handler_to_use=(cpuop_func *)get_target();
	raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	raw_jnz((uae_u32)popall_cache_miss);
	comp_pc_p=(uae_u8*)pc_hist[0].location;

	bi->status=BI_FINALIZING;
	init_comp();
	match_states(bi);
	flush(1);

	raw_jmp((uae_u32)bi->direct_handler);

	align_target(32);
	current_compile_p=get_target();

	raise_in_cl_list(bi);

	/* We will flush soon, anyway, so let's do it now */
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	bi->status=BI_ACTIVE;
	if (redo_current_block)
	    block_need_recompile(bi);

#if PROFILE_COMPILE_TIME
	compile_time += (clock() - start_time);
#endif
    }
}
6247
/* Get-out routine that intentionally does nothing: used as the jump
   target when compiled code must simply return to the dispatcher. */
void do_nothing(void)
{
    /* What did you expect this to do? */
}
6252
/* Interpret instructions without recording a compile history, until a
   block-ending instruction or a special flag is hit.  Used for blocks
   at optimization level 0 (see compile_block). */
void exec_nostats(void)
{
	for (;;)  {
		uae_u32 opcode = GET_OPCODE;
#ifdef X86_ASSEMBLY__disable
		/* Disabled hand-rolled dispatch (kept for reference). */
		__asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
							 : : "b" (cpufunctbl[opcode]), "a" (opcode)
							 : "%edx", "%ecx", "%esi", "%edi",  "%ebp", "memory", "cc");
#else
		(*cpufunctbl[opcode])(opcode);
#endif
		if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
			return; /* We will deal with the spcflags in the caller */
		}
	}
}
6269
/* Interpret instructions while recording their locations in pc_hist;
   when the run ends (block-ending opcode, special flag, or MAXRUN
   reached), hand the recorded history to compile_block() for
   translation. */
void execute_normal(void)
{
	if (!check_for_cache_miss()) {
		cpu_history pc_hist[MAXRUN];
		int blocklen = 0;
#if REAL_ADDRESSING || DIRECT_ADDRESSING
		start_pc_p = regs.pc_p;
		start_pc = get_virtual_address(regs.pc_p);
#else
		start_pc_p = regs.pc_oldp;
		start_pc = regs.pc;
#endif
		for (;;)  { /* Take note: This is the do-it-normal loop */
			/* Record each instruction's address before executing it. */
			pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
			uae_u32 opcode = GET_OPCODE;
#if FLIGHT_RECORDER
			m68k_record_step(m68k_getpc());
#endif
#ifdef X86_ASSEMBLY__disable
			/* Disabled hand-rolled dispatch (kept for reference). */
			__asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
								 : : "b" (cpufunctbl[opcode]), "a" (opcode)
								 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
#else
			(*cpufunctbl[opcode])(opcode);
#endif
			if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
				compile_block(pc_hist, blocklen);
				return; /* We will deal with the spcflags in the caller */
			}
			/* No need to check regs.spcflags, because if they were set,
			   we'd have ended up inside that "if" */
		}
	}
}
6304
6305 typedef void (*compiled_handler)(void);
6306
/* Top-level JIT execution loop: repeatedly enter compiled code via
   the dispatch stub (or inline asm), then service any special flags
   on return; exits when m68k_do_specialties() requests it. */
void m68k_do_compile_execute(void)
{
	for (;;) {
#ifdef X86_ASSEMBLY
		/* Call the current cache-line handler directly, preserving
		   %ebp around the call. */
		__asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
							 : : "b" (cache_tags[cacheline(regs.pc_p)].handler)
							 : "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc");
#else
		((compiled_handler)(pushall_call_handler))();
#endif
		/* Whenever we return from that, we should check spcflags */
		if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
			if (m68k_do_specialties ())
				return;
		}
	}
}