root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.3
Committed: 2002-09-18T09:55:37Z by gbeauche
Branch: MAIN
Changes since 1.2: +3 -11 lines
Log Message:
Don't forget to use vm_release() to free up the translation cache. Also free
the right amount of memory that was previously allocated.

File Contents

1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
2 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
3 #endif
4
5 #define USE_MATCH 0
6
7 /* kludge for Brian, so he can compile under MSVC++ */
8 #define USE_NORMAL_CALLING_CONVENTION 0
9
10 #ifndef WIN32
11 #include <sys/types.h>
12 #include <sys/mman.h>
13 #endif
14
15 #include <stdlib.h>
16 #include <fcntl.h>
17 #include <errno.h>
18
19 #include "sysdeps.h"
20 #include "cpu_emulation.h"
21 #include "main.h"
22 #include "prefs.h"
23 #include "user_strings.h"
24 #include "vm_alloc.h"
25
26 #include "m68k.h"
27 #include "memory.h"
28 #include "readcpu.h"
29 #include "newcpu.h"
30 #include "comptbl.h"
31 #include "compiler/compemu.h"
32 #include "fpu/fpu.h"
33 #include "fpu/flags.h"
34
35 #define DEBUG 1
36 #include "debug.h"
37
38 #ifdef ENABLE_MON
39 #include "mon.h"
40 #endif
41
42 #ifndef WIN32
43 #define PROFILE_COMPILE_TIME 1
44 #endif
45
46 #ifdef WIN32
47 #undef write_log
48 #define write_log dummy_write_log
49 static void dummy_write_log(const char *, ...) { }
50 #endif
51
52 #if JIT_DEBUG
53 #undef abort
54 #define abort() do { \
55 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
56 exit(EXIT_FAILURE); \
57 } while (0)
58 #endif
59
60 #if PROFILE_COMPILE_TIME
61 #include <time.h>
62 static uae_u32 compile_count = 0;
63 static clock_t compile_time = 0;
64 static clock_t emul_start_time = 0;
65 static clock_t emul_end_time = 0;
66 #endif
67
68 compop_func *compfunctbl[65536];
69 compop_func *nfcompfunctbl[65536];
70 cpuop_func *nfcpufunctbl[65536];
71 uae_u8* comp_pc_p;
72
73 // gb-- Extra data for Basilisk II/JIT
74 #if JIT_DEBUG
75 static bool JITDebug = false; // Enable runtime disassemblers through mon?
76 #else
77 const bool JITDebug = false; // Don't use JIT debug mode at all
78 #endif
79
80 const uae_u32 MIN_CACHE_SIZE = 2048; // Minimal translation cache size (2048 KB)
81 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
82 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
83 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
84 static bool avoid_fpu = true; // Flag: compile FPU instructions?
85 static bool have_cmov = false; // Target has CMOV instructions?
86 static bool have_rat_stall = true; // Target has partial register stalls?
87 static int zero_fd = -1;
88 static int optcount[10] = {
89 10, // How often a block has to be executed before it is translated
90 0, // How often to use naive translation
91 0, 0, 0, 0,
92 -1, -1, -1, -1
93 };
94
95 struct op_properties {
96 uae_u8 use_flags;
97 uae_u8 set_flags;
98 uae_u8 is_addx;
99 uae_u8 cflow;
100 };
101 static op_properties prop[65536];
102
103 // gb-- Control Flow Predicates
104
105 static inline int end_block(uae_u32 opcode)
106 {
107 return (prop[opcode].cflow & fl_end_block);
108 }
109
110 static inline bool may_trap(uae_u32 opcode)
111 {
112 return (prop[opcode].cflow & fl_trap);
113 }
114
115 uae_u8* start_pc_p;
116 uae_u32 start_pc;
117 uae_u32 current_block_pc_p;
118 uae_u32 current_block_start_target;
119 uae_u32 needed_flags;
120 static uae_u32 next_pc_p;
121 static uae_u32 taken_pc_p;
122 static int branch_cc;
123 static int redo_current_block;
124
125 int segvcount=0;
126 int soft_flush_count=0;
127 int hard_flush_count=0;
128 int checksum_count=0;
129 static uae_u8* current_compile_p=NULL;
130 static uae_u8* max_compile_start;
131 static uae_u8* compiled_code=NULL;
132 static uae_s32 reg_alloc_run;
133
134 void* pushall_call_handler=NULL;
135 static void* popall_do_nothing=NULL;
136 static void* popall_exec_nostats=NULL;
137 static void* popall_execute_normal=NULL;
138 static void* popall_cache_miss=NULL;
139 static void* popall_recompile_block=NULL;
140 static void* popall_check_checksum=NULL;
141
142 extern uae_u32 oink;
143 extern unsigned long foink3;
144 extern unsigned long foink;
145
146 /* The 68k only ever executes from even addresses. So right now, we
147 * waste half the entries in this array.
148 * UPDATE: We now use those entries to store the start of the linked
149 * lists that we maintain for each hash result.
150 */
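/* Illustrative sketch (editorial, not part of the original source):
 * assuming cacheline() hashes the low bits of an even 68k address,
 * each lookup touches a pair of adjacent entries; the even slot holds
 * the handler used for fast dispatch, the odd slot anchors the chain
 * of blockinfos that hash to the same line:
 *
 *   uae_u32 cl = cacheline(pc_p);             // even index
 *   cpuop_func *h = cache_tags[cl].handler;   // dispatch target
 *   blockinfo *bi = cache_tags[cl+1].bi;      // collision chain head
 */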
151 cacheline cache_tags[TAGSIZE];
152 int letit=0;
153 blockinfo* hold_bi[MAX_HOLD_BI];
154 blockinfo* active;
155 blockinfo* dormant;
156
157 /* 68040 */
158 extern struct cputbl op_smalltbl_0_nf[];
159 extern struct comptbl op_smalltbl_0_comp_nf[];
160 extern struct comptbl op_smalltbl_0_comp_ff[];
161
162 /* 68020 + 68881 */
163 extern struct cputbl op_smalltbl_1_nf[];
164
165 /* 68020 */
166 extern struct cputbl op_smalltbl_2_nf[];
167
168 /* 68010 */
169 extern struct cputbl op_smalltbl_3_nf[];
170
171 /* 68000 */
172 extern struct cputbl op_smalltbl_4_nf[];
173
174 /* 68000 slow but compatible. */
175 extern struct cputbl op_smalltbl_5_nf[];
176
177 static void flush_icache_hard(int n);
178 static void flush_icache_lazy(int n);
179 static void flush_icache_none(int n);
180 void (*flush_icache)(int n) = flush_icache_none;
181
182
183
184 bigstate live;
185 smallstate empty_ss;
186 smallstate default_ss;
187 static int optlev;
188
189 static int writereg(int r, int size);
190 static void unlock2(int r);
191 static void setlock(int r);
192 static int readreg_specific(int r, int size, int spec);
193 static int writereg_specific(int r, int size, int spec);
194 static void prepare_for_call_1(void);
195 static void prepare_for_call_2(void);
196 static void align_target(uae_u32 a);
197
198 static uae_s32 nextused[VREGS];
199
200 uae_u32 m68k_pc_offset;
201
202 /* Some arithmetic operations can be optimized away if the operands
203 * are known to be constant. But that's only a good idea when the
204 * side effects they would have on the flags are not important. This
205 * variable indicates whether we need the side effects or not
206 */
207 uae_u32 needflags=0;
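/* For example, shll_l_ri further below folds a shift into the cached
 * constant only when the flag side effects are not needed:
 *
 *   if (isconst(r) && !needflags) {
 *       live.state[r].val <<= i;   // pure constant folding, no code emitted
 *       return;
 *   }
 */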
208
209 /* Flag handling is complicated.
210 *
211 * x86 instructions create flags, which quite often are exactly what we
212 * want. So at times, the "68k" flags are actually in the x86 flags.
213 *
214 * Then again, sometimes we do x86 instructions that clobber the x86
215 * flags, but don't represent a corresponding m68k instruction. In that
216 * case, we have to save them.
217 *
218 * We used to save them to the stack, but now store them back directly
219 * into the regflags.cznv of the traditional emulation. Thus some odd
220 * names.
221 *
222 * So flags can be in either of two places (used to be three; boy were
223 * things complicated back then!), and either place can contain either
224 * valid flags or invalid trash (and on the stack, there was also the
225 * option of "nothing at all", now gone). A couple of variables keep
226 * track of the respective states.
227 *
228 * To make things worse, we might or might not be interested in the flags.
229 * By default, we are, but a call to dont_care_flags can change that
230 * until the next call to live_flags. If we are not, pretty much whatever
231 * is in the register and/or the native flags is seen as valid.
232 */
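/* A minimal sketch of the resulting protocol (editorial; the names are
 * the functions defined below): flags live either in the native x86
 * flags register or in regflags.cznv, and each copy is independently
 * valid or trash.
 *
 *   clobber_flags();            // about to emit a flag-trashing insn:
 *                               // spill valid x86 flags to regflags.cznv
 *   make_flags_live_internal(); // need real 68k flags: reload the x86
 *                               // flags register from regflags.cznv
 *   dont_care_flags();          // downstream code ignores flags, so
 *                               // whatever is lying around counts as valid
 */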
233
234 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
235 {
236 return cache_tags[cl+1].bi;
237 }
238
239 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
240 {
241 blockinfo* bi=get_blockinfo(cacheline(addr));
242
243 while (bi) {
244 if (bi->pc_p==addr)
245 return bi;
246 bi=bi->next_same_cl;
247 }
248 return NULL;
249 }
250
251
252 /*******************************************************************
253 * All sorts of list related functions for all of the lists *
254 *******************************************************************/
255
256 static __inline__ void remove_from_cl_list(blockinfo* bi)
257 {
258 uae_u32 cl=cacheline(bi->pc_p);
259
260 if (bi->prev_same_cl_p)
261 *(bi->prev_same_cl_p)=bi->next_same_cl;
262 if (bi->next_same_cl)
263 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
264 if (cache_tags[cl+1].bi)
265 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
266 else
267 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
268 }
269
270 static __inline__ void remove_from_list(blockinfo* bi)
271 {
272 if (bi->prev_p)
273 *(bi->prev_p)=bi->next;
274 if (bi->next)
275 bi->next->prev_p=bi->prev_p;
276 }
277
278 static __inline__ void remove_from_lists(blockinfo* bi)
279 {
280 remove_from_list(bi);
281 remove_from_cl_list(bi);
282 }
283
284 static __inline__ void add_to_cl_list(blockinfo* bi)
285 {
286 uae_u32 cl=cacheline(bi->pc_p);
287
288 if (cache_tags[cl+1].bi)
289 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
290 bi->next_same_cl=cache_tags[cl+1].bi;
291
292 cache_tags[cl+1].bi=bi;
293 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
294
295 cache_tags[cl].handler=bi->handler_to_use;
296 }
297
298 static __inline__ void raise_in_cl_list(blockinfo* bi)
299 {
300 remove_from_cl_list(bi);
301 add_to_cl_list(bi);
302 }
303
304 static __inline__ void add_to_active(blockinfo* bi)
305 {
306 if (active)
307 active->prev_p=&(bi->next);
308 bi->next=active;
309
310 active=bi;
311 bi->prev_p=&active;
312 }
313
314 static __inline__ void add_to_dormant(blockinfo* bi)
315 {
316 if (dormant)
317 dormant->prev_p=&(bi->next);
318 bi->next=dormant;
319
320 dormant=bi;
321 bi->prev_p=&dormant;
322 }
323
324 static __inline__ void remove_dep(dependency* d)
325 {
326 if (d->prev_p)
327 *(d->prev_p)=d->next;
328 if (d->next)
329 d->next->prev_p=d->prev_p;
330 d->prev_p=NULL;
331 d->next=NULL;
332 }
333
334 /* This block's code is about to be thrown away, so it no longer
335 depends on anything else */
336 static __inline__ void remove_deps(blockinfo* bi)
337 {
338 remove_dep(&(bi->dep[0]));
339 remove_dep(&(bi->dep[1]));
340 }
341
342 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
343 {
344 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
345 }
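/* Editorial note: the value stored above is an x86 rel32 displacement,
 * i.e. the target minus the address just past the 4-byte offset field,
 * hence the "+4". A worked example, with the offset field at 0x1000 and
 * a handler at 0x2000:
 *
 *   *(uae_u32 *)0x1000 = 0x2000 - (0x1000 + 4);   // stores 0x0FFC
 */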
346
347 /********************************************************************
348 * Soft flush handling support functions *
349 ********************************************************************/
350
351 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
352 {
353 //write_log("bi is %p\n",bi);
354 if (dh!=bi->direct_handler_to_use) {
355 dependency* x=bi->deplist;
356 //write_log("bi->deplist=%p\n",bi->deplist);
357 while (x) {
358 //write_log("x is %p\n",x);
359 //write_log("x->next is %p\n",x->next);
360 //write_log("x->prev_p is %p\n",x->prev_p);
361
362 if (x->jmp_off) {
363 adjust_jmpdep(x,dh);
364 }
365 x=x->next;
366 }
367 bi->direct_handler_to_use=dh;
368 }
369 }
370
371 static __inline__ void invalidate_block(blockinfo* bi)
372 {
373 int i;
374
375 bi->optlevel=0;
376 bi->count=optcount[0]-1;
377 bi->handler=NULL;
378 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
379 bi->direct_handler=NULL;
380 set_dhtu(bi,bi->direct_pen);
381 bi->needed_flags=0xff;
382 bi->status=BI_INVALID;
383 for (i=0;i<2;i++) {
384 bi->dep[i].jmp_off=NULL;
385 bi->dep[i].target=NULL;
386 }
387 remove_deps(bi);
388 }
389
390 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
391 {
392 blockinfo* tbi=get_blockinfo_addr((void*)target);
393
394 Dif(!tbi) {
395 write_log("Could not create jmpdep!\n");
396 abort();
397 }
398 bi->dep[i].jmp_off=jmpaddr;
399 bi->dep[i].source=bi;
400 bi->dep[i].target=tbi;
401 bi->dep[i].next=tbi->deplist;
402 if (bi->dep[i].next)
403 bi->dep[i].next->prev_p=&(bi->dep[i].next);
404 bi->dep[i].prev_p=&(tbi->deplist);
405 tbi->deplist=&(bi->dep[i]);
406 }
407
408 static __inline__ void block_need_recompile(blockinfo * bi)
409 {
410 uae_u32 cl = cacheline(bi->pc_p);
411
412 set_dhtu(bi, bi->direct_pen);
413 bi->direct_handler = bi->direct_pen;
414
415 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
416 bi->handler = (cpuop_func *)popall_execute_normal;
417 if (bi == cache_tags[cl + 1].bi)
418 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
419 bi->status = BI_NEED_RECOMP;
420 }
421
422 static __inline__ void mark_callers_recompile(blockinfo * bi)
423 {
424 dependency *x = bi->deplist;
425
426 while (x) {
427 dependency *next = x->next; /* This disappears when we mark for
428 * recompilation and thus remove the
429 * blocks from the lists */
430 if (x->jmp_off) {
431 blockinfo *cbi = x->source;
432
433 Dif(cbi->status == BI_INVALID) {
434 // write_log("invalid block in dependency list\n"); // FIXME?
435 // abort();
436 }
437 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
438 block_need_recompile(cbi);
439 mark_callers_recompile(cbi);
440 }
441 else if (cbi->status == BI_COMPILING) {
442 redo_current_block = 1;
443 }
444 else if (cbi->status == BI_NEED_RECOMP) {
445 /* nothing */
446 }
447 else {
448 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
449 }
450 }
451 x = next;
452 }
453 }
454
455 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
456 {
457 blockinfo* bi=get_blockinfo_addr(addr);
458 int i;
459
460 if (!bi) {
461 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
462 if (hold_bi[i]) {
463 uae_u32 cl=cacheline(addr);
464
465 bi=hold_bi[i];
466 hold_bi[i]=NULL;
467 bi->pc_p=(uae_u8 *)addr;
468 invalidate_block(bi);
469 add_to_active(bi);
470 add_to_cl_list(bi);
471
472 }
473 }
474 }
475 if (!bi) {
476 write_log("Looking for blockinfo, can't find free one\n");
477 abort();
478 }
479 return bi;
480 }
481
482 static void prepare_block(blockinfo* bi);
483
484 /* Management of blockinfos.
485
486 A blockinfo struct is allocated whenever a new block has to be
487 compiled. If the list of free blockinfos is empty, we allocate a new
488 pool of blockinfos and link the newly created blockinfos together
489 into the list of free blockinfos. Otherwise, we simply pop a structure
490 off the free list.
491
492 Blockinfos are lazily deallocated, i.e. chained together into the
493 list of free blockinfos whenever a translation cache flush (hard or
494 soft) request occurs.
495 */
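/* A minimal usage sketch (editorial, assuming USE_SEPARATE_BIA) of the
 * pool allocator defined below:
 *
 *   blockinfo *bi = alloc_blockinfo(); // pops the free list; mallocs a
 *                                      // fresh pool of BLOCKINFO_POOL_SIZE
 *                                      // entries when the list is empty
 *   free_blockinfo(bi);                // pushes back onto the free list;
 *                                      // the pools themselves are released
 *                                      // only by free_blockinfo_pools()
 */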
496
497 #if USE_SEPARATE_BIA
498 const int BLOCKINFO_POOL_SIZE = 128;
499 struct blockinfo_pool {
500 blockinfo bi[BLOCKINFO_POOL_SIZE];
501 blockinfo_pool *next;
502 };
503 static blockinfo_pool * blockinfo_pools = 0;
504 static blockinfo * free_blockinfos = 0;
505 #endif
506
507 static __inline__ blockinfo *alloc_blockinfo(void)
508 {
509 #if USE_SEPARATE_BIA
510 if (!free_blockinfos) {
511 // There is no blockinfo struct left, allocate a new
512 // pool and link the chunks into the free list
513 blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
514 for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
515 bi->next = free_blockinfos;
516 free_blockinfos = bi;
517 }
518 bi_pool->next = blockinfo_pools;
519 blockinfo_pools = bi_pool;
520 }
521 blockinfo *bi = free_blockinfos;
522 free_blockinfos = bi->next;
523 #else
524 blockinfo *bi = (blockinfo*)current_compile_p;
525 current_compile_p += sizeof(blockinfo);
526 #endif
527 return bi;
528 }
529
530 static __inline__ void free_blockinfo(blockinfo *bi)
531 {
532 #if USE_SEPARATE_BIA
533 bi->next = free_blockinfos;
534 free_blockinfos = bi;
535 #endif
536 }
537
538 static void free_blockinfo_pools(void)
539 {
540 #if USE_SEPARATE_BIA
541 int blockinfo_pool_count = 0;
542 blockinfo_pool *curr_pool = blockinfo_pools;
543 while (curr_pool) {
544 blockinfo_pool_count++;
545 blockinfo_pool *dead_pool = curr_pool;
546 curr_pool = curr_pool->next;
547 free(dead_pool);
548 }
549
550 uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
551 write_log("### Blockinfo allocation statistics\n");
552 write_log("Number of blockinfo pools : %d\n", blockinfo_pool_count);
553 write_log("Total number of blockinfos : %d (%d KB)\n",
554 blockinfo_pool_count * BLOCKINFO_POOL_SIZE,
555 blockinfo_pools_size / 1024);
556 write_log("\n");
557 #endif
558 }
559
560 static __inline__ void alloc_blockinfos(void)
561 {
562 int i;
563 blockinfo* bi;
564
565 for (i=0;i<MAX_HOLD_BI;i++) {
566 if (hold_bi[i])
567 return;
568 bi=hold_bi[i]=alloc_blockinfo();
569 prepare_block(bi);
570 }
571 }
572
573 /********************************************************************
574 * Functions to emit data into memory, and other general support *
575 ********************************************************************/
576
577 static uae_u8* target;
578
579 static void emit_init(void)
580 {
581 }
582
583 static __inline__ void emit_byte(uae_u8 x)
584 {
585 *target++=x;
586 }
587
588 static __inline__ void emit_word(uae_u16 x)
589 {
590 *((uae_u16*)target)=x;
591 target+=2;
592 }
593
594 static __inline__ void emit_long(uae_u32 x)
595 {
596 *((uae_u32*)target)=x;
597 target+=4;
598 }
599
600 static __inline__ uae_u32 reverse32(uae_u32 v)
601 {
602 #if 1
603 // gb-- We have specialized byteswapping functions, just use them
604 return do_byteswap_32(v);
605 #else
606 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
607 #endif
608 }
609
610 /********************************************************************
611 * Getting the information about the target CPU *
612 ********************************************************************/
613
614 #include "codegen_x86.cpp"
615
616 void set_target(uae_u8* t)
617 {
618 target=t;
619 }
620
621 static __inline__ uae_u8* get_target_noopt(void)
622 {
623 return target;
624 }
625
626 __inline__ uae_u8* get_target(void)
627 {
628 return get_target_noopt();
629 }
630
631
632 /********************************************************************
633 * Flags status handling. EMIT TIME! *
634 ********************************************************************/
635
636 static void bt_l_ri_noclobber(R4 r, IMM i);
637
638 static void make_flags_live_internal(void)
639 {
640 if (live.flags_in_flags==VALID)
641 return;
642 Dif (live.flags_on_stack==TRASH) {
643 write_log("Want flags, got something on stack, but it is TRASH\n");
644 abort();
645 }
646 if (live.flags_on_stack==VALID) {
647 int tmp;
648 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
649 raw_reg_to_flags(tmp);
650 unlock2(tmp);
651
652 live.flags_in_flags=VALID;
653 return;
654 }
655 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
656 live.flags_in_flags,live.flags_on_stack);
657 abort();
658 }
659
660 static void flags_to_stack(void)
661 {
662 if (live.flags_on_stack==VALID)
663 return;
664 if (!live.flags_are_important) {
665 live.flags_on_stack=VALID;
666 return;
667 }
668 Dif (live.flags_in_flags!=VALID)
669 abort();
670 else {
671 int tmp;
672 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
673 raw_flags_to_reg(tmp);
674 unlock2(tmp);
675 }
676 live.flags_on_stack=VALID;
677 }
678
679 static __inline__ void clobber_flags(void)
680 {
681 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
682 flags_to_stack();
683 live.flags_in_flags=TRASH;
684 }
685
686 /* Prepare for leaving the compiled stuff */
687 static __inline__ void flush_flags(void)
688 {
689 flags_to_stack();
690 return;
691 }
692
693 int touchcnt;
694
695 /********************************************************************
696 * register allocation per block logging *
697 ********************************************************************/
698
699 static uae_s8 vstate[VREGS];
700 static uae_s8 vwritten[VREGS];
701 static uae_s8 nstate[N_REGS];
702
703 #define L_UNKNOWN -127
704 #define L_UNAVAIL -1
705 #define L_NEEDED -2
706 #define L_UNNEEDED -3
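/* Editorial note on the encoding, inferred from the functions below:
 * nstate[n] >= 0 means native reg n still mirrors vreg nstate[n],
 * unmodified since block entry (see log_isreg); the negative codes mark
 * "no information yet" (L_UNKNOWN), "used as a temp" (L_UNAVAIL), and
 * the needed/unneeded verdicts that callers_need_recompile() compares
 * against a caller's recorded smallstate. */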
707
708 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
709 {
710 int i;
711
712 for (i = 0; i < VREGS; i++)
713 s->virt[i] = vstate[i];
714 for (i = 0; i < N_REGS; i++)
715 s->nat[i] = nstate[i];
716 }
717
718 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
719 {
720 int i;
721 int reverse = 0;
722
723 for (i = 0; i < VREGS; i++) {
724 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
725 return 1;
726 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
727 reverse++;
728 }
729 for (i = 0; i < N_REGS; i++) {
730 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
731 return 1;
732 if (nstate[i] < 0 && s->nat[i] >= 0)
733 reverse++;
734 }
735 if (reverse >= 2 && USE_MATCH)
736 return 1; /* In this case, it might be worth recompiling the
737 * callers */
738 return 0;
739 }
740
741 static __inline__ void log_startblock(void)
742 {
743 int i;
744
745 for (i = 0; i < VREGS; i++) {
746 vstate[i] = L_UNKNOWN;
747 vwritten[i] = 0;
748 }
749 for (i = 0; i < N_REGS; i++)
750 nstate[i] = L_UNKNOWN;
751 }
752
753 /* Using an n-reg for a temp variable */
754 static __inline__ void log_isused(int n)
755 {
756 if (nstate[n] == L_UNKNOWN)
757 nstate[n] = L_UNAVAIL;
758 }
759
760 static __inline__ void log_visused(int r)
761 {
762 if (vstate[r] == L_UNKNOWN)
763 vstate[r] = L_NEEDED;
764 }
765
766 static __inline__ void do_load_reg(int n, int r)
767 {
768 if (r == FLAGTMP)
769 raw_load_flagreg(n, r);
770 else if (r == FLAGX)
771 raw_load_flagx(n, r);
772 else
773 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
774 }
775
776 static __inline__ void check_load_reg(int n, int r)
777 {
778 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
779 }
780
781 static __inline__ void log_vwrite(int r)
782 {
783 vwritten[r] = 1;
784 }
785
786 /* Using an n-reg to hold a v-reg */
787 static __inline__ void log_isreg(int n, int r)
788 {
789 static int count = 0;
790
791 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
792 nstate[n] = r;
793 else {
794 do_load_reg(n, r);
795 if (nstate[n] == L_UNKNOWN)
796 nstate[n] = L_UNAVAIL;
797 }
798 if (vstate[r] == L_UNKNOWN)
799 vstate[r] = L_NEEDED;
800 }
801
802 static __inline__ void log_clobberreg(int r)
803 {
804 if (vstate[r] == L_UNKNOWN)
805 vstate[r] = L_UNNEEDED;
806 }
807
808 /* This ends all possibility of clever register allocation */
809
810 static __inline__ void log_flush(void)
811 {
812 int i;
813
814 for (i = 0; i < VREGS; i++)
815 if (vstate[i] == L_UNKNOWN)
816 vstate[i] = L_NEEDED;
817 for (i = 0; i < N_REGS; i++)
818 if (nstate[i] == L_UNKNOWN)
819 nstate[i] = L_UNAVAIL;
820 }
821
822 static __inline__ void log_dump(void)
823 {
824 int i;
825
826 return;
827
828 write_log("----------------------\n");
829 for (i = 0; i < N_REGS; i++) {
830 switch (nstate[i]) {
831 case L_UNKNOWN:
832 write_log("Nat %d : UNKNOWN\n", i);
833 break;
834 case L_UNAVAIL:
835 write_log("Nat %d : UNAVAIL\n", i);
836 break;
837 default:
838 write_log("Nat %d : %d\n", i, nstate[i]);
839 break;
840 }
841 }
842 for (i = 0; i < VREGS; i++) {
843 if (vstate[i] == L_UNNEEDED)
844 write_log("Virt %d: UNNEEDED\n", i);
845 }
846 }
847
848 /********************************************************************
849 * register status handling. EMIT TIME! *
850 ********************************************************************/
851
852 static __inline__ void set_status(int r, int status)
853 {
854 if (status == ISCONST)
855 log_clobberreg(r);
856 live.state[r].status=status;
857 }
858
859 static __inline__ int isinreg(int r)
860 {
861 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
862 }
863
864 static __inline__ void adjust_nreg(int r, uae_u32 val)
865 {
866 if (!val)
867 return;
868 raw_lea_l_brr(r,r,val);
869 }
870
871 static void tomem(int r)
872 {
873 int rr=live.state[r].realreg;
874
875 if (isinreg(r)) {
876 if (live.state[r].val && live.nat[rr].nholds==1
877 && !live.nat[rr].locked) {
878 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
879 // live.state[r].val,r,rr,target);
880 adjust_nreg(rr,live.state[r].val);
881 live.state[r].val=0;
882 live.state[r].dirtysize=4;
883 set_status(r,DIRTY);
884 }
885 }
886
887 if (live.state[r].status==DIRTY) {
888 switch (live.state[r].dirtysize) {
889 case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
890 case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
891 case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
892 default: abort();
893 }
894 log_vwrite(r);
895 set_status(r,CLEAN);
896 live.state[r].dirtysize=0;
897 }
898 }
899
900 static __inline__ int isconst(int r)
901 {
902 return live.state[r].status==ISCONST;
903 }
904
905 int is_const(int r)
906 {
907 return isconst(r);
908 }
909
910 static __inline__ void writeback_const(int r)
911 {
912 if (!isconst(r))
913 return;
914 Dif (live.state[r].needflush==NF_HANDLER) {
915 write_log("Trying to write back constant NF_HANDLER!\n");
916 abort();
917 }
918
919 raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
920 log_vwrite(r);
921 live.state[r].val=0;
922 set_status(r,INMEM);
923 }
924
925 static __inline__ void tomem_c(int r)
926 {
927 if (isconst(r)) {
928 writeback_const(r);
929 }
930 else
931 tomem(r);
932 }
933
934 static void evict(int r)
935 {
936 int rr;
937
938 if (!isinreg(r))
939 return;
940 tomem(r);
941 rr=live.state[r].realreg;
942
943 Dif (live.nat[rr].locked &&
944 live.nat[rr].nholds==1) {
945 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
946 abort();
947 }
948
949 live.nat[rr].nholds--;
950 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
951 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
952 int thisind=live.state[r].realind;
953
954 live.nat[rr].holds[thisind]=topreg;
955 live.state[topreg].realind=thisind;
956 }
957 live.state[r].realreg=-1;
958 set_status(r,INMEM);
959 }
960
961 static __inline__ void free_nreg(int r)
962 {
963 int i=live.nat[r].nholds;
964
965 while (i) {
966 int vr;
967
968 --i;
969 vr=live.nat[r].holds[i];
970 evict(vr);
971 }
972 Dif (live.nat[r].nholds!=0) {
973 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
974 abort();
975 }
976 }
977
978 /* Use with care! */
979 static __inline__ void isclean(int r)
980 {
981 if (!isinreg(r))
982 return;
983 live.state[r].validsize=4;
984 live.state[r].dirtysize=0;
985 live.state[r].val=0;
986 set_status(r,CLEAN);
987 }
988
989 static __inline__ void disassociate(int r)
990 {
991 isclean(r);
992 evict(r);
993 }
994
995 static __inline__ void set_const(int r, uae_u32 val)
996 {
997 disassociate(r);
998 live.state[r].val=val;
999 set_status(r,ISCONST);
1000 }
1001
1002 static __inline__ uae_u32 get_offset(int r)
1003 {
1004 return live.state[r].val;
1005 }
1006
1007 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1008 {
1009 int bestreg;
1010 uae_s32 when;
1011 int i;
1012 uae_s32 badness=0; /* to shut up gcc */
1013 bestreg=-1;
1014 when=2000000000;
1015
1016 for (i=N_REGS;i--;) {
1017 badness=live.nat[i].touched;
1018 if (live.nat[i].nholds==0)
1019 badness=0;
1020 if (i==hint)
1021 badness-=200000000;
1022 if (!live.nat[i].locked && badness<when) {
1023 if ((size==1 && live.nat[i].canbyte) ||
1024 (size==2 && live.nat[i].canword) ||
1025 (size==4)) {
1026 bestreg=i;
1027 when=badness;
1028 if (live.nat[i].nholds==0 && hint<0)
1029 break;
1030 if (i==hint)
1031 break;
1032 }
1033 }
1034 }
1035 Dif (bestreg==-1)
1036 abort();
1037
1038 if (live.nat[bestreg].nholds>0) {
1039 free_nreg(bestreg);
1040 }
1041 if (isinreg(r)) {
1042 int rr=live.state[r].realreg;
1043 /* This will happen if we read a partially dirty register at a
1044 bigger size */
1045 Dif (willclobber || live.state[r].validsize>=size)
1046 abort();
1047 Dif (live.nat[rr].nholds!=1)
1048 abort();
1049 if (size==4 && live.state[r].validsize==2) {
1050 log_isused(bestreg);
1051 log_visused(r);
1052 raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
1053 raw_bswap_32(bestreg);
1054 raw_zero_extend_16_rr(rr,rr);
1055 raw_zero_extend_16_rr(bestreg,bestreg);
1056 raw_bswap_32(bestreg);
1057 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1058 live.state[r].validsize=4;
1059 live.nat[rr].touched=touchcnt++;
1060 return rr;
1061 }
1062 if (live.state[r].validsize==1) {
1063 /* Nothing yet */
1064 }
1065 evict(r);
1066 }
1067
1068 if (!willclobber) {
1069 if (live.state[r].status!=UNDEF) {
1070 if (isconst(r)) {
1071 raw_mov_l_ri(bestreg,live.state[r].val);
1072 live.state[r].val=0;
1073 live.state[r].dirtysize=4;
1074 set_status(r,DIRTY);
1075 log_isused(bestreg);
1076 }
1077 else {
1078 log_isreg(bestreg, r); /* This will also load it! */
1079 live.state[r].dirtysize=0;
1080 set_status(r,CLEAN);
1081 }
1082 }
1083 else {
1084 live.state[r].val=0;
1085 live.state[r].dirtysize=0;
1086 set_status(r,CLEAN);
1087 log_isused(bestreg);
1088 }
1089 live.state[r].validsize=4;
1090 }
1091 else { /* this is the easiest way, but not optimal. FIXME! */
1092 /* Now it's trickier, but hopefully still OK */
1093 if (!isconst(r) || size==4) {
1094 live.state[r].validsize=size;
1095 live.state[r].dirtysize=size;
1096 live.state[r].val=0;
1097 set_status(r,DIRTY);
1098 if (size == 4) {
1099 log_clobberreg(r);
1100 log_isused(bestreg);
1101 }
1102 else {
1103 log_visused(r);
1104 log_isused(bestreg);
1105 }
1106 }
1107 else {
1108 if (live.state[r].status!=UNDEF)
1109 raw_mov_l_ri(bestreg,live.state[r].val);
1110 live.state[r].val=0;
1111 live.state[r].validsize=4;
1112 live.state[r].dirtysize=4;
1113 set_status(r,DIRTY);
1114 log_isused(bestreg);
1115 }
1116 }
1117 live.state[r].realreg=bestreg;
1118 live.state[r].realind=live.nat[bestreg].nholds;
1119 live.nat[bestreg].touched=touchcnt++;
1120 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1121 live.nat[bestreg].nholds++;
1122
1123 return bestreg;
1124 }
1125
1126 static int alloc_reg(int r, int size, int willclobber)
1127 {
1128 return alloc_reg_hinted(r,size,willclobber,-1);
1129 }
1130
1131 static void unlock2(int r)
1132 {
1133 Dif (!live.nat[r].locked)
1134 abort();
1135 live.nat[r].locked--;
1136 }
1137
1138 static void setlock(int r)
1139 {
1140 live.nat[r].locked++;
1141 }
1142
1143
1144 static void mov_nregs(int d, int s)
1145 {
1146 int ns=live.nat[s].nholds;
1147 int nd=live.nat[d].nholds;
1148 int i;
1149
1150 if (s==d)
1151 return;
1152
1153 if (nd>0)
1154 free_nreg(d);
1155
1156 log_isused(d);
1157 raw_mov_l_rr(d,s);
1158
1159 for (i=0;i<live.nat[s].nholds;i++) {
1160 int vs=live.nat[s].holds[i];
1161
1162 live.state[vs].realreg=d;
1163 live.state[vs].realind=i;
1164 live.nat[d].holds[i]=vs;
1165 }
1166 live.nat[d].nholds=live.nat[s].nholds;
1167
1168 live.nat[s].nholds=0;
1169 }
1170
1171
1172 static __inline__ void make_exclusive(int r, int size, int spec)
1173 {
1174 int clobber;
1175 reg_status oldstate;
1176 int rr=live.state[r].realreg;
1177 int nr;
1178 int nind;
1179 int ndirt=0;
1180 int i;
1181
1182 if (!isinreg(r))
1183 return;
1184 if (live.nat[rr].nholds==1)
1185 return;
1186 for (i=0;i<live.nat[rr].nholds;i++) {
1187 int vr=live.nat[rr].holds[i];
1188 if (vr!=r &&
1189 (live.state[vr].status==DIRTY || live.state[vr].val))
1190 ndirt++;
1191 }
1192 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1193 /* Everything else is clean, so let's keep this register */
1194 for (i=0;i<live.nat[rr].nholds;i++) {
1195 int vr=live.nat[rr].holds[i];
1196 if (vr!=r) {
1197 evict(vr);
1198 i--; /* Try that index again! */
1199 }
1200 }
1201 Dif (live.nat[rr].nholds!=1) {
1202 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1203 rr,live.nat[rr].nholds,r);
1204 abort();
1205 }
1206 return;
1207 }
1208
1209 /* We have to split the register */
1210 oldstate=live.state[r];
1211
1212 setlock(rr); /* Make sure this doesn't go away */
1213 /* Forget about r being in the register rr */
1214 disassociate(r);
1215 /* Get a new register, that we will clobber completely */
1216 if (oldstate.status==DIRTY) {
1217 /* If dirtysize is <4, we need a register that can handle the
1218 eventual smaller memory store! Thanks to Quake68k for exposing
1219 this detail ;-) */
1220 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1221 }
1222 else {
1223 nr=alloc_reg_hinted(r,4,1,spec);
1224 }
1225 nind=live.state[r].realind;
1226 live.state[r]=oldstate; /* Keep all the old state info */
1227 live.state[r].realreg=nr;
1228 live.state[r].realind=nind;
1229
1230 if (size<live.state[r].validsize) {
1231 if (live.state[r].val) {
1232 /* Might as well compensate for the offset now */
1233 raw_lea_l_brr(nr,rr,oldstate.val);
1234 live.state[r].val=0;
1235 live.state[r].dirtysize=4;
1236 set_status(r,DIRTY);
1237 }
1238 else
1239 raw_mov_l_rr(nr,rr); /* Make another copy */
1240 }
1241 unlock2(rr);
1242 }
1243
1244 static __inline__ void add_offset(int r, uae_u32 off)
1245 {
1246 live.state[r].val+=off;
1247 }
1248
1249 static __inline__ void remove_offset(int r, int spec)
1250 {
1251 reg_status oldstate;
1252 int rr;
1253
1254 if (isconst(r))
1255 return;
1256 if (live.state[r].val==0)
1257 return;
1258 if (isinreg(r) && live.state[r].validsize<4)
1259 evict(r);
1260
1261 if (!isinreg(r))
1262 alloc_reg_hinted(r,4,0,spec);
1263
1264 Dif (live.state[r].validsize!=4) {
1265 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1266 abort();
1267 }
1268 make_exclusive(r,0,-1);
1269 /* make_exclusive might have done the job already */
1270 if (live.state[r].val==0)
1271 return;
1272
1273 rr=live.state[r].realreg;
1274
1275 if (live.nat[rr].nholds==1) {
1276 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1277 // live.state[r].val,r,rr,target);
1278 adjust_nreg(rr,live.state[r].val);
1279 live.state[r].dirtysize=4;
1280 live.state[r].val=0;
1281 set_status(r,DIRTY);
1282 return;
1283 }
1284 write_log("Failed in remove_offset\n");
1285 abort();
1286 }
1287
1288 static __inline__ void remove_all_offsets(void)
1289 {
1290 int i;
1291
1292 for (i=0;i<VREGS;i++)
1293 remove_offset(i,-1);
1294 }
1295
1296 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1297 {
1298 int n;
1299 int answer=-1;
1300
1301 if (live.state[r].status==UNDEF) {
1302 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1303 }
1304 if (!can_offset)
1305 remove_offset(r,spec);
1306
1307 if (isinreg(r) && live.state[r].validsize>=size) {
1308 n=live.state[r].realreg;
1309 switch(size) {
1310 case 1:
1311 if (live.nat[n].canbyte || spec>=0) {
1312 answer=n;
1313 }
1314 break;
1315 case 2:
1316 if (live.nat[n].canword || spec>=0) {
1317 answer=n;
1318 }
1319 break;
1320 case 4:
1321 answer=n;
1322 break;
1323 default: abort();
1324 }
1325 if (answer<0)
1326 evict(r);
1327 }
1328 /* either the value was in memory to start with, or it was evicted and
1329 is in memory now */
1330 if (answer<0) {
1331 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1332 }
1333
1334 if (spec>=0 && spec!=answer) {
1335 /* Too bad */
1336 mov_nregs(spec,answer);
1337 answer=spec;
1338 }
1339 live.nat[answer].locked++;
1340 live.nat[answer].touched=touchcnt++;
1341 return answer;
1342 }
1343
1344
1345
1346 static int readreg(int r, int size)
1347 {
1348 return readreg_general(r,size,-1,0);
1349 }
1350
1351 static int readreg_specific(int r, int size, int spec)
1352 {
1353 return readreg_general(r,size,spec,0);
1354 }
1355
1356 static int readreg_offset(int r, int size)
1357 {
1358 return readreg_general(r,size,-1,1);
1359 }
1360
1361 /* writereg_general(r, size, spec)
1362 *
1363 * INPUT
1364 * - r : mid-layer register
1365 * - size : requested size (1/2/4)
1366 * - spec : -1 if find or make a register free, otherwise specifies
1367 * the physical register to use in any case
1368 *
1369 * OUTPUT
1370 * - hard (physical, x86 here) register allocated to virtual register r
1371 */
1372 static __inline__ int writereg_general(int r, int size, int spec)
1373 {
1374 int n;
1375 int answer=-1;
1376
1377 if (size<4) {
1378 remove_offset(r,spec);
1379 }
1380
1381 make_exclusive(r,size,spec);
1382 if (isinreg(r)) {
1383 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1384 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1385 n=live.state[r].realreg;
1386
1387 Dif (live.nat[n].nholds!=1)
1388 abort();
1389 switch(size) {
1390 case 1:
1391 if (live.nat[n].canbyte || spec>=0) {
1392 live.state[r].dirtysize=ndsize;
1393 live.state[r].validsize=nvsize;
1394 answer=n;
1395 }
1396 break;
1397 case 2:
1398 if (live.nat[n].canword || spec>=0) {
1399 live.state[r].dirtysize=ndsize;
1400 live.state[r].validsize=nvsize;
1401 answer=n;
1402 }
1403 break;
1404 case 4:
1405 live.state[r].dirtysize=ndsize;
1406 live.state[r].validsize=nvsize;
1407 answer=n;
1408 break;
1409 default: abort();
1410 }
1411 if (answer<0)
1412 evict(r);
1413 }
1414 /* either the value was in memory to start with, or it was evicted and
1415 is in memory now */
1416 if (answer<0) {
1417 answer=alloc_reg_hinted(r,size,1,spec);
1418 }
1419 if (spec>=0 && spec!=answer) {
1420 mov_nregs(spec,answer);
1421 answer=spec;
1422 }
1423 if (live.state[r].status==UNDEF)
1424 live.state[r].validsize=4;
1425 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1426 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1427
1428 live.nat[answer].locked++;
1429 live.nat[answer].touched=touchcnt++;
1430 if (size==4) {
1431 live.state[r].val=0;
1432 }
1433 else {
1434 Dif (live.state[r].val) {
1435 write_log("Problem with val\n");
1436 abort();
1437 }
1438 }
1439 set_status(r,DIRTY);
1440 return answer;
1441 }
1442
1443 static int writereg(int r, int size)
1444 {
1445 return writereg_general(r,size,-1);
1446 }
1447
1448 static int writereg_specific(int r, int size, int spec)
1449 {
1450 return writereg_general(r,size,spec);
1451 }
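/* A minimal usage sketch, mirroring the MIDFUNCs further below (e.g.
 * mov_l_rm): the returned hard register comes back locked, so every
 * readreg/writereg/rmw call must be paired with an unlock2:
 *
 *   d = writereg(d, 4);   // virtual reg d now lives in hard reg 'd'
 *   raw_mov_l_ri(d, 0);   // emit native code against the hard register
 *   unlock2(d);           // release the lock taken by writereg
 */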
1452
1453 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1454 {
1455 int n;
1456 int answer=-1;
1457
1458 if (live.state[r].status==UNDEF) {
1459 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1460 }
1461 remove_offset(r,spec);
1462 make_exclusive(r,0,spec);
1463
1464 Dif (wsize<rsize) {
1465 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1466 abort();
1467 }
1468 if (isinreg(r) && live.state[r].validsize>=rsize) {
1469 n=live.state[r].realreg;
1470 Dif (live.nat[n].nholds!=1)
1471 abort();
1472
1473 switch(rsize) {
1474 case 1:
1475 if (live.nat[n].canbyte || spec>=0) {
1476 answer=n;
1477 }
1478 break;
1479 case 2:
1480 if (live.nat[n].canword || spec>=0) {
1481 answer=n;
1482 }
1483 break;
1484 case 4:
1485 answer=n;
1486 break;
1487 default: abort();
1488 }
1489 if (answer<0)
1490 evict(r);
1491 }
1492 /* either the value was in memory to start with, or it was evicted and
1493 is in memory now */
1494 if (answer<0) {
1495 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1496 }
1497
1498 if (spec>=0 && spec!=answer) {
1499 /* Too bad */
1500 mov_nregs(spec,answer);
1501 answer=spec;
1502 }
1503 if (wsize>live.state[r].dirtysize)
1504 live.state[r].dirtysize=wsize;
1505 if (wsize>live.state[r].validsize)
1506 live.state[r].validsize=wsize;
1507 set_status(r,DIRTY);
1508
1509 live.nat[answer].locked++;
1510 live.nat[answer].touched=touchcnt++;
1511
1512 Dif (live.state[r].val) {
1513 write_log("Problem with val(rmw)\n");
1514 abort();
1515 }
1516 return answer;
1517 }
1518
1519 static int rmw(int r, int wsize, int rsize)
1520 {
1521 return rmw_general(r,wsize,rsize,-1);
1522 }
1523
1524 static int rmw_specific(int r, int wsize, int rsize, int spec)
1525 {
1526 return rmw_general(r,wsize,rsize,spec);
1527 }
1528
1529
1530 /* needed for restoring the carry flag on non-P6 cores */
1531 static void bt_l_ri_noclobber(R4 r, IMM i)
1532 {
1533 int size=4;
1534 if (i<16)
1535 size=2;
1536 r=readreg(r,size);
1537 raw_bt_l_ri(r,i);
1538 unlock2(r);
1539 }
1540
1541 /********************************************************************
1542 * FPU register status handling. EMIT TIME! *
1543 ********************************************************************/
1544
1545 static void f_tomem(int r)
1546 {
1547 if (live.fate[r].status==DIRTY) {
1548 #if USE_LONG_DOUBLE
1549 raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1550 #else
1551 raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1552 #endif
1553 live.fate[r].status=CLEAN;
1554 }
1555 }
1556
1557 static void f_tomem_drop(int r)
1558 {
1559 if (live.fate[r].status==DIRTY) {
1560 #if USE_LONG_DOUBLE
1561 raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1562 #else
1563 raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1564 #endif
1565 live.fate[r].status=INMEM;
1566 }
1567 }
1568
1569
1570 static __inline__ int f_isinreg(int r)
1571 {
1572 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1573 }
1574
1575 static void f_evict(int r)
1576 {
1577 int rr;
1578
1579 if (!f_isinreg(r))
1580 return;
1581 rr=live.fate[r].realreg;
1582 if (live.fat[rr].nholds==1)
1583 f_tomem_drop(r);
1584 else
1585 f_tomem(r);
1586
1587 Dif (live.fat[rr].locked &&
1588 live.fat[rr].nholds==1) {
1589 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
1590 abort();
1591 }
1592
1593 live.fat[rr].nholds--;
1594 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
1595 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
1596 int thisind=live.fate[r].realind;
1597 live.fat[rr].holds[thisind]=topreg;
1598 live.fate[topreg].realind=thisind;
1599 }
1600 live.fate[r].status=INMEM;
1601 live.fate[r].realreg=-1;
1602 }
1603
1604 static __inline__ void f_free_nreg(int r)
1605 {
1606 int i=live.fat[r].nholds;
1607
1608 while (i) {
1609 int vr;
1610
1611 --i;
1612 vr=live.fat[r].holds[i];
1613 f_evict(vr);
1614 }
1615 Dif (live.fat[r].nholds!=0) {
1616 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1617 abort();
1618 }
1619 }
1620
1621
1622 /* Use with care! */
1623 static __inline__ void f_isclean(int r)
1624 {
1625 if (!f_isinreg(r))
1626 return;
1627 live.fate[r].status=CLEAN;
1628 }
1629
1630 static __inline__ void f_disassociate(int r)
1631 {
1632 f_isclean(r);
1633 f_evict(r);
1634 }
1635
1636
1637
1638 static int f_alloc_reg(int r, int willclobber)
1639 {
1640 int bestreg;
1641 uae_s32 when;
1642 int i;
1643 uae_s32 badness;
1644 bestreg=-1;
1645 when=2000000000;
1646 for (i=N_FREGS;i--;) {
1647 badness=live.fat[i].touched;
1648 if (live.fat[i].nholds==0)
1649 badness=0;
1650
1651 if (!live.fat[i].locked && badness<when) {
1652 bestreg=i;
1653 when=badness;
1654 if (live.fat[i].nholds==0)
1655 break;
1656 }
1657 }
1658 Dif (bestreg==-1)
1659 abort();
1660
1661 if (live.fat[bestreg].nholds>0) {
1662 f_free_nreg(bestreg);
1663 }
1664 if (f_isinreg(r)) {
1665 f_evict(r);
1666 }
1667
1668 if (!willclobber) {
1669 if (live.fate[r].status!=UNDEF) {
1670 #if USE_LONG_DOUBLE
1671 raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
1672 #else
1673 raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
1674 #endif
1675 }
1676 live.fate[r].status=CLEAN;
1677 }
1678 else {
1679 live.fate[r].status=DIRTY;
1680 }
1681 live.fate[r].realreg=bestreg;
1682 live.fate[r].realind=live.fat[bestreg].nholds;
1683 live.fat[bestreg].touched=touchcnt++;
1684 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
1685 live.fat[bestreg].nholds++;
1686
1687 return bestreg;
1688 }
1689
1690 static void f_unlock(int r)
1691 {
1692 Dif (!live.fat[r].locked)
1693 abort();
1694 live.fat[r].locked--;
1695 }
1696
1697 static void f_setlock(int r)
1698 {
1699 live.fat[r].locked++;
1700 }
1701
1702 static __inline__ int f_readreg(int r)
1703 {
1704 int n;
1705 int answer=-1;
1706
1707 if (f_isinreg(r)) {
1708 n=live.fate[r].realreg;
1709 answer=n;
1710 }
1711 /* either the value was in memory to start with, or it was evicted and
1712 is in memory now */
1713 if (answer<0)
1714 answer=f_alloc_reg(r,0);
1715
1716 live.fat[answer].locked++;
1717 live.fat[answer].touched=touchcnt++;
1718 return answer;
1719 }
1720
1721 static __inline__ void f_make_exclusive(int r, int clobber)
1722 {
1723 freg_status oldstate;
1724 int rr=live.fate[r].realreg;
1725 int nr;
1726 int nind;
1727 int ndirt=0;
1728 int i;
1729
1730 if (!f_isinreg(r))
1731 return;
1732 if (live.fat[rr].nholds==1)
1733 return;
1734 for (i=0;i<live.fat[rr].nholds;i++) {
1735 int vr=live.fat[rr].holds[i];
1736 if (vr!=r && live.fate[vr].status==DIRTY)
1737 ndirt++;
1738 }
1739 if (!ndirt && !live.fat[rr].locked) {
1740 /* Everything else is clean, so let's keep this register */
1741 for (i=0;i<live.fat[rr].nholds;i++) {
1742 int vr=live.fat[rr].holds[i];
1743 if (vr!=r) {
1744 f_evict(vr);
1745 i--; /* Try that index again! */
1746 }
1747 }
1748 Dif (live.fat[rr].nholds!=1) {
1749 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
1750 for (i=0;i<live.fat[rr].nholds;i++) {
1751 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
1752 live.fate[live.fat[rr].holds[i]].realreg,
1753 live.fate[live.fat[rr].holds[i]].realind);
1754 }
1755 write_log("\n");
1756 abort();
1757 }
1758 return;
1759 }
1760
1761 /* We have to split the register */
1762 oldstate=live.fate[r];
1763
1764 f_setlock(rr); /* Make sure this doesn't go away */
1765 /* Forget about r being in the register rr */
1766 f_disassociate(r);
1767 /* Get a new register, that we will clobber completely */
1768 nr=f_alloc_reg(r,1);
1769 nind=live.fate[r].realind;
1770 if (!clobber)
1771 raw_fmov_rr(nr,rr); /* Make another copy */
1772 live.fate[r]=oldstate; /* Keep all the old state info */
1773 live.fate[r].realreg=nr;
1774 live.fate[r].realind=nind;
1775 f_unlock(rr);
1776 }
1777
1778
1779 static __inline__ int f_writereg(int r)
1780 {
1781 int n;
1782 int answer=-1;
1783
1784 f_make_exclusive(r,1);
1785 if (f_isinreg(r)) {
1786 n=live.fate[r].realreg;
1787 answer=n;
1788 }
1789 if (answer<0) {
1790 answer=f_alloc_reg(r,1);
1791 }
1792 live.fate[r].status=DIRTY;
1793 live.fat[answer].locked++;
1794 live.fat[answer].touched=touchcnt++;
1795 return answer;
1796 }
1797
1798 static int f_rmw(int r)
1799 {
1800 int n;
1801
1802 f_make_exclusive(r,0);
1803 if (f_isinreg(r)) {
1804 n=live.fate[r].realreg;
1805 }
1806 else
1807 n=f_alloc_reg(r,0);
1808 live.fate[r].status=DIRTY;
1809 live.fat[n].locked++;
1810 live.fat[n].touched=touchcnt++;
1811 return n;
1812 }
1813
1814 static void fflags_into_flags_internal(uae_u32 tmp)
1815 {
1816 int r;
1817
1818 clobber_flags();
1819 r=f_readreg(FP_RESULT);
1820 if (FFLAG_NREG_CLOBBER_CONDITION) {
1821 int tmp2=tmp;
1822 tmp=writereg_specific(tmp,4,FFLAG_NREG);
1823 raw_fflags_into_flags(r);
1824 unlock2(tmp);
1825 forget_about(tmp2);
1826 }
1827 else
1828 raw_fflags_into_flags(r);
1829 f_unlock(r);
1830 }
1831
1832
1833
1834
1835 /********************************************************************
1836 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
1837 ********************************************************************/
1838
1839 /*
1840 * RULES FOR HANDLING REGISTERS:
1841 *
1842 * * In the function headers, order the parameters
1843 * - 1st registers written to
1844 * - 2nd read/modify/write registers
1845 * - 3rd registers read from
1846 * * Before calling raw_*, you must call readreg, writereg or rmw for
1847 * each register
1848 * * The order for this is
1849 * - 1st call remove_offset for all registers written to with size<4
1850 * - 2nd call readreg for all registers read without offset
1851 * - 3rd call rmw for all rmw registers
1852 * - 4th call readreg_offset for all registers that can handle offsets
1853 * - 5th call get_offset for all the registers from the previous step
1854 * - 6th call writereg for all written-to registers
1855 * - 7th call raw_*
1856 * - 8th unlock2 all registers that were locked
1857 */
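/* A hedged sketch of that ordering for a two-operand longword op that
 * writes d and reads s; raw_op_l_rr is a hypothetical emitter standing
 * in for any of the raw_* functions:
 *
 *   s = readreg(s, 4);    // 2nd: plain reads first
 *   d = writereg(d, 4);   // 6th: written-to registers last
 *   raw_op_l_rr(d, s);    // 7th: emit the native code
 *   unlock2(d);           // 8th: drop all locks taken above
 *   unlock2(s);
 */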
1858
1859 MIDFUNC(0,live_flags,(void))
1860 {
1861 live.flags_on_stack=TRASH;
1862 live.flags_in_flags=VALID;
1863 live.flags_are_important=1;
1864 }
1865 MENDFUNC(0,live_flags,(void))
1866
1867 MIDFUNC(0,dont_care_flags,(void))
1868 {
1869 live.flags_are_important=0;
1870 }
1871 MENDFUNC(0,dont_care_flags,(void))
1872
1873
1874 MIDFUNC(0,duplicate_carry,(void))
1875 {
1876 evict(FLAGX);
1877 make_flags_live_internal();
1878 COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
1879 log_vwrite(FLAGX);
1880 }
1881 MENDFUNC(0,duplicate_carry,(void))
1882
1883 MIDFUNC(0,restore_carry,(void))
1884 {
1885 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
1886 bt_l_ri_noclobber(FLAGX,0);
1887 }
1888 else { /* Avoid the stall the above creates.
1889 This is slow on non-P6, though.
1890 */
1891 COMPCALL(rol_b_ri(FLAGX,8));
1892 isclean(FLAGX);
1893 }
1894 }
1895 MENDFUNC(0,restore_carry,(void))
1896
1897 MIDFUNC(0,start_needflags,(void))
1898 {
1899 needflags=1;
1900 }
1901 MENDFUNC(0,start_needflags,(void))
1902
1903 MIDFUNC(0,end_needflags,(void))
1904 {
1905 needflags=0;
1906 }
1907 MENDFUNC(0,end_needflags,(void))
1908
1909 MIDFUNC(0,make_flags_live,(void))
1910 {
1911 make_flags_live_internal();
1912 }
1913 MENDFUNC(0,make_flags_live,(void))
1914
1915 MIDFUNC(1,fflags_into_flags,(W2 tmp))
1916 {
1917 clobber_flags();
1918 fflags_into_flags_internal(tmp);
1919 }
1920 MENDFUNC(1,fflags_into_flags,(W2 tmp))
1921
1922
1923 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
1924 {
1925 int size=4;
1926 if (i<16)
1927 size=2;
1928 CLOBBER_BT;
1929 r=readreg(r,size);
1930 raw_bt_l_ri(r,i);
1931 unlock2(r);
1932 }
1933 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
1934
1935 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
1936 {
1937 CLOBBER_BT;
1938 r=readreg(r,4);
1939 b=readreg(b,4);
1940 raw_bt_l_rr(r,b);
1941 unlock2(r);
1942 unlock2(b);
1943 }
1944 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
1945
1946 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
1947 {
1948 int size=4;
1949 if (i<16)
1950 size=2;
1951 CLOBBER_BT;
1952 r=rmw(r,size,size);
1953 raw_btc_l_ri(r,i);
1954 unlock2(r);
1955 }
1956 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
1957
1958 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
1959 {
1960 CLOBBER_BT;
1961 b=readreg(b,4);
1962 r=rmw(r,4,4);
1963 raw_btc_l_rr(r,b);
1964 unlock2(r);
1965 unlock2(b);
1966 }
1967 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
1968
1969
1970 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
1971 {
1972 int size=4;
1973 if (i<16)
1974 size=2;
1975 CLOBBER_BT;
1976 r=rmw(r,size,size);
1977 raw_btr_l_ri(r,i);
1978 unlock2(r);
1979 }
1980 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
1981
1982 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
1983 {
1984 CLOBBER_BT;
1985 b=readreg(b,4);
1986 r=rmw(r,4,4);
1987 raw_btr_l_rr(r,b);
1988 unlock2(r);
1989 unlock2(b);
1990 }
1991 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
1992
1993
1994 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
1995 {
1996 int size=4;
1997 if (i<16)
1998 size=2;
1999 CLOBBER_BT;
2000 r=rmw(r,size,size);
2001 raw_bts_l_ri(r,i);
2002 unlock2(r);
2003 }
2004 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2005
2006 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2007 {
2008 CLOBBER_BT;
2009 b=readreg(b,4);
2010 r=rmw(r,4,4);
2011 raw_bts_l_rr(r,b);
2012 unlock2(r);
2013 unlock2(b);
2014 }
2015 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2016
2017 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2018 {
2019 CLOBBER_MOV;
2020 d=writereg(d,4);
2021 raw_mov_l_rm(d,s);
2022 unlock2(d);
2023 }
2024 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2025
2026
2027 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2028 {
2029 r=readreg(r,4);
2030 raw_call_r(r);
2031 unlock2(r);
2032 }
2033 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2034
2035 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2036 {
2037 CLOBBER_SUB;
2038 raw_sub_l_mi(d,s) ;
2039 }
2040 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2041
2042 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2043 {
2044 CLOBBER_MOV;
2045 raw_mov_l_mi(d,s) ;
2046 }
2047 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2048
2049 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2050 {
2051 CLOBBER_MOV;
2052 raw_mov_w_mi(d,s) ;
2053 }
2054 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2055
2056 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2057 {
2058 CLOBBER_MOV;
2059 raw_mov_b_mi(d,s) ;
2060 }
2061 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2062
2063 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2064 {
2065 if (!i && !needflags)
2066 return;
2067 CLOBBER_ROL;
2068 r=rmw(r,1,1);
2069 raw_rol_b_ri(r,i);
2070 unlock2(r);
2071 }
2072 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2073
2074 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2075 {
2076 if (!i && !needflags)
2077 return;
2078 CLOBBER_ROL;
2079 r=rmw(r,2,2);
2080 raw_rol_w_ri(r,i);
2081 unlock2(r);
2082 }
2083 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2084
2085 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2086 {
2087 if (!i && !needflags)
2088 return;
2089 CLOBBER_ROL;
2090 r=rmw(r,4,4);
2091 raw_rol_l_ri(r,i);
2092 unlock2(r);
2093 }
2094 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2095
2096 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2097 {
2098 if (isconst(r)) {
2099 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2100 return;
2101 }
2102 CLOBBER_ROL;
2103 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2104 d=rmw(d,4,4);
2105 Dif (r!=1) {
2106 write_log("Illegal register %d in raw_rol_l\n",r);
2107 abort();
2108 }
2109 raw_rol_l_rr(d,r) ;
2110 unlock2(r);
2111 unlock2(d);
2112 }
2113 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2114
2115 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2116 { /* Can only do this with r==1, i.e. cl */
2117
2118 if (isconst(r)) {
2119 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2120 return;
2121 }
2122 CLOBBER_ROL;
2123 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2124 d=rmw(d,2,2);
2125 Dif (r!=1) {
2126 write_log("Illegal register %d in raw_rol_w\n",r);
2127 abort();
2128 }
2129 raw_rol_w_rr(d,r) ;
2130 unlock2(r);
2131 unlock2(d);
2132 }
2133 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2134
2135 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2136 { /* Can only do this with r==1, i.e. cl */
2137
2138 if (isconst(r)) {
2139 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2140 return;
2141 }
2142
2143 CLOBBER_ROL;
2144 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2145 d=rmw(d,1,1);
2146 Dif (r!=1) {
2147 write_log("Illegal register %d in raw_rol_b\n",r);
2148 abort();
2149 }
2150 raw_rol_b_rr(d,r) ;
2151 unlock2(r);
2152 unlock2(d);
2153 }
2154 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2155
2156
2157 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2158 {
2159 if (isconst(r)) {
2160 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2161 return;
2162 }
2163 CLOBBER_SHLL;
2164 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2165 d=rmw(d,4,4);
2166 Dif (r!=1) {
2167 write_log("Illegal register %d in raw_shll_l\n",r);
2168 abort();
2169 }
2170 raw_shll_l_rr(d,r) ;
2171 unlock2(r);
2172 unlock2(d);
2173 }
2174 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2175
2176 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2177 { /* Can only do this with r==1, i.e. cl */
2178
2179 if (isconst(r)) {
2180 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2181 return;
2182 }
2183 CLOBBER_SHLL;
2184 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2185 d=rmw(d,2,2);
2186 Dif (r!=1) {
2187 write_log("Illegal register %d in raw_shll_w\n",r);
2188 abort();
2189 }
2190 raw_shll_w_rr(d,r) ;
2191 unlock2(r);
2192 unlock2(d);
2193 }
2194 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2195
2196 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2197 { /* Can only do this with r==1, i.e. cl */
2198
2199 if (isconst(r)) {
2200 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2201 return;
2202 }
2203
2204 CLOBBER_SHLL;
2205 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2206 d=rmw(d,1,1);
2207 Dif (r!=1) {
2208 write_log("Illegal register %d in raw_shll_b\n",r);
2209 abort();
2210 }
2211 raw_shll_b_rr(d,r) ;
2212 unlock2(r);
2213 unlock2(d);
2214 }
2215 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2216
2217
2218 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2219 {
2220 if (!i && !needflags)
2221 return;
2222 CLOBBER_ROR;
2223 r=rmw(r,1,1);
2224 raw_ror_b_ri(r,i);
2225 unlock2(r);
2226 }
2227 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2228
2229 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2230 {
2231 if (!i && !needflags)
2232 return;
2233 CLOBBER_ROR;
2234 r=rmw(r,2,2);
2235 raw_ror_w_ri(r,i);
2236 unlock2(r);
2237 }
2238 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2239
2240 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2241 {
2242 if (!i && !needflags)
2243 return;
2244 CLOBBER_ROR;
2245 r=rmw(r,4,4);
2246 raw_ror_l_ri(r,i);
2247 unlock2(r);
2248 }
2249 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2250
2251 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2252 {
2253 if (isconst(r)) {
2254 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2255 return;
2256 }
2257 CLOBBER_ROR;
2258 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2259 d=rmw(d,4,4);
2260 raw_ror_l_rr(d,r) ;
2261 unlock2(r);
2262 unlock2(d);
2263 }
2264 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2265
2266 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2267 {
2268 if (isconst(r)) {
2269 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2270 return;
2271 }
2272 CLOBBER_ROR;
2273 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2274 d=rmw(d,2,2);
2275 raw_ror_w_rr(d,r) ;
2276 unlock2(r);
2277 unlock2(d);
2278 }
2279 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2280
2281 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2282 {
2283 if (isconst(r)) {
2284 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2285 return;
2286 }
2287
2288 CLOBBER_ROR;
2289 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2290 d=rmw(d,1,1);
2291 raw_ror_b_rr(d,r);
2292 unlock2(r);
2293 unlock2(d);
2294 }
2295 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2296
2297 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2298 {
2299 if (isconst(r)) {
2300 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2301 return;
2302 }
2303 CLOBBER_SHRL;
2304 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2305 d=rmw(d,4,4);
2306 Dif (r!=1) {
2307 write_log("Illegal register %d in raw_shrl_l\n",r);
2308 abort();
2309 }
2310 raw_shrl_l_rr(d,r);
2311 unlock2(r);
2312 unlock2(d);
2313 }
2314 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2315
2316 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2317 { /* Can only do this with r==1, i.e. cl */
2318
2319 if (isconst(r)) {
2320 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2321 return;
2322 }
2323 CLOBBER_SHRL;
2324 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2325 d=rmw(d,2,2);
2326 Dif (r!=1) {
2327 write_log("Illegal register %d in raw_shrl_w\n",r);
2328 abort();
2329 }
2330 raw_shrl_w_rr(d,r);
2331 unlock2(r);
2332 unlock2(d);
2333 }
2334 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2335
2336 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2337 { /* Can only do this with r==1, i.e. cl */
2338
2339 if (isconst(r)) {
2340 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2341 return;
2342 }
2343
2344 CLOBBER_SHRL;
2345 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2346 d=rmw(d,1,1);
2347 Dif (r!=1) {
2348 write_log("Illegal register %d in raw_shrl_b\n",r);
2349 abort();
2350 }
2351 raw_shrl_b_rr(d,r);
2352 unlock2(r);
2353 unlock2(d);
2354 }
2355 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2356
2357
2358
2359 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2360 {
2361 if (!i && !needflags)
2362 return;
2363 if (isconst(r) && !needflags) {
2364 live.state[r].val<<=i;
2365 return;
2366 }
2367 CLOBBER_SHLL;
2368 r=rmw(r,4,4);
2369 raw_shll_l_ri(r,i);
2370 unlock2(r);
2371 }
2372 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
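/* When the operand is a compile-time constant and no flags are wanted,
   the shift above is folded into the recorded value and no code is
   emitted. A sketch of the effect (R is a hypothetical virtual register):

     mov_l_ri(R,1);    // live.state[R].val = 1, nothing emitted
     shll_l_ri(R,4);   // live.state[R].val = 0x10, still nothing

   Only a later use that needs R in a native register materializes it. */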
2373
2374 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2375 {
2376 if (!i && !needflags)
2377 return;
2378 CLOBBER_SHLL;
2379 r=rmw(r,2,2);
2380 raw_shll_w_ri(r,i);
2381 unlock2(r);
2382 }
2383 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2384
2385 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2386 {
2387 if (!i && !needflags)
2388 return;
2389 CLOBBER_SHLL;
2390 r=rmw(r,1,1);
2391 raw_shll_b_ri(r,i);
2392 unlock2(r);
2393 }
2394 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2395
2396 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2397 {
2398 if (!i && !needflags)
2399 return;
2400 if (isconst(r) && !needflags) {
2401 live.state[r].val>>=i;
2402 return;
2403 }
2404 CLOBBER_SHRL;
2405 r=rmw(r,4,4);
2406 raw_shrl_l_ri(r,i);
2407 unlock2(r);
2408 }
2409 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2410
2411 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2412 {
2413 if (!i && !needflags)
2414 return;
2415 CLOBBER_SHRL;
2416 r=rmw(r,2,2);
2417 raw_shrl_w_ri(r,i);
2418 unlock2(r);
2419 }
2420 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2421
2422 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2423 {
2424 if (!i && !needflags)
2425 return;
2426 CLOBBER_SHRL;
2427 r=rmw(r,1,1);
2428 raw_shrl_b_ri(r,i);
2429 unlock2(r);
2430 }
2431 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2432
2433 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2434 {
2435 if (!i && !needflags)
2436 return;
2437 CLOBBER_SHRA;
2438 r=rmw(r,4,4);
2439 raw_shra_l_ri(r,i);
2440 unlock2(r);
2441 }
2442 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2443
2444 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2445 {
2446 if (!i && !needflags)
2447 return;
2448 CLOBBER_SHRA;
2449 r=rmw(r,2,2);
2450 raw_shra_w_ri(r,i);
2451 unlock2(r);
2452 }
2453 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2454
2455 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2456 {
2457 if (!i && !needflags)
2458 return;
2459 CLOBBER_SHRA;
2460 r=rmw(r,1,1);
2461 raw_shra_b_ri(r,i);
2462 unlock2(r);
2463 }
2464 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2465
2466 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2467 {
2468 if (isconst(r)) {
2469 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2470 return;
2471 }
2472 CLOBBER_SHRA;
2473 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2474 d=rmw(d,4,4);
2475 Dif (r!=1) {
2476 write_log("Illegal register %d in raw_shra_l\n",r);
2477 abort();
2478 }
2479 raw_shra_l_rr(d,r);
2480 unlock2(r);
2481 unlock2(d);
2482 }
2483 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2484
2485 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2486 { /* Can only do this with r==1, i.e. cl */
2487
2488 if (isconst(r)) {
2489 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2490 return;
2491 }
2492 CLOBBER_SHRA;
2493 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2494 d=rmw(d,2,2);
2495 Dif (r!=1) {
2496 write_log("Illegal register %d in raw_shra_w\n",r);
2497 abort();
2498 }
2499 raw_shra_w_rr(d,r);
2500 unlock2(r);
2501 unlock2(d);
2502 }
2503 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2504
2505 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2506 { /* Can only do this with r==1, i.e. cl */
2507
2508 if (isconst(r)) {
2509 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2510 return;
2511 }
2512
2513 CLOBBER_SHRA;
2514 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2515 d=rmw(d,1,1);
2516 Dif (r!=1) {
2517 write_log("Illegal register %d in raw_shra_b\n",r);
2518 abort();
2519 }
2520 raw_shra_b_rr(d,r);
2521 unlock2(r);
2522 unlock2(d);
2523 }
2524 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2525
2526
2527 MIDFUNC(2,setcc,(W1 d, IMM cc))
2528 {
2529 CLOBBER_SETCC;
2530 d=writereg(d,1);
2531 raw_setcc(d,cc);
2532 unlock2(d);
2533 }
2534 MENDFUNC(2,setcc,(W1 d, IMM cc))
2535
2536 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2537 {
2538 CLOBBER_SETCC;
2539 raw_setcc_m(d,cc);
2540 }
2541 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2542
2543 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2544 {
2545 if (d==s)
2546 return;
2547 CLOBBER_CMOV;
2548 s=readreg(s,4);
2549 d=rmw(d,4,4);
2550 raw_cmov_l_rr(d,s,cc);
2551 unlock2(s);
2552 unlock2(d);
2553 }
2554 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2555
2556 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2557 {
2558 CLOBBER_CMOV;
2559 d=rmw(d,4,4);
2560 raw_cmov_l_rm(d,s,cc);
2561 unlock2(d);
2562 }
2563 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
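/* The cmov helpers map straight onto the host's conditional-move
   instruction. Whether CMOV is actually available is probed at startup
   (see have_cmov in compiler_init below); the raw_ layer is presumably
   expected to fall back to a short branch sequence on processors that
   lack it. */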
2564
2565 MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2566 {
2567 CLOBBER_BSF;
2568 s=readreg(s,4);
2569 d=writereg(d,4);
2570 raw_bsf_l_rr(d,s);
2571 unlock2(s);
2572 unlock2(d);
2573 }
2574 MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2575
2576 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2577 {
2578 CLOBBER_MUL;
2579 s=readreg(s,4);
2580 d=rmw(d,4,4);
2581 raw_imul_32_32(d,s);
2582 unlock2(s);
2583 unlock2(d);
2584 }
2585 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2586
2587 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2588 {
2589 CLOBBER_MUL;
2590 s=rmw_specific(s,4,4,MUL_NREG2);
2591 d=rmw_specific(d,4,4,MUL_NREG1);
2592 raw_imul_64_32(d,s);
2593 unlock2(s);
2594 unlock2(d);
2595 }
2596 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2597
2598 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2599 {
2600 CLOBBER_MUL;
2601 s=rmw_specific(s,4,4,MUL_NREG2);
2602 d=rmw_specific(d,4,4,MUL_NREG1);
2603 raw_mul_64_32(d,s);
2604 unlock2(s);
2605 unlock2(d);
2606 }
2607 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
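/* The widening multiplies pin both operands with rmw_specific() because
   x86 MUL/IMUL with a 64-bit result use the fixed EDX:EAX pair;
   MUL_NREG1/MUL_NREG2 are assumed to name those two native registers in
   the backend. */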
2608
2609 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
2610 {
2611 CLOBBER_MUL;
2612 s=readreg(s,4);
2613 d=rmw(d,4,4);
2614 raw_mul_32_32(d,s);
2615 unlock2(s);
2616 unlock2(d);
2617 }
2618 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2619
2620 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
2621 {
2622 int isrmw;
2623
2624 if (isconst(s)) {
2625 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
2626 return;
2627 }
2628
2629 CLOBBER_SE16;
2630 isrmw=(s==d);
2631 if (!isrmw) {
2632 s=readreg(s,2);
2633 d=writereg(d,4);
2634 }
2635 else { /* If we try to lock this twice, with different sizes, we
2636 are in trouble! */
2637 s=d=rmw(s,4,2);
2638 }
2639 raw_sign_extend_16_rr(d,s);
2640 if (!isrmw) {
2641 unlock2(d);
2642 unlock2(s);
2643 }
2644 else {
2645 unlock2(s);
2646 }
2647 }
2648 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
2649
2650 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
2651 {
2652 int isrmw;
2653
2654 if (isconst(s)) {
2655 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
2656 return;
2657 }
2658
2659 isrmw=(s==d);
2660 CLOBBER_SE8;
2661 if (!isrmw) {
2662 s=readreg(s,1);
2663 d=writereg(d,4);
2664 }
2665 else { /* If we try to lock this twice, with different sizes, we
2666 are in trouble! */
2667 s=d=rmw(s,4,1);
2668 }
2669
2670 raw_sign_extend_8_rr(d,s);
2671
2672 if (!isrmw) {
2673 unlock2(d);
2674 unlock2(s);
2675 }
2676 else {
2677 unlock2(s);
2678 }
2679 }
2680 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
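/* For the extension ops, the s==d case must go through a single rmw()
   lock: locking the same virtual register twice with different sizes
   (narrow read plus 4-byte write) would corrupt the allocator's
   bookkeeping, which is exactly the trouble the comments above warn
   about. */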
2681
2682
2683 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
2684 {
2685 int isrmw;
2686
2687 if (isconst(s)) {
2688 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
2689 return;
2690 }
2691
2692 isrmw=(s==d);
2693 CLOBBER_ZE16;
2694 if (!isrmw) {
2695 s=readreg(s,2);
2696 d=writereg(d,4);
2697 }
2698 else { /* If we try to lock this twice, with different sizes, we
2699 are in trouble! */
2700 s=d=rmw(s,4,2);
2701 }
2702 raw_zero_extend_16_rr(d,s);
2703 if (!isrmw) {
2704 unlock2(d);
2705 unlock2(s);
2706 }
2707 else {
2708 unlock2(s);
2709 }
2710 }
2711 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
2712
2713 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
2714 {
2715 int isrmw;
2716 if (isconst(s)) {
2717 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
2718 return;
2719 }
2720
2721 isrmw=(s==d);
2722 CLOBBER_ZE8;
2723 if (!isrmw) {
2724 s=readreg(s,1);
2725 d=writereg(d,4);
2726 }
2727 else { /* If we try to lock this twice, with different sizes, we
2728 are in trouble! */
2729 s=d=rmw(s,4,1);
2730 }
2731
2732 raw_zero_extend_8_rr(d,s);
2733
2734 if (!isrmw) {
2735 unlock2(d);
2736 unlock2(s);
2737 }
2738 else {
2739 unlock2(s);
2740 }
2741 }
2742 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
2743
2744 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
2745 {
2746 if (d==s)
2747 return;
2748 if (isconst(s)) {
2749 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
2750 return;
2751 }
2752
2753 CLOBBER_MOV;
2754 s=readreg(s,1);
2755 d=writereg(d,1);
2756 raw_mov_b_rr(d,s);
2757 unlock2(d);
2758 unlock2(s);
2759 }
2760 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
2761
2762 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
2763 {
2764 if (d==s)
2765 return;
2766 if (isconst(s)) {
2767 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
2768 return;
2769 }
2770
2771 CLOBBER_MOV;
2772 s=readreg(s,2);
2773 d=writereg(d,2);
2774 raw_mov_w_rr(d,s);
2775 unlock2(d);
2776 unlock2(s);
2777 }
2778 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
2779
2780
2781 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
2782 {
2783 CLOBBER_MOV;
2784 baser=readreg(baser,4);
2785 index=readreg(index,4);
2786 d=writereg(d,4);
2787
2788 raw_mov_l_rrm_indexed(d,baser,index,factor);
2789 unlock2(d);
2790 unlock2(baser);
2791 unlock2(index);
2792 }
2793 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
2794
2795 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
2796 {
2797 CLOBBER_MOV;
2798 baser=readreg(baser,4);
2799 index=readreg(index,4);
2800 d=writereg(d,2);
2801
2802 raw_mov_w_rrm_indexed(d,baser,index,factor);
2803 unlock2(d);
2804 unlock2(baser);
2805 unlock2(index);
2806 }
2807 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
2808
2809 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
2810 {
2811 CLOBBER_MOV;
2812 baser=readreg(baser,4);
2813 index=readreg(index,4);
2814 d=writereg(d,1);
2815
2816 raw_mov_b_rrm_indexed(d,baser,index,factor);
2817
2818 unlock2(d);
2819 unlock2(baser);
2820 unlock2(index);
2821 }
2822 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
2823
2824
2825 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
2826 {
2827 CLOBBER_MOV;
2828 baser=readreg(baser,4);
2829 index=readreg(index,4);
2830 s=readreg(s,4);
2831
2832 Dif (baser==s || index==s)
2833 abort();
2834
2835
2836 raw_mov_l_mrr_indexed(baser,index,factor,s);
2837 unlock2(s);
2838 unlock2(baser);
2839 unlock2(index);
2840 }
2841 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
2842
2843 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
2844 {
2845 CLOBBER_MOV;
2846 baser=readreg(baser,4);
2847 index=readreg(index,4);
2848 s=readreg(s,2);
2849
2850 raw_mov_w_mrr_indexed(baser,index,factor,s);
2851 unlock2(s);
2852 unlock2(baser);
2853 unlock2(index);
2854 }
2855 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
2856
2857 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2858 {
2859 CLOBBER_MOV;
2860 s=readreg(s,1);
2861 baser=readreg(baser,4);
2862 index=readreg(index,4);
2863
2864 raw_mov_b_mrr_indexed(baser,index,factor,s);
2865 unlock2(s);
2866 unlock2(baser);
2867 unlock2(index);
2868 }
2869 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2870
2871
2872 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2873 {
2874 int basereg=baser;
2875 int indexreg=index;
2876
2877 CLOBBER_MOV;
2878 s=readreg(s,4);
2879 baser=readreg_offset(baser,4);
2880 index=readreg_offset(index,4);
2881
2882 base+=get_offset(basereg);
2883 base+=factor*get_offset(indexreg);
2884
2885 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
2886 unlock2(s);
2887 unlock2(baser);
2888 unlock2(index);
2889 }
2890 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2891
2892 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2893 {
2894 int basereg=baser;
2895 int indexreg=index;
2896
2897 CLOBBER_MOV;
2898 s=readreg(s,2);
2899 baser=readreg_offset(baser,4);
2900 index=readreg_offset(index,4);
2901
2902 base+=get_offset(basereg);
2903 base+=factor*get_offset(indexreg);
2904
2905 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
2906 unlock2(s);
2907 unlock2(baser);
2908 unlock2(index);
2909 }
2910 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2911
2912 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2913 {
2914 int basereg=baser;
2915 int indexreg=index;
2916
2917 CLOBBER_MOV;
2918 s=readreg(s,1);
2919 baser=readreg_offset(baser,4);
2920 index=readreg_offset(index,4);
2921
2922 base+=get_offset(basereg);
2923 base+=factor*get_offset(indexreg);
2924
2925 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
2926 unlock2(s);
2927 unlock2(baser);
2928 unlock2(index);
2929 }
2930 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
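/* The bmrr/brrm families read their address registers with
   readreg_offset() instead of readreg(): any offset accumulated against
   the virtual register by the lazy-offset machinery is fetched via
   get_offset() and folded into the constant displacement `base`, so the
   pending adjustment never has to be applied to the register itself. */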
2931
2932
2933
2934 /* Read a long from base+baser+factor*index */
2935 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2936 {
2937 int basereg=baser;
2938 int indexreg=index;
2939
2940 CLOBBER_MOV;
2941 baser=readreg_offset(baser,4);
2942 index=readreg_offset(index,4);
2943 base+=get_offset(basereg);
2944 base+=factor*get_offset(indexreg);
2945 d=writereg(d,4);
2946 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
2947 unlock2(d);
2948 unlock2(baser);
2949 unlock2(index);
2950 }
2951 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2952
2953
2954 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2955 {
2956 int basereg=baser;
2957 int indexreg=index;
2958
2959 CLOBBER_MOV;
2960 remove_offset(d,-1);
2961 baser=readreg_offset(baser,4);
2962 index=readreg_offset(index,4);
2963 base+=get_offset(basereg);
2964 base+=factor*get_offset(indexreg);
2965 d=writereg(d,2);
2966 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
2967 unlock2(d);
2968 unlock2(baser);
2969 unlock2(index);
2970 }
2971 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2972
2973
2974 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2975 {
2976 int basereg=baser;
2977 int indexreg=index;
2978
2979 CLOBBER_MOV;
2980 remove_offset(d,-1);
2981 baser=readreg_offset(baser,4);
2982 index=readreg_offset(index,4);
2983 base+=get_offset(basereg);
2984 base+=factor*get_offset(indexreg);
2985 d=writereg(d,1);
2986 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
2987 unlock2(d);
2988 unlock2(baser);
2989 unlock2(index);
2990 }
2991 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2992
2993 /* Read a long from base+factor*index */
2994 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2995 {
2996 int indexreg=index;
2997
2998 if (isconst(index)) {
2999 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3000 return;
3001 }
3002
3003 CLOBBER_MOV;
3004 index=readreg_offset(index,4);
3005 base+=get_offset(indexreg)*factor;
3006 d=writereg(d,4);
3007
3008 raw_mov_l_rm_indexed(d,base,index,factor);
3009 unlock2(index);
3010 unlock2(d);
3011 }
3012 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3013
3014
3015 /* read the long at the address contained in s+offset and store in d */
3016 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3017 {
3018 if (isconst(s)) {
3019 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3020 return;
3021 }
3022 CLOBBER_MOV;
3023 s=readreg(s,4);
3024 d=writereg(d,4);
3025
3026 raw_mov_l_rR(d,s,offset);
3027 unlock2(d);
3028 unlock2(s);
3029 }
3030 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3031
3032 /* read the word at the address contained in s+offset and store in d */
3033 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3034 {
3035 if (isconst(s)) {
3036 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3037 return;
3038 }
3039 CLOBBER_MOV;
3040 s=readreg(s,4);
3041 d=writereg(d,2);
3042
3043 raw_mov_w_rR(d,s,offset);
3044 unlock2(d);
3045 unlock2(s);
3046 }
3047 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3048
3049 /* read the byte at the address contained in s+offset and store in d */
3050 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3051 {
3052 if (isconst(s)) {
3053 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3054 return;
3055 }
3056 CLOBBER_MOV;
3057 s=readreg(s,4);
3058 d=writereg(d,1);
3059
3060 raw_mov_b_rR(d,s,offset);
3061 unlock2(d);
3062 unlock2(s);
3063 }
3064 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3065
3066 /* read the long at the address contained in s+offset and store in d */
3067 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3068 {
3069 int sreg=s;
3070 if (isconst(s)) {
3071 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3072 return;
3073 }
3074 CLOBBER_MOV;
3075 s=readreg_offset(s,4);
3076 offset+=get_offset(sreg);
3077 d=writereg(d,4);
3078
3079 raw_mov_l_brR(d,s,offset);
3080 unlock2(d);
3081 unlock2(s);
3082 }
3083 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3084
3085 /* read the word at the address contained in s+offset and store in d */
3086 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3087 {
3088 int sreg=s;
3089 if (isconst(s)) {
3090 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3091 return;
3092 }
3093 CLOBBER_MOV;
3094 remove_offset(d,-1);
3095 s=readreg_offset(s,4);
3096 offset+=get_offset(sreg);
3097 d=writereg(d,2);
3098
3099 raw_mov_w_brR(d,s,offset);
3100 unlock2(d);
3101 unlock2(s);
3102 }
3103 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3104
3105 /* read the byte at the address contained in s+offset and store in d */
3106 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3107 {
3108 int sreg=s;
3109 if (isconst(s)) {
3110 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3111 return;
3112 }
3113 CLOBBER_MOV;
3114 remove_offset(d,-1);
3115 s=readreg_offset(s,4);
3116 offset+=get_offset(sreg);
3117 d=writereg(d,1);
3118
3119 raw_mov_b_brR(d,s,offset);
3120 unlock2(d);
3121 unlock2(s);
3122 }
3123 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3124
3125 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3126 {
3127 int dreg=d;
3128 if (isconst(d)) {
3129 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3130 return;
3131 }
3132
3133 CLOBBER_MOV;
3134 d=readreg_offset(d,4);
3135 offset+=get_offset(dreg);
3136 raw_mov_l_Ri(d,i,offset);
3137 unlock2(d);
3138 }
3139 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3140
3141 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3142 {
3143 int dreg=d;
3144 if (isconst(d)) {
3145 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3146 return;
3147 }
3148
3149 CLOBBER_MOV;
3150 d=readreg_offset(d,4);
3151 offset+=get_offset(dreg);
3152 raw_mov_w_Ri(d,i,offset);
3153 unlock2(d);
3154 }
3155 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3156
3157 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3158 {
3159 int dreg=d;
3160 if (isconst(d)) {
3161 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3162 return;
3163 }
3164
3165 CLOBBER_MOV;
3166 d=readreg_offset(d,4);
3167 offset+=get_offset(dreg);
3168 raw_mov_b_Ri(d,i,offset);
3169 unlock2(d);
3170 }
3171 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3172
3173 /* Warning! OFFSET is byte sized only! */
3174 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3175 {
3176 if (isconst(d)) {
3177 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3178 return;
3179 }
3180 if (isconst(s)) {
3181 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3182 return;
3183 }
3184
3185 CLOBBER_MOV;
3186 s=readreg(s,4);
3187 d=readreg(d,4);
3188
3189 raw_mov_l_Rr(d,s,offset);
3190 unlock2(d);
3191 unlock2(s);
3192 }
3193 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3194
3195 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3196 {
3197 if (isconst(d)) {
3198 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3199 return;
3200 }
3201 if (isconst(s)) {
3202 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3203 return;
3204 }
3205
3206 CLOBBER_MOV;
3207 s=readreg(s,2);
3208 d=readreg(d,4);
3209 raw_mov_w_Rr(d,s,offset);
3210 unlock2(d);
3211 unlock2(s);
3212 }
3213 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3214
3215 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3216 {
3217 if (isconst(d)) {
3218 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3219 return;
3220 }
3221 if (isconst(s)) {
3222 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3223 return;
3224 }
3225
3226 CLOBBER_MOV;
3227 s=readreg(s,1);
3228 d=readreg(d,4);
3229 raw_mov_b_Rr(d,s,offset);
3230 unlock2(d);
3231 unlock2(s);
3232 }
3233 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3234
3235 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3236 {
3237 if (isconst(s)) {
3238 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3239 return;
3240 }
3241 #if USE_OFFSET
3242 if (d==s) {
3243 add_offset(d,offset);
3244 return;
3245 }
3246 #endif
3247 CLOBBER_LEA;
3248 s=readreg(s,4);
3249 d=writereg(d,4);
3250 raw_lea_l_brr(d,s,offset);
3251 unlock2(d);
3252 unlock2(s);
3253 }
3254 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
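/* lea_l_brr with d==s is the canonical producer for lazy constant
   offsetting: under USE_OFFSET no LEA is emitted at all -- add_offset()
   just records that the register is off by `offset`, and the correction
   is later folded into a memory operand (see readreg_offset above) or
   applied when the register is finally flushed. */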
3255
3256 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3257 {
3258 if (!offset) {
3259 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3260 return;
3261 }
3262 CLOBBER_LEA;
3263 s=readreg(s,4);
3264 index=readreg(index,4);
3265 d=writereg(d,4);
3266
3267 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3268 unlock2(d);
3269 unlock2(index);
3270 unlock2(s);
3271 }
3272 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3273
3274 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3275 {
3276 CLOBBER_LEA;
3277 s=readreg(s,4);
3278 index=readreg(index,4);
3279 d=writereg(d,4);
3280
3281 raw_lea_l_rr_indexed(d,s,index,factor);
3282 unlock2(d);
3283 unlock2(index);
3284 unlock2(s);
3285 }
3286 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3287
3288 /* write s to the long at the address contained in d+offset */
3289 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3290 {
3291 int dreg=d;
3292 if (isconst(d)) {
3293 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3294 return;
3295 }
3296
3297 CLOBBER_MOV;
3298 s=readreg(s,4);
3299 d=readreg_offset(d,4);
3300 offset+=get_offset(dreg);
3301
3302 raw_mov_l_bRr(d,s,offset);
3303 unlock2(d);
3304 unlock2(s);
3305 }
3306 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3307
3308 /* write s to the word at the address contained in d+offset */
3309 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3310 {
3311 int dreg=d;
3312
3313 if (isconst(d)) {
3314 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3315 return;
3316 }
3317
3318 CLOBBER_MOV;
3319 s=readreg(s,2);
3320 d=readreg_offset(d,4);
3321 offset+=get_offset(dreg);
3322 raw_mov_w_bRr(d,s,offset);
3323 unlock2(d);
3324 unlock2(s);
3325 }
3326 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3327
3328 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3329 {
3330 int dreg=d;
3331 if (isconst(d)) {
3332 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3333 return;
3334 }
3335
3336 CLOBBER_MOV;
3337 s=readreg(s,1);
3338 d=readreg_offset(d,4);
3339 offset+=get_offset(dreg);
3340 raw_mov_b_bRr(d,s,offset);
3341 unlock2(d);
3342 unlock2(s);
3343 }
3344 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3345
3346 MIDFUNC(1,bswap_32,(RW4 r))
3347 {
3348 int reg=r;
3349
3350 if (isconst(r)) {
3351 uae_u32 oldv=live.state[r].val;
3352 live.state[r].val=reverse32(oldv);
3353 return;
3354 }
3355
3356 CLOBBER_SW32;
3357 r=rmw(r,4,4);
3358 raw_bswap_32(r);
3359 unlock2(r);
3360 }
3361 MENDFUNC(1,bswap_32,(RW4 r))
3362
3363 MIDFUNC(1,bswap_16,(RW2 r))
3364 {
3365 if (isconst(r)) {
3366 uae_u32 oldv=live.state[r].val;
3367 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3368 (oldv&0xffff0000);
3369 return;
3370 }
3371
3372 CLOBBER_SW16;
3373 r=rmw(r,2,2);
3374
3375 raw_bswap_16(r);
3376 unlock2(r);
3377 }
3378 MENDFUNC(1,bswap_16,(RW2 r))
3379
3380
3381
3382 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3383 {
3384 int olds;
3385
3386 if (d==s) { /* How pointless! */
3387 return;
3388 }
3389 if (isconst(s)) {
3390 COMPCALL(mov_l_ri)(d,live.state[s].val);
3391 return;
3392 }
3393 olds=s;
3394 disassociate(d);
3395 s=readreg_offset(s,4);
3396 live.state[d].realreg=s;
3397 live.state[d].realind=live.nat[s].nholds;
3398 live.state[d].val=live.state[olds].val;
3399 live.state[d].validsize=4;
3400 live.state[d].dirtysize=4;
3401 set_status(d,DIRTY);
3402
3403 live.nat[s].holds[live.nat[s].nholds]=d;
3404 live.nat[s].nholds++;
3405 log_clobberreg(d);
3406 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3407 d,s,live.state[d].realind,live.nat[s].nholds); */
3408 unlock2(s);
3409 }
3410 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
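/* mov_l_rr is where integer register aliasing happens: no host move is
   emitted. Instead d is attached to the native register that already
   holds s, and marked DIRTY so it is written back on eviction. A sketch
   of the bookkeeping, assuming s currently lives in native register n:

     before:  nat[n].holds = { s }     nholds = 1
     after:   nat[n].holds = { s, d }  nholds = 2
              state[d].realreg = n, state[d].realind = 1

   A 68k register-to-register move therefore costs nothing to emit. */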
3411
3412 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3413 {
3414 if (isconst(s)) {
3415 COMPCALL(mov_l_mi)(d,live.state[s].val);
3416 return;
3417 }
3418 CLOBBER_MOV;
3419 s=readreg(s,4);
3420
3421 raw_mov_l_mr(d,s);
3422 unlock2(s);
3423 }
3424 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3425
3426
3427 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3428 {
3429 if (isconst(s)) {
3430 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3431 return;
3432 }
3433 CLOBBER_MOV;
3434 s=readreg(s,2);
3435
3436 raw_mov_w_mr(d,s);
3437 unlock2(s);
3438 }
3439 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3440
3441 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3442 {
3443 CLOBBER_MOV;
3444 d=writereg(d,2);
3445
3446 raw_mov_w_rm(d,s);
3447 unlock2(d);
3448 }
3449 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3450
3451 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3452 {
3453 if (isconst(s)) {
3454 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3455 return;
3456 }
3457
3458 CLOBBER_MOV;
3459 s=readreg(s,1);
3460
3461 raw_mov_b_mr(d,s);
3462 unlock2(s);
3463 }
3464 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3465
3466 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3467 {
3468 CLOBBER_MOV;
3469 d=writereg(d,1);
3470
3471 raw_mov_b_rm(d,s);
3472 unlock2(d);
3473 }
3474 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3475
3476 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3477 {
3478 set_const(d,s);
3479 return;
3480 }
3481 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
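/* mov_l_ri is likewise pure bookkeeping: set_const() records the
   immediate in live.state[d].val and marks d as constant, so loading a
   constant emits no code until the value is actually needed in a native
   register (or written back by flush()). */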
3482
3483 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3484 {
3485 CLOBBER_MOV;
3486 d=writereg(d,2);
3487
3488 raw_mov_w_ri(d,s);
3489 unlock2(d);
3490 }
3491 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3492
3493 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3494 {
3495 CLOBBER_MOV;
3496 d=writereg(d,1);
3497
3498 raw_mov_b_ri(d,s);
3499 unlock2(d);
3500 }
3501 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3502
3503
3504 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3505 {
3506 CLOBBER_ADD;
3507 raw_add_l_mi(d,s);
3508 }
3509 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3510
3511 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3512 {
3513 CLOBBER_ADD;
3514 raw_add_w_mi(d,s);
3515 }
3516 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3517
3518 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3519 {
3520 CLOBBER_ADD;
3521 raw_add_b_mi(d,s);
3522 }
3523 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3524
3525
3526 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3527 {
3528 CLOBBER_TEST;
3529 d=readreg(d,4);
3530
3531 raw_test_l_ri(d,i);
3532 unlock2(d);
3533 }
3534 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3535
3536 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3537 {
3538 CLOBBER_TEST;
3539 d=readreg(d,4);
3540 s=readreg(s,4);
3541
3542 raw_test_l_rr(d,s);
3543 unlock2(d);
3544 unlock2(s);
3545 }
3546 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3547
3548 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
3549 {
3550 CLOBBER_TEST;
3551 d=readreg(d,2);
3552 s=readreg(s,2);
3553
3554 raw_test_w_rr(d,s);
3555 unlock2(d);
3556 unlock2(s);
3557 }
3558 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
3559
3560 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
3561 {
3562 CLOBBER_TEST;
3563 d=readreg(d,1);
3564 s=readreg(s,1);
3565
3566 raw_test_b_rr(d,s);
3567 unlock2(d);
3568 unlock2(s);
3569 }
3570 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3571
3572
3573 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
3574 {
3575 if (isconst(d) && !needflags) {
3576 live.state[d].val &= i;
3577 return;
3578 }
3579
3580 CLOBBER_AND;
3581 d=rmw(d,4,4);
3582
3583 raw_and_l_ri(d,i);
3584 unlock2(d);
3585 }
3586 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
3587
3588 MIDFUNC(2,and_l,(RW4 d, R4 s))
3589 {
3590 CLOBBER_AND;
3591 s=readreg(s,4);
3592 d=rmw(d,4,4);
3593
3594 raw_and_l(d,s);
3595 unlock2(d);
3596 unlock2(s);
3597 }
3598 MENDFUNC(2,and_l,(RW4 d, R4 s))
3599
3600 MIDFUNC(2,and_w,(RW2 d, R2 s))
3601 {
3602 CLOBBER_AND;
3603 s=readreg(s,2);
3604 d=rmw(d,2,2);
3605
3606 raw_and_w(d,s);
3607 unlock2(d);
3608 unlock2(s);
3609 }
3610 MENDFUNC(2,and_w,(RW2 d, R2 s))
3611
3612 MIDFUNC(2,and_b,(RW1 d, R1 s))
3613 {
3614 CLOBBER_AND;
3615 s=readreg(s,1);
3616 d=rmw(d,1,1);
3617
3618 raw_and_b(d,s);
3619 unlock2(d);
3620 unlock2(s);
3621 }
3622 MENDFUNC(2,and_b,(RW1 d, R1 s))
3623
3624 // gb-- used for making an fpcr value in compemu_fpp.cpp
3625 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
3626 {
3627 CLOBBER_OR;
3628 d=rmw(d,4,4);
3629
3630 raw_or_l_rm(d,s);
3631 unlock2(d);
3632 }
3633 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
3634
3635 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
3636 {
3637 if (isconst(d) && !needflags) {
3638 live.state[d].val|=i;
3639 return;
3640 }
3641 CLOBBER_OR;
3642 d=rmw(d,4,4);
3643
3644 raw_or_l_ri(d,i);
3645 unlock2(d);
3646 }
3647 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
3648
3649 MIDFUNC(2,or_l,(RW4 d, R4 s))
3650 {
3651 if (isconst(d) && isconst(s) && !needflags) {
3652 live.state[d].val|=live.state[s].val;
3653 return;
3654 }
3655 CLOBBER_OR;
3656 s=readreg(s,4);
3657 d=rmw(d,4,4);
3658
3659 raw_or_l(d,s);
3660 unlock2(d);
3661 unlock2(s);
3662 }
3663 MENDFUNC(2,or_l,(RW4 d, R4 s))
3664
3665 MIDFUNC(2,or_w,(RW2 d, R2 s))
3666 {
3667 CLOBBER_OR;
3668 s=readreg(s,2);
3669 d=rmw(d,2,2);
3670
3671 raw_or_w(d,s);
3672 unlock2(d);
3673 unlock2(s);
3674 }
3675 MENDFUNC(2,or_w,(RW2 d, R2 s))
3676
3677 MIDFUNC(2,or_b,(RW1 d, R1 s))
3678 {
3679 CLOBBER_OR;
3680 s=readreg(s,1);
3681 d=rmw(d,1,1);
3682
3683 raw_or_b(d,s);
3684 unlock2(d);
3685 unlock2(s);
3686 }
3687 MENDFUNC(2,or_b,(RW1 d, R1 s))
3688
3689 MIDFUNC(2,adc_l,(RW4 d, R4 s))
3690 {
3691 CLOBBER_ADC;
3692 s=readreg(s,4);
3693 d=rmw(d,4,4);
3694
3695 raw_adc_l(d,s);
3696
3697 unlock2(d);
3698 unlock2(s);
3699 }
3700 MENDFUNC(2,adc_l,(RW4 d, R4 s))
3701
3702 MIDFUNC(2,adc_w,(RW2 d, R2 s))
3703 {
3704 CLOBBER_ADC;
3705 s=readreg(s,2);
3706 d=rmw(d,2,2);
3707
3708 raw_adc_w(d,s);
3709 unlock2(d);
3710 unlock2(s);
3711 }
3712 MENDFUNC(2,adc_w,(RW2 d, R2 s))
3713
3714 MIDFUNC(2,adc_b,(RW1 d, R1 s))
3715 {
3716 CLOBBER_ADC;
3717 s=readreg(s,1);
3718 d=rmw(d,1,1);
3719
3720 raw_adc_b(d,s);
3721 unlock2(d);
3722 unlock2(s);
3723 }
3724 MENDFUNC(2,adc_b,(RW1 d, R1 s))
3725
3726 MIDFUNC(2,add_l,(RW4 d, R4 s))
3727 {
3728 if (isconst(s)) {
3729 COMPCALL(add_l_ri)(d,live.state[s].val);
3730 return;
3731 }
3732
3733 CLOBBER_ADD;
3734 s=readreg(s,4);
3735 d=rmw(d,4,4);
3736
3737 raw_add_l(d,s);
3738
3739 unlock2(d);
3740 unlock2(s);
3741 }
3742 MENDFUNC(2,add_l,(RW4 d, R4 s))
3743
3744 MIDFUNC(2,add_w,(RW2 d, R2 s))
3745 {
3746 if (isconst(s)) {
3747 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
3748 return;
3749 }
3750
3751 CLOBBER_ADD;
3752 s=readreg(s,2);
3753 d=rmw(d,2,2);
3754
3755 raw_add_w(d,s);
3756 unlock2(d);
3757 unlock2(s);
3758 }
3759 MENDFUNC(2,add_w,(RW2 d, R2 s))
3760
3761 MIDFUNC(2,add_b,(RW1 d, R1 s))
3762 {
3763 if (isconst(s)) {
3764 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
3765 return;
3766 }
3767
3768 CLOBBER_ADD;
3769 s=readreg(s,1);
3770 d=rmw(d,1,1);
3771
3772 raw_add_b(d,s);
3773 unlock2(d);
3774 unlock2(s);
3775 }
3776 MENDFUNC(2,add_b,(RW1 d, R1 s))
3777
3778 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
3779 {
3780 if (!i && !needflags)
3781 return;
3782 if (isconst(d) && !needflags) {
3783 live.state[d].val-=i;
3784 return;
3785 }
3786 #if USE_OFFSET
3787 if (!needflags) {
3788 add_offset(d,-i);
3789 return;
3790 }
3791 #endif
3792
3793 CLOBBER_SUB;
3794 d=rmw(d,4,4);
3795
3796 raw_sub_l_ri(d,i);
3797 unlock2(d);
3798 }
3799 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
3800
3801 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
3802 {
3803 if (!i && !needflags)
3804 return;
3805
3806 CLOBBER_SUB;
3807 d=rmw(d,2,2);
3808
3809 raw_sub_w_ri(d,i);
3810 unlock2(d);
3811 }
3812 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
3813
3814 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
3815 {
3816 if (!i && !needflags)
3817 return;
3818
3819 CLOBBER_SUB;
3820 d=rmw(d,1,1);
3821
3822 raw_sub_b_ri(d,i);
3823
3824 unlock2(d);
3825 }
3826 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
3827
3828 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
3829 {
3830 if (!i && !needflags)
3831 return;
3832 if (isconst(d) && !needflags) {
3833 live.state[d].val+=i;
3834 return;
3835 }
3836 #if USE_OFFSET
3837 if (!needflags) {
3838 add_offset(d,i);
3839 return;
3840 }
3841 #endif
3842 CLOBBER_ADD;
3843 d=rmw(d,4,4);
3844 raw_add_l_ri(d,i);
3845 unlock2(d);
3846 }
3847 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
3848
3849 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
3850 {
3851 if (!i && !needflags)
3852 return;
3853
3854 CLOBBER_ADD;
3855 d=rmw(d,2,2);
3856
3857 raw_add_w_ri(d,i);
3858 unlock2(d);
3859 }
3860 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
3861
3862 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
3863 {
3864 if (!i && !needflags)
3865 return;
3866
3867 CLOBBER_ADD;
3868 d=rmw(d,1,1);
3869
3870 raw_add_b_ri(d,i);
3871
3872 unlock2(d);
3873 }
3874 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
3875
3876 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
3877 {
3878 CLOBBER_SBB;
3879 s=readreg(s,4);
3880 d=rmw(d,4,4);
3881
3882 raw_sbb_l(d,s);
3883 unlock2(d);
3884 unlock2(s);
3885 }
3886 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
3887
3888 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
3889 {
3890 CLOBBER_SBB;
3891 s=readreg(s,2);
3892 d=rmw(d,2,2);
3893
3894 raw_sbb_w(d,s);
3895 unlock2(d);
3896 unlock2(s);
3897 }
3898 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
3899
3900 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
3901 {
3902 CLOBBER_SBB;
3903 s=readreg(s,1);
3904 d=rmw(d,1,1);
3905
3906 raw_sbb_b(d,s);
3907 unlock2(d);
3908 unlock2(s);
3909 }
3910 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
3911
3912 MIDFUNC(2,sub_l,(RW4 d, R4 s))
3913 {
3914 if (isconst(s)) {
3915 COMPCALL(sub_l_ri)(d,live.state[s].val);
3916 return;
3917 }
3918
3919 CLOBBER_SUB;
3920 s=readreg(s,4);
3921 d=rmw(d,4,4);
3922
3923 raw_sub_l(d,s);
3924 unlock2(d);
3925 unlock2(s);
3926 }
3927 MENDFUNC(2,sub_l,(RW4 d, R4 s))
3928
3929 MIDFUNC(2,sub_w,(RW2 d, R2 s))
3930 {
3931 if (isconst(s)) {
3932 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
3933 return;
3934 }
3935
3936 CLOBBER_SUB;
3937 s=readreg(s,2);
3938 d=rmw(d,2,2);
3939
3940 raw_sub_w(d,s);
3941 unlock2(d);
3942 unlock2(s);
3943 }
3944 MENDFUNC(2,sub_w,(RW2 d, R2 s))
3945
3946 MIDFUNC(2,sub_b,(RW1 d, R1 s))
3947 {
3948 if (isconst(s)) {
3949 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
3950 return;
3951 }
3952
3953 CLOBBER_SUB;
3954 s=readreg(s,1);
3955 d=rmw(d,1,1);
3956
3957 raw_sub_b(d,s);
3958 unlock2(d);
3959 unlock2(s);
3960 }
3961 MENDFUNC(2,sub_b,(RW1 d, R1 s))
3962
3963 MIDFUNC(2,cmp_l,(R4 d, R4 s))
3964 {
3965 CLOBBER_CMP;
3966 s=readreg(s,4);
3967 d=readreg(d,4);
3968
3969 raw_cmp_l(d,s);
3970 unlock2(d);
3971 unlock2(s);
3972 }
3973 MENDFUNC(2,cmp_l,(R4 d, R4 s))
3974
3975 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
3976 {
3977 CLOBBER_CMP;
3978 r=readreg(r,4);
3979
3980 raw_cmp_l_ri(r,i);
3981 unlock2(r);
3982 }
3983 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
3984
3985 MIDFUNC(2,cmp_w,(R2 d, R2 s))
3986 {
3987 CLOBBER_CMP;
3988 s=readreg(s,2);
3989 d=readreg(d,2);
3990
3991 raw_cmp_w(d,s);
3992 unlock2(d);
3993 unlock2(s);
3994 }
3995 MENDFUNC(2,cmp_w,(R2 d, R2 s))
3996
3997 MIDFUNC(2,cmp_b,(R1 d, R1 s))
3998 {
3999 CLOBBER_CMP;
4000 s=readreg(s,1);
4001 d=readreg(d,1);
4002
4003 raw_cmp_b(d,s);
4004 unlock2(d);
4005 unlock2(s);
4006 }
4007 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4008
4009
4010 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4011 {
4012 CLOBBER_XOR;
4013 s=readreg(s,4);
4014 d=rmw(d,4,4);
4015
4016 raw_xor_l(d,s);
4017 unlock2(d);
4018 unlock2(s);
4019 }
4020 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4021
4022 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4023 {
4024 CLOBBER_XOR;
4025 s=readreg(s,2);
4026 d=rmw(d,2,2);
4027
4028 raw_xor_w(d,s);
4029 unlock2(d);
4030 unlock2(s);
4031 }
4032 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4033
4034 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4035 {
4036 CLOBBER_XOR;
4037 s=readreg(s,1);
4038 d=rmw(d,1,1);
4039
4040 raw_xor_b(d,s);
4041 unlock2(d);
4042 unlock2(s);
4043 }
4044 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4045
4046 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4047 {
4048 clobber_flags();
4049 remove_all_offsets();
4050 if (osize==4) {
4051 if (out1!=in1 && out1!=r) {
4052 COMPCALL(forget_about)(out1);
4053 }
4054 }
4055 else {
4056 tomem_c(out1);
4057 }
4058
4059 in1=readreg_specific(in1,isize,REG_PAR1);
4060 r=readreg(r,4);
4061 prepare_for_call_1(); /* This should ensure that there won't be
4062 any need for swapping nregs in prepare_for_call_2
4063 */
4064 #if USE_NORMAL_CALLING_CONVENTION
4065 raw_push_l_r(in1);
4066 #endif
4067 unlock2(in1);
4068 unlock2(r);
4069
4070 prepare_for_call_2();
4071 raw_call_r(r);
4072
4073 #if USE_NORMAL_CALLING_CONVENTION
4074 raw_inc_sp(4);
4075 #endif
4076
4077
4078 live.nat[REG_RESULT].holds[0]=out1;
4079 live.nat[REG_RESULT].nholds=1;
4080 live.nat[REG_RESULT].touched=touchcnt++;
4081
4082 live.state[out1].realreg=REG_RESULT;
4083 live.state[out1].realind=0;
4084 live.state[out1].val=0;
4085 live.state[out1].validsize=osize;
4086 live.state[out1].dirtysize=osize;
4087 set_status(out1,DIRTY);
4088 }
4089 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4090
4091 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4092 {
4093 clobber_flags();
4094 remove_all_offsets();
4095 in1=readreg_specific(in1,isize1,REG_PAR1);
4096 in2=readreg_specific(in2,isize2,REG_PAR2);
4097 r=readreg(r,4);
4098 prepare_for_call_1(); /* This should ensure that there won't be
4099 any need for swapping nregs in prepare_for_call_2
4100 */
4101 #if USE_NORMAL_CALLING_CONVENTION
4102 raw_push_l_r(in2);
4103 raw_push_l_r(in1);
4104 #endif
4105 unlock2(r);
4106 unlock2(in1);
4107 unlock2(in2);
4108 prepare_for_call_2();
4109 raw_call_r(r);
4110 #if USE_NORMAL_CALLING_CONVENTION
4111 raw_inc_sp(8);
4112 #endif
4113 }
4114 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4115
4116 /* forget_about() takes a mid-layer register */
4117 MIDFUNC(1,forget_about,(W4 r))
4118 {
4119 if (isinreg(r))
4120 disassociate(r);
4121 live.state[r].val=0;
4122 set_status(r,UNDEF);
4123 }
4124 MENDFUNC(1,forget_about,(W4 r))
4125
4126 MIDFUNC(0,nop,(void))
4127 {
4128 raw_nop();
4129 }
4130 MENDFUNC(0,nop,(void))
4131
4132
4133 MIDFUNC(1,f_forget_about,(FW r))
4134 {
4135 if (f_isinreg(r))
4136 f_disassociate(r);
4137 live.fate[r].status=UNDEF;
4138 }
4139 MENDFUNC(1,f_forget_about,(FW r))
4140
4141 MIDFUNC(1,fmov_pi,(FW r))
4142 {
4143 r=f_writereg(r);
4144 raw_fmov_pi(r);
4145 f_unlock(r);
4146 }
4147 MENDFUNC(1,fmov_pi,(FW r))
4148
4149 MIDFUNC(1,fmov_log10_2,(FW r))
4150 {
4151 r=f_writereg(r);
4152 raw_fmov_log10_2(r);
4153 f_unlock(r);
4154 }
4155 MENDFUNC(1,fmov_log10_2,(FW r))
4156
4157 MIDFUNC(1,fmov_log2_e,(FW r))
4158 {
4159 r=f_writereg(r);
4160 raw_fmov_log2_e(r);
4161 f_unlock(r);
4162 }
4163 MENDFUNC(1,fmov_log2_e,(FW r))
4164
4165 MIDFUNC(1,fmov_loge_2,(FW r))
4166 {
4167 r=f_writereg(r);
4168 raw_fmov_loge_2(r);
4169 f_unlock(r);
4170 }
4171 MENDFUNC(1,fmov_loge_2,(FW r))
4172
4173 MIDFUNC(1,fmov_1,(FW r))
4174 {
4175 r=f_writereg(r);
4176 raw_fmov_1(r);
4177 f_unlock(r);
4178 }
4179 MENDFUNC(1,fmov_1,(FW r))
4180
4181 MIDFUNC(1,fmov_0,(FW r))
4182 {
4183 r=f_writereg(r);
4184 raw_fmov_0(r);
4185 f_unlock(r);
4186 }
4187 MENDFUNC(1,fmov_0,(FW r))
4188
4189 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4190 {
4191 r=f_writereg(r);
4192 raw_fmov_rm(r,m);
4193 f_unlock(r);
4194 }
4195 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4196
4197 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4198 {
4199 r=f_writereg(r);
4200 raw_fmovi_rm(r,m);
4201 f_unlock(r);
4202 }
4203 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4204
4205 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4206 {
4207 r=f_readreg(r);
4208 raw_fmovi_mr(m,r);
4209 f_unlock(r);
4210 }
4211 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4212
4213 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4214 {
4215 r=f_writereg(r);
4216 raw_fmovs_rm(r,m);
4217 f_unlock(r);
4218 }
4219 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4220
4221 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4222 {
4223 r=f_readreg(r);
4224 raw_fmovs_mr(m,r);
4225 f_unlock(r);
4226 }
4227 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4228
4229 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4230 {
4231 r=f_readreg(r);
4232 raw_fmov_ext_mr(m,r);
4233 f_unlock(r);
4234 }
4235 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4236
4237 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4238 {
4239 r=f_readreg(r);
4240 raw_fmov_mr(m,r);
4241 f_unlock(r);
4242 }
4243 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4244
4245 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4246 {
4247 r=f_writereg(r);
4248 raw_fmov_ext_rm(r,m);
4249 f_unlock(r);
4250 }
4251 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4252
4253 MIDFUNC(2,fmov_rr,(FW d, FR s))
4254 {
4255 if (d==s) { /* How pointless! */
4256 return;
4257 }
4258 #if USE_F_ALIAS
4259 f_disassociate(d);
4260 s=f_readreg(s);
4261 live.fate[d].realreg=s;
4262 live.fate[d].realind=live.fat[s].nholds;
4263 live.fate[d].status=DIRTY;
4264 live.fat[s].holds[live.fat[s].nholds]=d;
4265 live.fat[s].nholds++;
4266 f_unlock(s);
4267 #else
4268 s=f_readreg(s);
4269 d=f_writereg(d);
4270 raw_fmov_rr(d,s);
4271 f_unlock(s);
4272 f_unlock(d);
4273 #endif
4274 }
4275 MENDFUNC(2,fmov_rr,(FW d, FR s))
4276
4277 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4278 {
4279 index=readreg(index,4);
4280
4281 raw_fldcw_m_indexed(index,base);
4282 unlock2(index);
4283 }
4284 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4285
4286 MIDFUNC(1,ftst_r,(FR r))
4287 {
4288 r=f_readreg(r);
4289 raw_ftst_r(r);
4290 f_unlock(r);
4291 }
4292 MENDFUNC(1,ftst_r,(FR r))
4293
4294 MIDFUNC(0,dont_care_fflags,(void))
4295 {
4296 f_disassociate(FP_RESULT);
4297 }
4298 MENDFUNC(0,dont_care_fflags,(void))
4299
4300 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4301 {
4302 s=f_readreg(s);
4303 d=f_writereg(d);
4304 raw_fsqrt_rr(d,s);
4305 f_unlock(s);
4306 f_unlock(d);
4307 }
4308 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4309
4310 MIDFUNC(2,fabs_rr,(FW d, FR s))
4311 {
4312 s=f_readreg(s);
4313 d=f_writereg(d);
4314 raw_fabs_rr(d,s);
4315 f_unlock(s);
4316 f_unlock(d);
4317 }
4318 MENDFUNC(2,fabs_rr,(FW d, FR s))
4319
4320 MIDFUNC(2,fsin_rr,(FW d, FR s))
4321 {
4322 s=f_readreg(s);
4323 d=f_writereg(d);
4324 raw_fsin_rr(d,s);
4325 f_unlock(s);
4326 f_unlock(d);
4327 }
4328 MENDFUNC(2,fsin_rr,(FW d, FR s))
4329
4330 MIDFUNC(2,fcos_rr,(FW d, FR s))
4331 {
4332 s=f_readreg(s);
4333 d=f_writereg(d);
4334 raw_fcos_rr(d,s);
4335 f_unlock(s);
4336 f_unlock(d);
4337 }
4338 MENDFUNC(2,fcos_rr,(FW d, FR s))
4339
4340 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4341 {
4342 s=f_readreg(s);
4343 d=f_writereg(d);
4344 raw_ftwotox_rr(d,s);
4345 f_unlock(s);
4346 f_unlock(d);
4347 }
4348 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4349
4350 MIDFUNC(2,fetox_rr,(FW d, FR s))
4351 {
4352 s=f_readreg(s);
4353 d=f_writereg(d);
4354 raw_fetox_rr(d,s);
4355 f_unlock(s);
4356 f_unlock(d);
4357 }
4358 MENDFUNC(2,fetox_rr,(FW d, FR s))
4359
4360 MIDFUNC(2,frndint_rr,(FW d, FR s))
4361 {
4362 s=f_readreg(s);
4363 d=f_writereg(d);
4364 raw_frndint_rr(d,s);
4365 f_unlock(s);
4366 f_unlock(d);
4367 }
4368 MENDFUNC(2,frndint_rr,(FW d, FR s))
4369
4370 MIDFUNC(2,flog2_rr,(FW d, FR s))
4371 {
4372 s=f_readreg(s);
4373 d=f_writereg(d);
4374 raw_flog2_rr(d,s);
4375 f_unlock(s);
4376 f_unlock(d);
4377 }
4378 MENDFUNC(2,flog2_rr,(FW d, FR s))
4379
4380 MIDFUNC(2,fneg_rr,(FW d, FR s))
4381 {
4382 s=f_readreg(s);
4383 d=f_writereg(d);
4384 raw_fneg_rr(d,s);
4385 f_unlock(s);
4386 f_unlock(d);
4387 }
4388 MENDFUNC(2,fneg_rr,(FW d, FR s))
4389
4390 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4391 {
4392 s=f_readreg(s);
4393 d=f_rmw(d);
4394 raw_fadd_rr(d,s);
4395 f_unlock(s);
4396 f_unlock(d);
4397 }
4398 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4399
4400 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4401 {
4402 s=f_readreg(s);
4403 d=f_rmw(d);
4404 raw_fsub_rr(d,s);
4405 f_unlock(s);
4406 f_unlock(d);
4407 }
4408 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4409
4410 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4411 {
4412 d=f_readreg(d);
4413 s=f_readreg(s);
4414 raw_fcmp_rr(d,s);
4415 f_unlock(s);
4416 f_unlock(d);
4417 }
4418 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4419
4420 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4421 {
4422 s=f_readreg(s);
4423 d=f_rmw(d);
4424 raw_fdiv_rr(d,s);
4425 f_unlock(s);
4426 f_unlock(d);
4427 }
4428 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4429
4430 MIDFUNC(2,frem_rr,(FRW d, FR s))
4431 {
4432 s=f_readreg(s);
4433 d=f_rmw(d);
4434 raw_frem_rr(d,s);
4435 f_unlock(s);
4436 f_unlock(d);
4437 }
4438 MENDFUNC(2,frem_rr,(FRW d, FR s))
4439
4440 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4441 {
4442 s=f_readreg(s);
4443 d=f_rmw(d);
4444 raw_frem1_rr(d,s);
4445 f_unlock(s);
4446 f_unlock(d);
4447 }
4448 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4449
4450 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4451 {
4452 s=f_readreg(s);
4453 d=f_rmw(d);
4454 raw_fmul_rr(d,s);
4455 f_unlock(s);
4456 f_unlock(d);
4457 }
4458 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4459
4460 /********************************************************************
4461 * Support functions exposed to gencomp. CREATE time *
4462 ********************************************************************/
4463
4464 int kill_rodent(int r)
4465 {
4466 return KILLTHERAT &&
4467 have_rat_stall &&
4468 (live.state[r].status==INMEM ||
4469 live.state[r].status==CLEAN ||
4470 live.state[r].status==ISCONST ||
4471 live.state[r].dirtysize==4);
4472 }
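/* kill_rodent() decides whether a partial-register access should be
   widened to a full 32-bit one on targets with have_rat_stall set --
   "the rat" presumably being the register alias table whose mixed
   partial/full writes stall P6-class processors. Widening is safe
   whenever the register is in memory, clean, constant, or already dirty
   across all four bytes. */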
4473
4474 uae_u32 get_const(int r)
4475 {
4476 Dif (!isconst(r)) {
4477 write_log("Register %d should be constant, but isn't\n",r);
4478 abort();
4479 }
4480 return live.state[r].val;
4481 }
4482
4483 void sync_m68k_pc(void)
4484 {
4485 if (m68k_pc_offset) {
4486 add_l_ri(PC_P,m68k_pc_offset);
4487 comp_pc_p+=m68k_pc_offset;
4488 m68k_pc_offset=0;
4489 }
4490 }
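/* The emulated program counter is maintained lazily: m68k_pc_offset
   counts how far instruction decoding has run ahead of the last
   synchronized position, and only when the PC must become visible
   (e.g. before a trap, branch or call-out) is PC_P advanced with a
   single add_l_ri instead of one update per instruction. */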
4491
4492 /********************************************************************
4493 * Scratch registers management *
4494 ********************************************************************/
4495
4496 struct scratch_t {
4497 uae_u32 regs[VREGS];
4498 fpu_register fregs[VFREGS];
4499 };
4500
4501 static scratch_t scratch;
4502
4503 /********************************************************************
4504 * Support functions exposed to newcpu *
4505 ********************************************************************/
4506
4507 static inline const char *str_on_off(bool b)
4508 {
4509 return b ? "on" : "off";
4510 }
4511
4512 static __inline__ unsigned int cft_map (unsigned int f)
4513 {
4514 #ifndef HAVE_GET_WORD_UNSWAPPED
4515 return f;
4516 #else
4517 return ((f >> 8) & 255) | ((f & 255) << 8);
4518 #endif
4519 }
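/* cft_map() compensates for builds where 68k opcode words are stored
   byte-swapped in memory (HAVE_GET_WORD_UNSWAPPED): the compiler tables
   are indexed by the swapped value, so e.g. opcode 0x4e75 (RTS) is
   looked up as 0x754e. */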
4520
4521 void compiler_init(void)
4522 {
4523 static bool initialized = false;
4524 if (initialized)
4525 return;
4526
4527 #ifndef WIN32
4528 // Open /dev/zero
4529 zero_fd = open("/dev/zero", O_RDWR);
4530 if (zero_fd < 0) {
4531 char str[200];
4532 sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
4533 ErrorAlert(str);
4534 QuitEmulator();
4535 }
4536 #endif
4537
4538 #if JIT_DEBUG
4539 // JIT debug mode ?
4540 JITDebug = PrefsFindBool("jitdebug");
4541 #endif
4542 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4543
4544 #ifdef USE_JIT_FPU
4545 // Use JIT compiler for FPU instructions ?
4546 avoid_fpu = !PrefsFindBool("jitfpu");
4547 #else
4548 // JIT FPU is always disabled
4549 avoid_fpu = true;
4550 #endif
4551 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4552
4553 // Get size of the translation cache (in KB)
4554 cache_size = PrefsFindInt32("jitcachesize");
4555 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
4556
4557 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4558 raw_init_cpu();
4559 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4560 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4561
4562 // Translation cache flush mechanism
4563 lazy_flush = PrefsFindBool("jitlazyflush");
4564 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
4565 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
4566
4567 // Compiler features
4568 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4569 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4570 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4571 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4572
4573 // Build compiler tables
4574 build_comp();
4575
4576 initialized = true;
4577
4578 #if PROFILE_COMPILE_TIME
4579 write_log("<JIT compiler> : gather statistics on translation time\n");
4580 emul_start_time = clock();
4581 #endif
4582 }
4583
4584 void compiler_exit(void)
4585 {
4586 #if PROFILE_COMPILE_TIME
4587 emul_end_time = clock();
4588 #endif
4589
4590 // Deallocate translation cache
4591 if (compiled_code) {
4592 vm_release(compiled_code, cache_size * 1024);
4593 compiled_code = 0;
4594 }
4595
4596 // Deallocate blockinfo pools
4597 free_blockinfo_pools();
4598
4599 #ifndef WIN32
4600 // Close /dev/zero
4601 if (zero_fd >= 0)
4602 close(zero_fd);
4603 #endif
4604
4605 #if PROFILE_COMPILE_TIME
4606 write_log("### Compile Block statistics\n");
4607 write_log("Number of calls to compile_block : %d\n", compile_count);
4608 uae_u32 emul_time = emul_end_time - emul_start_time;
4609 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
4610 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
4611 100.0*double(compile_time)/double(emul_time));
4612 write_log("\n");
4613 #endif
4614 }
4615
4616 bool compiler_use_jit(void)
4617 {
4618 // Check for the "jit" prefs item
4619 if (!PrefsFindBool("jit"))
4620 return false;
4621
4622 // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
4623 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
4624 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
4625 return false;
4626 }
4627
4628 // FIXME: there are currently problems with JIT compilation and anything below a 68040
4629 if (CPUType < 4) {
4630 write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
4631 return false;
4632 }
4633
4634 return true;
4635 }
4636
4637 void init_comp(void)
4638 {
4639 int i;
4640 uae_s8* cb=can_byte;
4641 uae_s8* cw=can_word;
4642 uae_s8* au=always_used;
4643
4644 for (i=0;i<VREGS;i++) {
4645 live.state[i].realreg=-1;
4646 live.state[i].needflush=NF_SCRATCH;
4647 live.state[i].val=0;
4648 set_status(i,UNDEF);
4649 }
4650
4651 for (i=0;i<VFREGS;i++) {
4652 live.fate[i].status=UNDEF;
4653 live.fate[i].realreg=-1;
4654 live.fate[i].needflush=NF_SCRATCH;
4655 }
4656
4657 for (i=0;i<VREGS;i++) {
4658 if (i<16) { /* First 16 registers map to 68k registers */
4659 live.state[i].mem=((uae_u32*)&regs)+i;
4660 live.state[i].needflush=NF_TOMEM;
4661 set_status(i,INMEM);
4662 }
4663 else
4664 live.state[i].mem=scratch.regs+i;
4665 }
4666 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
4667 live.state[PC_P].needflush=NF_TOMEM;
4668 set_const(PC_P,(uae_u32)comp_pc_p);
4669
4670 live.state[FLAGX].mem=&(regflags.x);
4671 live.state[FLAGX].needflush=NF_TOMEM;
4672 set_status(FLAGX,INMEM);
4673
4674 live.state[FLAGTMP].mem=&(regflags.cznv);
4675 live.state[FLAGTMP].needflush=NF_TOMEM;
4676 set_status(FLAGTMP,INMEM);
4677
4678 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
4679 set_status(NEXT_HANDLER,UNDEF);
4680
4681 for (i=0;i<VFREGS;i++) {
4682 if (i<8) { /* First 8 registers map to 68k FPU registers */
4683 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
4684 live.fate[i].needflush=NF_TOMEM;
4685 live.fate[i].status=INMEM;
4686 }
4687 else if (i==FP_RESULT) {
4688 live.fate[i].mem=(uae_u32*)(&fpu.result);
4689 live.fate[i].needflush=NF_TOMEM;
4690 live.fate[i].status=INMEM;
4691 }
4692 else
4693 live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
4694 }
4695
4696
4697 for (i=0;i<N_REGS;i++) {
4698 live.nat[i].touched=0;
4699 live.nat[i].nholds=0;
4700 live.nat[i].locked=0;
4701 if (*cb==i) {
4702 live.nat[i].canbyte=1; cb++;
4703 } else live.nat[i].canbyte=0;
4704 if (*cw==i) {
4705 live.nat[i].canword=1; cw++;
4706 } else live.nat[i].canword=0;
4707 if (*au==i) {
4708 live.nat[i].locked=1; au++;
4709 }
4710 }
4711
4712 for (i=0;i<N_FREGS;i++) {
4713 live.fat[i].touched=0;
4714 live.fat[i].nholds=0;
4715 live.fat[i].locked=0;
4716 }
4717
4718 touchcnt=1;
4719 m68k_pc_offset=0;
4720 live.flags_in_flags=TRASH;
4721 live.flags_on_stack=VALID;
4722 live.flags_are_important=1;
4723
4724 raw_fp_init();
4725 }
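/* To summarize the virtual register file set up above: vregs 0-15
   shadow the sixteen 68k data/address registers inside regs,
   PC_P/FLAGX/FLAGTMP shadow the program counter and flag state, and
   everything else is scratch backed by the static scratch_t arrays.
   NF_TOMEM entries are the ones flush() must write back; NF_SCRATCH
   entries can simply be forgotten. */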
4726
4727 /* Only do this if you really mean it! The next call should be to init!*/
4728 void flush(int save_regs)
4729 {
4730 int i;
4731
4732 log_flush();
4733 flush_flags(); /* low level */
4734 sync_m68k_pc(); /* mid level */
4735
4736 if (save_regs) {
4737 for (i=0;i<VFREGS;i++) {
4738 if (live.fate[i].needflush==NF_SCRATCH ||
4739 live.fate[i].status==CLEAN) {
4740 f_disassociate(i);
4741 }
4742 }
4743 for (i=0;i<VREGS;i++) {
4744 if (live.state[i].needflush==NF_TOMEM) {
4745 switch(live.state[i].status) {
4746 case INMEM:
4747 if (live.state[i].val) {
4748 raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
4749 log_vwrite(i);
4750 live.state[i].val=0;
4751 }
4752 break;
4753 case CLEAN:
4754 case DIRTY:
4755 remove_offset(i,-1); tomem(i); break;
4756 case ISCONST:
4757 if (i!=PC_P)
4758 writeback_const(i);
4759 break;
4760 default: break;
4761 }
4762 Dif (live.state[i].val && i!=PC_P) {
4763 write_log("Register %d still has val %x\n",
4764 i,live.state[i].val);
4765 }
4766 }
4767 }
4768 for (i=0;i<VFREGS;i++) {
4769 if (live.fate[i].needflush==NF_TOMEM &&
4770 live.fate[i].status==DIRTY) {
4771 f_evict(i);
4772 }
4773 }
4774 raw_fp_cleanup_drop();
4775 }
4776 if (needflags) {
4777 write_log("Warning! flush with needflags=1!\n");
4778 }
4779 }
4780
4781 static void flush_keepflags(void)
4782 {
4783 int i;
4784
4785 for (i=0;i<VFREGS;i++) {
4786 if (live.fate[i].needflush==NF_SCRATCH ||
4787 live.fate[i].status==CLEAN) {
4788 f_disassociate(i);
4789 }
4790 }
4791 for (i=0;i<VREGS;i++) {
4792 if (live.state[i].needflush==NF_TOMEM) {
4793 switch(live.state[i].status) {
4794 case INMEM:
4795 /* Can't adjust the offset here --- that needs "add" */
4796 break;
4797 case CLEAN:
4798 case DIRTY:
4799 remove_offset(i,-1); tomem(i); break;
4800 case ISCONST:
4801 if (i!=PC_P)
4802 writeback_const(i);
4803 break;
4804 default: break;
4805 }
4806 }
4807 }
4808 for (i=0;i<VFREGS;i++) {
4809 if (live.fate[i].needflush==NF_TOMEM &&
4810 live.fate[i].status==DIRTY) {
4811 f_evict(i);
4812 }
4813 }
4814 raw_fp_cleanup_drop();
4815 }
4816
4817 void freescratch(void)
4818 {
4819 int i;
4820 for (i=0;i<N_REGS;i++)
4821 if (live.nat[i].locked && i!=4) /* native reg 4 is presumably ESP, legitimately locked */
4822 write_log("Warning! %d is locked\n",i);
4823
4824 for (i=0;i<VREGS;i++)
4825 if (live.state[i].needflush==NF_SCRATCH) {
4826 forget_about(i);
4827 }
4828
4829 for (i=0;i<VFREGS;i++)
4830 if (live.fate[i].needflush==NF_SCRATCH) {
4831 f_forget_about(i);
4832 }
4833 }
4834
4835 /********************************************************************
4836 * Support functions, internal *
4837 ********************************************************************/
4838
4839
4840 static void align_target(uae_u32 a)
4841 {
4842 /* Fill with NOPs --- makes debugging with gdb easier */
4843 while ((uae_u32)target&(a-1))
4844 *target++=0x90;
4845 }
4846
4847 static __inline__ int isinrom(uintptr addr)
4848 {
4849 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
4850 }
4851
4852 static void flush_all(void)
4853 {
4854 int i;
4855
4856 log_flush();
4857 for (i=0;i<VREGS;i++)
4858 if (live.state[i].status==DIRTY) {
4859 if (!call_saved[live.state[i].realreg]) {
4860 tomem(i);
4861 }
4862 }
4863 for (i=0;i<VFREGS;i++)
4864 if (f_isinreg(i))
4865 f_evict(i);
4866 raw_fp_cleanup_drop();
4867 }
4868
4869 /* Make sure all registers that will get clobbered by a call are
4870 	safe and sound in memory */
4871 static void prepare_for_call_1(void)
4872 {
4873 flush_all(); /* If there are registers that don't get clobbered,
4874 * we should be a bit more selective here */
4875 }
4876
4877 /* We will call a C routine in a moment. That will clobber all registers,
4878 so we need to disassociate everything */
4879 static void prepare_for_call_2(void)
4880 {
4881 int i;
4882 for (i=0;i<N_REGS;i++)
4883 if (!call_saved[i] && live.nat[i].nholds>0)
4884 free_nreg(i);
4885
4886 for (i=0;i<N_FREGS;i++)
4887 if (live.fat[i].nholds>0)
4888 f_free_nreg(i);
4889
4890 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
4891 flags at the very start of the call_r
4892 functions! */
4893 }
4894
4895 /********************************************************************
4896 * Memory access and related functions, CREATE time *
4897 ********************************************************************/
4898
4899 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
4900 {
4901 next_pc_p=not_taken;
4902 taken_pc_p=taken;
4903 branch_cc=cond;
4904 }
4905
4906
4907 static uae_u32 get_handler_address(uae_u32 addr)
4908 {
4909
4910 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
4911 return (uae_u32)&(bi->direct_handler_to_use);
4912 }
4913
4914 static uae_u32 get_handler(uae_u32 addr)
4915 {
4916 uae_u32 cl=cacheline(addr);
4917
4918 return (uae_u32)bi->direct_handler_to_use;
4919 }
4920
4921 static void load_handler(int reg, uae_u32 addr)
4922 {
4923 mov_l_rm(reg,get_handler_address(addr));
4924 }
4925
4926 /* This version assumes that it is writing *real* memory, and *will* fail
4927 * if that assumption is wrong! No branches, no second chances, just
4928 * straight go-for-it attitude */
4929
4930 static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber) /* note: offset is unused in this direct-memory version */
4931 {
4932 int f=tmp;
4933
4934 if (clobber)
4935 f=source;
4936 switch(size) {
4937 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
4938 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
4939 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
4940 }
4941 forget_about(tmp);
4942 forget_about(f);
4943 }
4944
4945 void writebyte(int address, int source, int tmp)
4946 {
4947 writemem_real(address,source,20,1,tmp,0);
4948 }
4949
4950 static __inline__ void writeword_general(int address, int source, int tmp,
4951 int clobber)
4952 {
4953 writemem_real(address,source,16,2,tmp,clobber);
4954 }
4955
4956 void writeword_clobber(int address, int source, int tmp)
4957 {
4958 writeword_general(address,source,tmp,1);
4959 }
4960
4961 void writeword(int address, int source, int tmp)
4962 {
4963 writeword_general(address,source,tmp,0);
4964 }
4965
4966 static __inline__ void writelong_general(int address, int source, int tmp,
4967 int clobber)
4968 {
4969 writemem_real(address,source,12,4,tmp,clobber);
4970 }
4971
4972 void writelong_clobber(int address, int source, int tmp)
4973 {
4974 writelong_general(address,source,tmp,1);
4975 }
4976
4977 void writelong(int address, int source, int tmp)
4978 {
4979 writelong_general(address,source,tmp,0);
4980 }
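/* What the emitted stores amount to at run time --- a sketch, assuming
   DIRECT_ADDRESSING (guest memory at host address guest+MEMBaseDiff)
   and the little-endian x86 target, hence the swap into the 68k's
   big-endian byte order. Illustrative helper only, not used by the JIT: */
static inline void writelong_sketch(uae_u32 guest_addr, uae_u32 value)
{
    uae_u32 be = ((value >> 24) & 0xff)    | ((value >> 8) & 0xff00)
               | ((value << 8) & 0xff0000) | (value << 24);
    *(uae_u32 *)(guest_addr + MEMBaseDiff) = be;
}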
4981
4982
4983
4984 /* This version assumes that it is reading *real* memory, and *will* fail
4985 * if that assumption is wrong! No branches, no second chances, just
4986 * straight go-for-it attitude */
4987
4988 static void readmem_real(int address, int dest, int offset, int size, int tmp) /* note: offset is unused in this direct-memory version */
4989 {
4990 int f=tmp;
4991
4992 if (size==4 && address!=dest)
4993 f=dest;
4994
4995 switch(size) {
4996 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
4997 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
4998 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
4999 }
5000 forget_about(tmp);
5001 }
5002
5003 void readbyte(int address, int dest, int tmp)
5004 {
5005 readmem_real(address,dest,8,1,tmp);
5006 }
5007
5008 void readword(int address, int dest, int tmp)
5009 {
5010 readmem_real(address,dest,4,2,tmp);
5011 }
5012
5013 void readlong(int address, int dest, int tmp)
5014 {
5015 readmem_real(address,dest,0,4,tmp);
5016 }
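/* Conversely for the loads above: fetch from guest+MEMBaseDiff, then
   byte-swap (same assumptions as the writelong_sketch earlier): */
static inline uae_u32 readlong_sketch(uae_u32 guest_addr)
{
    uae_u32 v = *(uae_u32 *)(guest_addr + MEMBaseDiff);
    return ((v >> 24) & 0xff)    | ((v >> 8) & 0xff00)
         | ((v << 8) & 0xff0000) | (v << 24);
}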
5017
5018 void get_n_addr(int address, int dest, int tmp)
5019 {
5020 	// Under real addressing the host address *is* the virtual address;
5021 	// under direct addressing it is the virtual address plus MEMBaseDiff.
5022 	// Either way the result is computed in a single instruction below,
5023 	// so no scratch registers are needed.
5032
5033 #if REAL_ADDRESSING
5034 mov_l_rr(dest, address);
5035 #elif DIRECT_ADDRESSING
5036 lea_l_brr(dest,address,MEMBaseDiff);
5037 #endif
5038 forget_about(tmp);
5039 }
5040
5041 void get_n_addr_jmp(int address, int dest, int tmp)
5042 {
5043 /* For this, we need to get the same address as the rest of UAE
5044 would --- otherwise we end up translating everything twice */
5045 get_n_addr(address,dest,tmp);
5046 }
5047
5048
5049 /* base is a register, but dp is an actual value.
5050 target is a register, as is tmp */
5051 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5052 {
5053 int reg = (dp >> 12) & 15;
5054 int regd_shift=(dp >> 9) & 3;
5055
5056 if (dp & 0x100) {
5057 int ignorebase=(dp&0x80);
5058 int ignorereg=(dp&0x40);
5059 int addbase=0;
5060 int outer=0;
5061
5062 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5063 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5064
5065 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5066 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5067
5068 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5069 if (!ignorereg) {
5070 if ((dp & 0x800) == 0)
5071 sign_extend_16_rr(target,reg);
5072 else
5073 mov_l_rr(target,reg);
5074 shll_l_ri(target,regd_shift);
5075 }
5076 else
5077 mov_l_ri(target,0);
5078
5079 /* target is now regd */
5080 if (!ignorebase)
5081 add_l(target,base);
5082 add_l_ri(target,addbase);
5083 if (dp&0x03) readlong(target,target,tmp);
5084 } else { /* do the getlong first, then add regd */
5085 if (!ignorebase) {
5086 mov_l_rr(target,base);
5087 add_l_ri(target,addbase);
5088 }
5089 else
5090 mov_l_ri(target,addbase);
5091 if (dp&0x03) readlong(target,target,tmp);
5092
5093 if (!ignorereg) {
5094 if ((dp & 0x800) == 0)
5095 sign_extend_16_rr(tmp,reg);
5096 else
5097 mov_l_rr(tmp,reg);
5098 shll_l_ri(tmp,regd_shift);
5099 /* tmp is now regd */
5100 add_l(target,tmp);
5101 }
5102 }
5103 add_l_ri(target,outer);
5104 }
5105 else { /* 68000 version */
5106 if ((dp & 0x800) == 0) { /* Sign extend */
5107 sign_extend_16_rr(target,reg);
5108 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5109 }
5110 else {
5111 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5112 }
5113 }
5114 forget_about(tmp);
5115 }
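/* A reader's crib for the full-format extension word bits tested above
   (bits 15-12 select the index register, (dp >> 12) & 15; the names
   below are ours, not from any header): */
enum {
    EXT_FULL_FORMAT   = 0x100,	/* full extension word (else brief format) */
    EXT_INDEX_LONG    = 0x800,	/* index used as .l, else sign-extended .w */
    EXT_SCALE_SHIFT   = 9,	/* scale = 1 << ((dp >> 9) & 3) */
    EXT_BASE_SUPPRESS = 0x080,	/* ignore the base register */
    EXT_IDX_SUPPRESS  = 0x040,	/* ignore the index register */
    EXT_BD_SIZE_MASK  = 0x030,	/* base displacement: 0x20 = word, 0x30 = long */
    EXT_POST_INDEX    = 0x004,	/* add the index after the memory indirection */
    EXT_OD_SIZE_MASK  = 0x003	/* outer displacement: 0x2 = word, 0x3 = long */
};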
5116
5117
5118
5119
5120
5121 void set_cache_state(int enabled)
5122 {
5123 if (enabled!=letit)
5124 flush_icache_hard(77);
5125 letit=enabled;
5126 }
5127
5128 int get_cache_state(void)
5129 {
5130 return letit;
5131 }
5132
5133 uae_u32 get_jitted_size(void)
5134 {
5135 if (compiled_code)
5136 return current_compile_p-compiled_code;
5137 return 0;
5138 }
5139
5140 void alloc_cache(void)
5141 {
5142 if (compiled_code) {
5143 flush_icache_hard(6);
5144 vm_release(compiled_code, cache_size * 1024);
5145 compiled_code = 0;
5146 }
5147
5148 if (cache_size == 0)
5149 return;
5150
5151 while (!compiled_code && cache_size) {
5152 if ((compiled_code = (uae_u8 *)vm_acquire(cache_size * 1024)) == VM_MAP_FAILED) {
5153 compiled_code = 0;
5154 cache_size /= 2;
5155 }
5156 }
5157 	if (compiled_code) vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5158
5159 if (compiled_code) {
5160 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5161 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5162 current_compile_p = compiled_code;
5163 current_cache_size = 0;
5164 }
5165 }
5166
5167
5168
5169 extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5170
5171 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5172 {
5173 uae_u32 k1=0;
5174 uae_u32 k2=0;
5175 uae_s32 len=bi->len;
5176 uae_u32 tmp=bi->min_pcp;
5177 uae_u32* pos;
5178
5179 len+=(tmp&3);
5180 tmp&=(~3);
5181 pos=(uae_u32*)tmp;
5182
5183 if (len<0 || len>MAX_CHECKSUM_LEN) {
5184 *c1=0;
5185 *c2=0;
5186 }
5187 else {
5188 while (len>0) {
5189 k1+=*pos;
5190 k2^=*pos;
5191 pos++;
5192 len-=4;
5193 }
5194 *c1=k1;
5195 *c2=k2;
5196 }
5197 }
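/* The add/xor pair over word-aligned memory, as a standalone sketch
   mirroring calc_checksum() above (hypothetical helper, unused): */
static void checksum_pair_sketch(const uae_u32 *p, uae_s32 len, uae_u32 *c1, uae_u32 *c2)
{
    uae_u32 k1 = 0, k2 = 0;
    while (len > 0) {	/* len is in bytes, like bi->len */
	k1 += *p;	/* additive component */
	k2 ^= *p;	/* xor component; the pair makes accidental collisions unlikely */
	p++;
	len -= 4;
    }
    *c1 = k1;
    *c2 = k2;
}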
5198
5199 static void show_checksum(blockinfo* bi)
5200 {
5201 uae_u32 k1=0;
5202 uae_u32 k2=0;
5203 uae_s32 len=bi->len;
5204 uae_u32 tmp=(uae_u32)bi->pc_p;
5205 uae_u32* pos;
5206
5207 len+=(tmp&3);
5208 tmp&=(~3);
5209 pos=(uae_u32*)tmp;
5210
5211 if (len<0 || len>MAX_CHECKSUM_LEN) {
5212 return;
5213 }
5214 else {
5215 while (len>0) {
5216 write_log("%08x ",*pos);
5217 pos++;
5218 len-=4;
5219 }
5220 write_log(" bla\n");
5221 }
5222 }
5223
5224
5225 int check_for_cache_miss(void)
5226 {
5227 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5228
5229 if (bi) {
5230 int cl=cacheline(regs.pc_p);
5231 if (bi!=cache_tags[cl+1].bi) {
5232 raise_in_cl_list(bi);
5233 return 1;
5234 }
5235 }
5236 return 0;
5237 }
5238
5239
5240 static void recompile_block(void)
5241 {
5242 /* An existing block's countdown code has expired. We need to make
5243 sure that execute_normal doesn't refuse to recompile due to a
5244 perceived cache miss... */
5245 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5246
5247 Dif (!bi)
5248 abort();
5249 raise_in_cl_list(bi);
5250 execute_normal();
5251 return;
5252 }
5253 static void cache_miss(void)
5254 {
5255 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5256 uae_u32 cl=cacheline(regs.pc_p);
5257 blockinfo* bi2=get_blockinfo(cl);
5258
5259 if (!bi) {
5260 execute_normal(); /* Compile this block now */
5261 return;
5262 }
5263 Dif (!bi2 || bi==bi2) {
5264 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5265 abort();
5266 }
5267 raise_in_cl_list(bi);
5268 return;
5269 }
5270
5271 static int called_check_checksum(blockinfo* bi);
5272
5273 static inline int block_check_checksum(blockinfo* bi)
5274 {
5275 uae_u32 c1,c2;
5276 int isgood;
5277
5278 if (bi->status!=BI_NEED_CHECK)
5279 return 1; /* This block is in a checked state */
5280
5281 checksum_count++;
5282 if (bi->c1 || bi->c2)
5283 calc_checksum(bi,&c1,&c2);
5284 else {
5285 c1=c2=1; /* Make sure it doesn't match */
5286 }
5287
5288 isgood=(c1==bi->c1 && c2==bi->c2);
5289 if (isgood) {
5290 /* This block is still OK. So we reactivate. Of course, that
5291 means we have to move it into the needs-to-be-flushed list */
5292 bi->handler_to_use=bi->handler;
5293 set_dhtu(bi,bi->direct_handler);
5294 bi->status=BI_CHECKING;
5295 isgood=called_check_checksum(bi);
5296 }
5297 if (isgood) {
5298 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5299 c1,c2,bi->c1,bi->c2);*/
5300 remove_from_list(bi);
5301 add_to_active(bi);
5302 raise_in_cl_list(bi);
5303 bi->status=BI_ACTIVE;
5304 }
5305 else {
5306 /* This block actually changed. We need to invalidate it,
5307 and set it up to be recompiled */
5308 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5309 c1,c2,bi->c1,bi->c2); */
5310 invalidate_block(bi);
5311 raise_in_cl_list(bi);
5312 }
5313 return isgood;
5314 }
5315
5316 static int called_check_checksum(blockinfo* bi)
5317 {
5318
5319 int isgood=1;
5320 int i;
5321
5322 for (i=0;i<2 && isgood;i++) {
5323 if (bi->dep[i].jmp_off) {
5324 isgood=block_check_checksum(bi->dep[i].target);
5325 }
5326 }
5327 return isgood;
5328 }
5329
5330 static void check_checksum(void)
5331 {
5332 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5333 uae_u32 cl=cacheline(regs.pc_p);
5334 blockinfo* bi2=get_blockinfo(cl);
5335
5336 /* These are not the droids you are looking for... */
5337 if (!bi) {
5338 /* Whoever is the primary target is in a dormant state, but
5339 calling it was accidental, and we should just compile this
5340 new block */
5341 execute_normal();
5342 return;
5343 }
5344 if (bi!=bi2) {
5345 /* The block was hit accidentally, but it does exist. Cache miss */
5346 cache_miss();
5347 return;
5348 }
5349
5350 if (!block_check_checksum(bi))
5351 execute_normal();
5352 }
5353
5354 static __inline__ void match_states(blockinfo* bi)
5355 {
5356 int i;
5357 smallstate* s=&(bi->env);
5358
5359 if (bi->status==BI_NEED_CHECK) {
5360 block_check_checksum(bi);
5361 }
5362 if (bi->status==BI_ACTIVE ||
5363 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5364 block makes (about not using
5365 certain vregs) */
5366 for (i=0;i<16;i++) {
5367 if (s->virt[i]==L_UNNEEDED) {
5368 // write_log("unneeded reg %d at %p\n",i,target);
5369 COMPCALL(forget_about)(i); // FIXME
5370 }
5371 }
5372 }
5373 flush(1);
5374
5375 /* And now deal with the *demands* the block makes */
5376 for (i=0;i<N_REGS;i++) {
5377 int v=s->nat[i];
5378 if (v>=0) {
5379 // printf("Loading reg %d into %d at %p\n",v,i,target);
5380 readreg_specific(v,4,i);
5381 // do_load_reg(i,v);
5382 // setlock(i);
5383 }
5384 }
5385 for (i=0;i<N_REGS;i++) {
5386 int v=s->nat[i];
5387 if (v>=0) {
5388 unlock2(i);
5389 }
5390 }
5391 }
5392
5393 static uae_u8 popallspace[1024]; /* Must be big enough for all the stubs created in create_popalls() */
5394
5395 static __inline__ void create_popalls(void)
5396 {
5397 int i,r;
5398
5399 current_compile_p=popallspace;
5400 set_target(current_compile_p);
5401 #if USE_PUSH_POP
5402 /* If we can't use gcc inline assembly, we need to pop some
5403 registers before jumping back to the various get-out routines.
5404 This generates the code for it.
5405 */
5406 popall_do_nothing=current_compile_p;
5407 for (i=0;i<N_REGS;i++) {
5408 if (need_to_preserve[i])
5409 raw_pop_l_r(i);
5410 }
5411 raw_jmp((uae_u32)do_nothing);
5412 align_target(32);
5413
5414 popall_execute_normal=get_target();
5415 for (i=0;i<N_REGS;i++) {
5416 if (need_to_preserve[i])
5417 raw_pop_l_r(i);
5418 }
5419 raw_jmp((uae_u32)execute_normal);
5420 align_target(32);
5421
5422 popall_cache_miss=get_target();
5423 for (i=0;i<N_REGS;i++) {
5424 if (need_to_preserve[i])
5425 raw_pop_l_r(i);
5426 }
5427 raw_jmp((uae_u32)cache_miss);
5428 align_target(32);
5429
5430 popall_recompile_block=get_target();
5431 for (i=0;i<N_REGS;i++) {
5432 if (need_to_preserve[i])
5433 raw_pop_l_r(i);
5434 }
5435 raw_jmp((uae_u32)recompile_block);
5436 align_target(32);
5437
5438 popall_exec_nostats=get_target();
5439 for (i=0;i<N_REGS;i++) {
5440 if (need_to_preserve[i])
5441 raw_pop_l_r(i);
5442 }
5443 raw_jmp((uae_u32)exec_nostats);
5444 align_target(32);
5445
5446 popall_check_checksum=get_target();
5447 for (i=0;i<N_REGS;i++) {
5448 if (need_to_preserve[i])
5449 raw_pop_l_r(i);
5450 }
5451 raw_jmp((uae_u32)check_checksum);
5452 align_target(32);
5453
5454 current_compile_p=get_target();
5455 #else
5456 popall_exec_nostats=(void *)exec_nostats;
5457 popall_execute_normal=(void *)execute_normal;
5458 popall_cache_miss=(void *)cache_miss;
5459 popall_recompile_block=(void *)recompile_block;
5460 popall_do_nothing=(void *)do_nothing;
5461 popall_check_checksum=(void *)check_checksum;
5462
5463 #endif
5464
5465 /* And now, the code to do the matching pushes and then jump
5466 into a handler routine */
5467 pushall_call_handler=get_target();
5468 #if USE_PUSH_POP
5469 for (i=N_REGS;i--;) {
5470 if (need_to_preserve[i])
5471 raw_push_l_r(i);
5472 }
5473 #endif
5474 r=REG_PC_TMP;
5475 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5476 raw_and_l_ri(r,TAGMASK);
5477 raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5478 }
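/* In C terms, the pushall_call_handler stub generated above amounts to
   this (a sketch; the real thing is emitted machine code):
 *	push all callee-saved registers;
 *	r = (uae_u32)regs.pc_p & TAGMASK;
 *	goto *(void **)((uae_u8 *)cache_tags + r * 4);
 * i.e. it dispatches through cache_tags[cacheline(pc)].handler, matching
 * the inline-asm dispatch in m68k_do_compile_execute() at the bottom of
 * this file. */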
5479
5480 static __inline__ void reset_lists(void)
5481 {
5482 int i;
5483
5484 for (i=0;i<MAX_HOLD_BI;i++)
5485 hold_bi[i]=NULL;
5486 active=NULL;
5487 dormant=NULL;
5488 }
5489
5490 static void prepare_block(blockinfo* bi)
5491 {
5492 int i;
5493
5494 set_target(current_compile_p);
5495 align_target(32);
5496 bi->direct_pen=(cpuop_func *)get_target();
5497 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5498 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5499 raw_jmp((uae_u32)popall_execute_normal);
5500
5501 align_target(32);
5502 bi->direct_pcc=(cpuop_func *)get_target();
5503 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5504 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5505 raw_jmp((uae_u32)popall_check_checksum);
5506
5507 align_target(32);
5508 current_compile_p=get_target();
5509
5510 bi->deplist=NULL;
5511 for (i=0;i<2;i++) {
5512 bi->dep[i].prev_p=NULL;
5513 bi->dep[i].next=NULL;
5514 }
5515 bi->env=default_ss;
5516 bi->status=BI_INVALID;
5517 bi->havestate=0;
5518 //bi->env=empty_ss;
5519 }
5520
5521 void build_comp(void)
5522 {
5523 int i;
5524
5525 unsigned long opcode;
5526 struct comptbl* tbl=op_smalltbl_0_comp_ff;
5527 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
5528 int count;
5529 int cpu_level = 0; // 68000 (default)
5530 if (CPUType == 4)
5531 cpu_level = 4; // 68040 with FPU
5532 else {
5533 if (FPUType)
5534 cpu_level = 3; // 68020 with FPU
5535 else if (CPUType >= 2)
5536 cpu_level = 2; // 68020
5537 else if (CPUType == 1)
5538 cpu_level = 1;
5539 }
5540 struct cputbl *nfctbl = (
5541 cpu_level == 4 ? op_smalltbl_0_nf
5542 : cpu_level == 3 ? op_smalltbl_1_nf
5543 : cpu_level == 2 ? op_smalltbl_2_nf
5544 : cpu_level == 1 ? op_smalltbl_3_nf
5545 : op_smalltbl_4_nf);
5546
5547 write_log ("<JIT compiler> : building compiler function tables\n");
5548
5549 for (opcode = 0; opcode < 65536; opcode++) {
5550 nfcpufunctbl[opcode] = op_illg_1;
5551 compfunctbl[opcode] = NULL;
5552 nfcompfunctbl[opcode] = NULL;
5553 prop[opcode].use_flags = 0x1f;
5554 prop[opcode].set_flags = 0x1f;
5555 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
5556 }
5557
5558 for (i = 0; tbl[i].opcode < 65536; i++) {
5559 int cflow = table68k[tbl[i].opcode].cflow;
5560 prop[cft_map(tbl[i].opcode)].cflow = cflow;
5561
5562 int uses_fpu = tbl[i].specific & 32;
5563 if (uses_fpu && avoid_fpu)
5564 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
5565 else
5566 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
5567 }
5568
5569 for (i = 0; nftbl[i].opcode < 65536; i++) {
5570 		int uses_fpu = nftbl[i].specific & 32;
5571 if (uses_fpu && avoid_fpu)
5572 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
5573 else
5574 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
5575
5576 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
5577 }
5578
5579 for (i = 0; nfctbl[i].handler; i++) {
5580 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
5581 }
5582
5583 for (opcode = 0; opcode < 65536; opcode++) {
5584 compop_func *f;
5585 compop_func *nff;
5586 cpuop_func *nfcf;
5587 int isaddx,cflow;
5588
5589 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
5590 continue;
5591
5592 if (table68k[opcode].handler != -1) {
5593 f = compfunctbl[cft_map(table68k[opcode].handler)];
5594 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
5595 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
5596 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
5597 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
5598 prop[cft_map(opcode)].cflow = cflow;
5599 prop[cft_map(opcode)].is_addx = isaddx;
5600 compfunctbl[cft_map(opcode)] = f;
5601 nfcompfunctbl[cft_map(opcode)] = nff;
5602 Dif (nfcf == op_illg_1)
5603 abort();
5604 nfcpufunctbl[cft_map(opcode)] = nfcf;
5605 }
5606 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
5607 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
5608 }
5609 for (i = 0; nfctbl[i].handler != NULL; i++) {
5610 if (nfctbl[i].specific)
5611 			nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
5612 }
5613
5614 count=0;
5615 for (opcode = 0; opcode < 65536; opcode++) {
5616 if (compfunctbl[cft_map(opcode)])
5617 count++;
5618 }
5619 	write_log("<JIT compiler> : supposedly %d compilable opcodes!\n",count);
5620
5621 /* Initialise state */
5622 create_popalls();
5623 alloc_cache();
5624 reset_lists();
5625
5626 for (i=0;i<TAGSIZE;i+=2) {
5627 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
5628 cache_tags[i+1].bi=NULL;
5629 }
5630
5631 #if 0
5632 for (i=0;i<N_REGS;i++) {
5633 empty_ss.nat[i].holds=-1;
5634 empty_ss.nat[i].validsize=0;
5635 empty_ss.nat[i].dirtysize=0;
5636 }
5637 #endif
5638 for (i=0;i<VREGS;i++) {
5639 empty_ss.virt[i]=L_NEEDED;
5640 }
5641 for (i=0;i<N_REGS;i++) {
5642 empty_ss.nat[i]=L_UNKNOWN;
5643 }
5644 default_ss=empty_ss;
5645 }
5646
5647
5648 static void flush_icache_none(int n)
5649 {
5650 /* Nothing to do. */
5651 }
5652
5653 static void flush_icache_hard(int n)
5654 {
5655
5656 blockinfo* bi, *dbi;
5657
5658 hard_flush_count++;
5659 #if 0
5660     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
5661 	      n,regs.pc,regs.pc_p,current_cache_size/1024);
5662 #endif
5663     current_cache_size = 0;
5664 bi=active;
5665 while(bi) {
5666 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5667 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5668 dbi=bi; bi=bi->next;
5669 free_blockinfo(dbi);
5670 }
5671 bi=dormant;
5672 while(bi) {
5673 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5674 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5675 dbi=bi; bi=bi->next;
5676 free_blockinfo(dbi);
5677 }
5678
5679 reset_lists();
5680 if (!compiled_code)
5681 return;
5682 current_compile_p=compiled_code;
5683 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
5684 }
5685
5686
5687 /* "Soft flushing" --- instead of actually throwing everything away,
5688 we simply mark everything as "needs to be checked".
5689 */
5690
5691 static inline void flush_icache_lazy(int n)
5692 {
5693
5694 blockinfo* bi;
5695 blockinfo* bi2;
5696
5697 soft_flush_count++;
5698 if (!active)
5699 return;
5700
5701 bi=active;
5702 while (bi) {
5703 uae_u32 cl=cacheline(bi->pc_p);
5704 if (bi->status==BI_INVALID ||
5705 bi->status==BI_NEED_RECOMP) {
5706 if (bi==cache_tags[cl+1].bi)
5707 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
5708 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
5709 set_dhtu(bi,bi->direct_pen);
5710 bi->status=BI_INVALID;
5711 }
5712 else {
5713 if (bi==cache_tags[cl+1].bi)
5714 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
5715 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
5716 set_dhtu(bi,bi->direct_pcc);
5717 bi->status=BI_NEED_CHECK;
5718 }
5719 bi2=bi;
5720 bi=bi->next;
5721 }
5722 /* bi2 is now the last entry in the active list */
5723 bi2->next=dormant;
5724 if (dormant)
5725 dormant->prev_p=&(bi2->next);
5726
5727 dormant=active;
5728 active->prev_p=&dormant;
5729 active=NULL;
5730 }
5731
5732 static void catastrophe(void)
5733 {
5734 abort();
5735 }
5736
5737 int failure;
5738
5739 #define TARGET_M68K 0
5740 #define TARGET_POWERPC 1
5741 #define TARGET_X86 2
5742 #if defined(i386) || defined(__i386__)
5743 #define TARGET_NATIVE TARGET_X86
5744 #endif
5745 #if defined(powerpc) || defined(__powerpc__)
5746 #define TARGET_NATIVE TARGET_POWERPC
5747 #endif
5748
5749 #ifdef ENABLE_MON
5750 static uae_u32 mon_read_byte_jit(uae_u32 addr)
5751 {
5752 uae_u8 *m = (uae_u8 *)addr;
5753 return (uae_u32)(*m);
5754 }
5755
5756 static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
5757 {
5758 uae_u8 *m = (uae_u8 *)addr;
5759 *m = b;
5760 }
5761 #endif
5762
5763 void disasm_block(int target, uint8 * start, size_t length)
5764 {
5765 if (!JITDebug)
5766 return;
5767
5768 #if JIT_DEBUG && defined(ENABLE_MON)
5769 char disasm_str[200];
5770 sprintf(disasm_str, "%s $%x $%x",
5771 target == TARGET_M68K ? "d68" :
5772 target == TARGET_X86 ? "d86" :
5773 target == TARGET_POWERPC ? "d" : "x",
5774 			(uae_u32)start, (uae_u32)start + length - 1);
5775
5776 uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
5777 void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
5778
5779 mon_read_byte = mon_read_byte_jit;
5780 mon_write_byte = mon_write_byte_jit;
5781
5782 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
5783 mon(4, arg);
5784
5785 mon_read_byte = old_mon_read_byte;
5786 mon_write_byte = old_mon_write_byte;
5787 #endif
5788 }
5789
5790 static inline void disasm_native_block(uint8 *start, size_t length)
5791 {
5792 disasm_block(TARGET_NATIVE, start, length);
5793 }
5794
5795 static inline void disasm_m68k_block(uint8 *start, size_t length)
5796 {
5797 disasm_block(TARGET_M68K, start, length);
5798 }
5799
5800 #ifdef HAVE_GET_WORD_UNSWAPPED
5801 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
5802 #else
5803 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
5804 #endif
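/* When HAVE_GET_WORD_UNSWAPPED is set, opcodes are fetched in host byte
   order for speed, and cft_map() (from compemu.h) compensates by
   swapping the table index --- presumably something like
	#define cft_map(x) ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))
   in that case, and the identity mapping otherwise. */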
5805
5806 #if JIT_DEBUG
5807 static uae_u8 *last_regs_pc_p = 0;
5808 static uae_u8 *last_compiled_block_addr = 0;
5809
5810 void compiler_dumpstate(void)
5811 {
5812 if (!JITDebug)
5813 return;
5814
5815 write_log("### Host addresses\n");
5816 write_log("MEM_BASE : %x\n", MEMBaseDiff);
5817 write_log("PC_P : %p\n", &regs.pc_p);
5818 write_log("SPCFLAGS : %p\n", &regs.spcflags);
5819 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
5820 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
5821 write_log("\n");
5822
5823 write_log("### M68k processor state\n");
5824 m68k_dumpstate(0);
5825 write_log("\n");
5826
5827 write_log("### Block in Mac address space\n");
5828 write_log("M68K block : %p\n",
5829 (void *)get_virtual_address(last_regs_pc_p));
5830 write_log("Native block : %p (%d bytes)\n",
5831 (void *)get_virtual_address(last_compiled_block_addr),
5832 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
5833 write_log("\n");
5834 }
5835 #endif
5836
5837 static void compile_block(cpu_history* pc_hist, int blocklen)
5838 {
5839 if (letit && compiled_code) {
5840 #if PROFILE_COMPILE_TIME
5841 compile_count++;
5842 clock_t start_time = clock();
5843 #endif
5844 #if JIT_DEBUG
5845 	bool disasm_this_block = false;
5846 #endif
5847
5848 /* OK, here we need to 'compile' a block */
5849 int i;
5850 int r;
5851 int was_comp=0;
5852 uae_u8 liveflags[MAXRUN+1];
5853 uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
5854 uae_u32 min_pcp=max_pcp;
5855 uae_u32 cl=cacheline(pc_hist[0].location);
5856 void* specflags=(void*)&regs.spcflags;
5857 blockinfo* bi=NULL;
5858 blockinfo* bi2;
5859 int extra_len=0;
5860
5861 redo_current_block=0;
5862 if (current_compile_p>=max_compile_start)
5863 flush_icache_hard(7);
5864
5865 alloc_blockinfos();
5866
5867 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
5868 bi2=get_blockinfo(cl);
5869
5870 optlev=bi->optlevel;
5871 if (bi->status!=BI_INVALID) {
5872 Dif (bi!=bi2) {
5873 /* I don't think it can happen anymore. Shouldn't, in
5874 any case. So let's make sure... */
5875 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
5876 bi->count,bi->optlevel,bi->handler_to_use,
5877 cache_tags[cl].handler);
5878 abort();
5879 }
5880
5881 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
5882 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
5883 /* What the heck? We are not supposed to be here! */
5884 abort();
5885 }
5886 }
5887 if (bi->count==-1) {
5888 optlev++;
5889 while (!optcount[optlev])
5890 optlev++;
5891 bi->count=optcount[optlev]-1;
5892 }
5893 current_block_pc_p=(uae_u32)pc_hist[0].location;
5894
5895 remove_deps(bi); /* We are about to create new code */
5896 bi->optlevel=optlev;
5897 bi->pc_p=(uae_u8*)pc_hist[0].location;
5898
5899 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
5900 i=blocklen;
5901 while (i--) {
5902 uae_u16* currpcp=pc_hist[i].location;
5903 uae_u32 op=DO_GET_OPCODE(currpcp);
5904
5905 if ((uae_u32)currpcp<min_pcp)
5906 min_pcp=(uae_u32)currpcp;
5907 if ((uae_u32)currpcp>max_pcp)
5908 max_pcp=(uae_u32)currpcp;
5909
5910 liveflags[i]=((liveflags[i+1]&
5911 (~prop[op].set_flags))|
5912 prop[op].use_flags);
5913 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
5914 liveflags[i]&= ~FLAG_Z;
5915 }
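	/* Worked example of the recurrence above: if instruction i sets
	   {C,V} and uses {Z}, and everything is live afterwards
	   (liveflags[i+1] = 0x1f), then liveflags[i] = (0x1f & ~{C,V}) | {Z}:
	   the flags i itself sets need not be live on entry, but the ones
	   it reads must be. */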
5916
5917 bi->needed_flags=liveflags[0];
5918
5919 align_target(32);
5920 was_comp=0;
5921
5922 bi->direct_handler=(cpuop_func *)get_target();
5923 set_dhtu(bi,bi->direct_handler);
5924 bi->status=BI_COMPILING;
5925 current_block_start_target=(uae_u32)get_target();
5926
5927 log_startblock();
5928
5929 if (bi->count>=0) { /* Need to generate countdown code */
5930 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
5931 raw_sub_l_mi((uae_u32)&(bi->count),1);
5932 raw_jl((uae_u32)popall_recompile_block);
5933 }
5934 if (optlev==0) { /* No need to actually translate */
5935 /* Execute normally without keeping stats */
5936 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
5937 raw_jmp((uae_u32)popall_exec_nostats);
5938 }
5939 else {
5940 reg_alloc_run=0;
5941 next_pc_p=0;
5942 taken_pc_p=0;
5943 branch_cc=0;
5944
5945 comp_pc_p=(uae_u8*)pc_hist[0].location;
5946 init_comp();
5947 was_comp=1;
5948
5949 #if JIT_DEBUG
5950 if (JITDebug) {
5951 raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
5952 raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
5953 }
5954 #endif
5955
5956 for (i=0;i<blocklen &&
5957 get_target_noopt()<max_compile_start;i++) {
5958 cpuop_func **cputbl;
5959 compop_func **comptbl;
5960 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
5961 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
5962 if (!needed_flags) {
5963 cputbl=nfcpufunctbl;
5964 comptbl=nfcompfunctbl;
5965 }
5966 else {
5967 cputbl=cpufunctbl;
5968 comptbl=compfunctbl;
5969 }
5970
5971 failure = 1; // gb-- defaults to failure state
5972 if (comptbl[opcode] && optlev>1) {
5973 failure=0;
5974 if (!was_comp) {
5975 comp_pc_p=(uae_u8*)pc_hist[i].location;
5976 init_comp();
5977 }
5978 was_comp++;
5979
5980 comptbl[opcode](opcode);
5981 freescratch();
5982 if (!(liveflags[i+1] & FLAG_CZNV)) {
5983 /* We can forget about flags */
5984 dont_care_flags();
5985 }
5986 #if INDIVIDUAL_INST
5987 flush(1);
5988 nop();
5989 flush(1);
5990 was_comp=0;
5991 #endif
5992 }
5993
5994 if (failure) {
5995 if (was_comp) {
5996 flush(1);
5997 was_comp=0;
5998 }
5999 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6000 #if USE_NORMAL_CALLING_CONVENTION
6001 raw_push_l_r(REG_PAR1);
6002 #endif
6003 raw_mov_l_mi((uae_u32)&regs.pc_p,
6004 (uae_u32)pc_hist[i].location);
6005 raw_call((uae_u32)cputbl[opcode]);
6006 //raw_add_l_mi((uae_u32)&oink,1); // FIXME
6007 #if USE_NORMAL_CALLING_CONVENTION
6008 raw_inc_sp(4);
6009 #endif
6010 if (needed_flags) {
6011 //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode+65536);
6012 }
6013 else {
6014 //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode);
6015 }
6016
6017 if (i < blocklen - 1) {
6018 uae_s8* branchadd;
6019
6020 raw_mov_l_rm(0,(uae_u32)specflags);
6021 raw_test_l_rr(0,0);
6022 raw_jz_b_oponly();
6023 branchadd=(uae_s8 *)get_target();
6024 emit_byte(0);
6025 raw_jmp((uae_u32)popall_do_nothing);
6026 *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
6027 }
6028 }
6029 }
6030 #if 1 /* This isn't completely kosher yet; it really needs to be
6031 	integrated into a general inter-block-dependency scheme */
6032 if (next_pc_p && taken_pc_p &&
6033 was_comp && taken_pc_p==current_block_pc_p) {
6034 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6035 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6036 uae_u8 x=bi1->needed_flags;
6037
6038 	    if (x==0xff || 1) { /* the shortcut is disabled for now --- always recompute, to be on the safe side */
6039 uae_u16* next=(uae_u16*)next_pc_p;
6040 uae_u32 op=DO_GET_OPCODE(next);
6041
6042 x=0x1f;
6043 x&=(~prop[op].set_flags);
6044 x|=prop[op].use_flags;
6045 }
6046
6047 x|=bi2->needed_flags;
6048 if (!(x & FLAG_CZNV)) {
6049 /* We can forget about flags */
6050 dont_care_flags();
6051 extra_len+=2; /* The next instruction now is part of this
6052 block */
6053 }
6054
6055 }
6056 #endif
6057 log_flush();
6058
6059 if (next_pc_p) { /* A branch was registered */
6060 uae_u32 t1=next_pc_p;
6061 uae_u32 t2=taken_pc_p;
6062 int cc=branch_cc;
6063
6064 uae_u32* branchadd;
6065 uae_u32* tba;
6066 bigstate tmp;
6067 blockinfo* tbi;
6068
6069 if (taken_pc_p<next_pc_p) {
6070 /* backward branch. Optimize for the "taken" case ---
6071 which means the raw_jcc should fall through when
6072 the 68k branch is taken. */
6073 t1=taken_pc_p;
6074 t2=next_pc_p;
6075 cc=branch_cc^1;
6076 }
6077
6078 tmp=live; /* ouch! This is big... */
6079 raw_jcc_l_oponly(cc);
6080 branchadd=(uae_u32*)get_target();
6081 emit_long(0);
6082
6083 /* predicted outcome */
6084 tbi=get_blockinfo_addr_new((void*)t1,1);
6085 match_states(tbi);
6086 raw_cmp_l_mi((uae_u32)specflags,0);
6087 raw_jcc_l_oponly(4);
6088 tba=(uae_u32*)get_target();
6089 emit_long(get_handler(t1)-((uae_u32)tba+4));
6090 raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
6091 raw_jmp((uae_u32)popall_do_nothing);
6092 create_jmpdep(bi,0,tba,t1);
6093
6094 align_target(16);
6095 /* not-predicted outcome */
6096 *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6097 live=tmp; /* Ouch again */
6098 tbi=get_blockinfo_addr_new((void*)t2,1);
6099 match_states(tbi);
6100
6101 //flush(1); /* Can only get here if was_comp==1 */
6102 raw_cmp_l_mi((uae_u32)specflags,0);
6103 raw_jcc_l_oponly(4);
6104 tba=(uae_u32*)get_target();
6105 emit_long(get_handler(t2)-((uae_u32)tba+4));
6106 raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
6107 raw_jmp((uae_u32)popall_do_nothing);
6108 create_jmpdep(bi,1,tba,t2);
6109 }
6110 else
6111 {
6112 if (was_comp) {
6113 flush(1);
6114 }
6115
6116 /* Let's find out where next_handler is... */
6117 if (was_comp && isinreg(PC_P)) {
6118 r=live.state[PC_P].realreg;
6119 raw_and_l_ri(r,TAGMASK);
6120 int r2 = (r==0) ? 1 : 0;
6121 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6122 raw_cmp_l_mi((uae_u32)specflags,0);
6123 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6124 raw_jmp_r(r2);
6125 }
6126 else if (was_comp && isconst(PC_P)) {
6127 uae_u32 v=live.state[PC_P].val;
6128 uae_u32* tba;
6129 blockinfo* tbi;
6130
6131 tbi=get_blockinfo_addr_new((void*)v,1);
6132 match_states(tbi);
6133
6134 raw_cmp_l_mi((uae_u32)specflags,0);
6135 raw_jcc_l_oponly(4);
6136 tba=(uae_u32*)get_target();
6137 emit_long(get_handler(v)-((uae_u32)tba+4));
6138 raw_mov_l_mi((uae_u32)&regs.pc_p,v);
6139 raw_jmp((uae_u32)popall_do_nothing);
6140 create_jmpdep(bi,0,tba,v);
6141 }
6142 else {
6143 r=REG_PC_TMP;
6144 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
6145 raw_and_l_ri(r,TAGMASK);
6146 int r2 = (r==0) ? 1 : 0;
6147 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6148 raw_cmp_l_mi((uae_u32)specflags,0);
6149 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6150 raw_jmp_r(r2);
6151 }
6152 }
6153 }
6154
6155 #if USE_MATCH
6156 if (callers_need_recompile(&live,&(bi->env))) {
6157 mark_callers_recompile(bi);
6158 }
6159
6160 big_to_small_state(&live,&(bi->env));
6161 #endif
6162
6163 if (next_pc_p+extra_len>=max_pcp &&
6164 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6165 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6166 else
6167 max_pcp+=LONGEST_68K_INST;
6168 bi->len=max_pcp-min_pcp;
6169 bi->min_pcp=min_pcp;
6170
6171 remove_from_list(bi);
6172 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6173 add_to_dormant(bi); /* No need to checksum it on cache flush.
6174 Please don't start changing ROMs in
6175 flight! */
6176 }
6177 else {
6178 calc_checksum(bi,&(bi->c1),&(bi->c2));
6179 add_to_active(bi);
6180 }
6181
6182 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6183
6184 #if JIT_DEBUG
6185 if (JITDebug)
6186 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6187
6188 	if (JITDebug && disasm_this_block) {
6189 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6190 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6191 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6192 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6193 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6194 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6195 getchar();
6196 }
6197 #endif
6198
6199 log_dump();
6200 align_target(32);
6201
6202 /* This is the non-direct handler */
6203 bi->handler=
6204 bi->handler_to_use=(cpuop_func *)get_target();
6205 raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6206 raw_jnz((uae_u32)popall_cache_miss);
6207 comp_pc_p=(uae_u8*)pc_hist[0].location;
6208
6209 bi->status=BI_FINALIZING;
6210 init_comp();
6211 match_states(bi);
6212 flush(1);
6213
6214 raw_jmp((uae_u32)bi->direct_handler);
6215
6216 align_target(32);
6217 current_compile_p=get_target();
6218
6219 raise_in_cl_list(bi);
6220
6221 /* We will flush soon, anyway, so let's do it now */
6222 if (current_compile_p>=max_compile_start)
6223 flush_icache_hard(7);
6224
6225 bi->status=BI_ACTIVE;
6226 if (redo_current_block)
6227 block_need_recompile(bi);
6228
6229 #if PROFILE_COMPILE_TIME
6230 compile_time += (clock() - start_time);
6231 #endif
6232 }
6233 }
6234
6235 void do_nothing(void)
6236 {
6237 /* What did you expect this to do? */
6238 }
6239
6240 void exec_nostats(void)
6241 {
6242 for (;;) {
6243 uae_u32 opcode = GET_OPCODE;
6244 #ifdef X86_ASSEMBLY__disable
6245 __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6246 : : "b" (cpufunctbl[opcode]), "a" (opcode)
6247 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
6248 #else
6249 (*cpufunctbl[opcode])(opcode);
6250 #endif
6251 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6252 return; /* We will deal with the spcflags in the caller */
6253 }
6254 }
6255 }
6256
6257 void execute_normal(void)
6258 {
6259 if (!check_for_cache_miss()) {
6260 cpu_history pc_hist[MAXRUN];
6261 int blocklen = 0;
6262 #if REAL_ADDRESSING || DIRECT_ADDRESSING
6263 start_pc_p = regs.pc_p;
6264 start_pc = get_virtual_address(regs.pc_p);
6265 #else
6266 start_pc_p = regs.pc_oldp;
6267 start_pc = regs.pc;
6268 #endif
6269 for (;;) { /* Take note: This is the do-it-normal loop */
6270 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6271 uae_u32 opcode = GET_OPCODE;
6272 #if FLIGHT_RECORDER
6273 m68k_record_step(m68k_getpc());
6274 #endif
6275 #ifdef X86_ASSEMBLY__disable
6276 __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6277 : : "b" (cpufunctbl[opcode]), "a" (opcode)
6278 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
6279 #else
6280 (*cpufunctbl[opcode])(opcode);
6281 #endif
6282 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6283 compile_block(pc_hist, blocklen);
6284 return; /* We will deal with the spcflags in the caller */
6285 }
6286 /* No need to check regs.spcflags, because if they were set,
6287 we'd have ended up inside that "if" */
6288 }
6289 }
6290 }
6291
6292 typedef void (*compiled_handler)(void);
6293
6294 void m68k_do_compile_execute(void)
6295 {
6296 for (;;) {
6297 #ifdef X86_ASSEMBLY
6298 __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6299 : : "b" (cache_tags[cacheline(regs.pc_p)].handler)
6300 : "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc");
6301 #else
6302 ((compiled_handler)(pushall_call_handler))();
6303 #endif
6304 /* Whenever we return from that, we should check spcflags */
6305 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6306 if (m68k_do_specialties ())
6307 return;
6308 }
6309 }
6310 }