root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.6
Committed: 2002-10-01T09:39:55Z by gbeauche
Branch: MAIN
Changes since 1.5: +37 -19 lines
Log Message:
- Optimize use of quit_program variable. This is a real boolean for B2.
- Remove unused/dead code around debugging support.
- m68k_compile_execute() is now generated and optimized code.

File Contents

# Content
1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
2 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
3 #endif
4
5 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
6 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
7 #endif
8
9 #define USE_MATCH 0
10
11 /* kludge for Brian, so he can compile under MSVC++ */
12 #define USE_NORMAL_CALLING_CONVENTION 0
13
14 #ifndef WIN32
15 #include <sys/types.h>
16 #include <sys/mman.h>
17 #endif
18
19 #include <stdlib.h>
20 #include <fcntl.h>
21 #include <errno.h>
22
23 #include "sysdeps.h"
24 #include "cpu_emulation.h"
25 #include "main.h"
26 #include "prefs.h"
27 #include "user_strings.h"
28 #include "vm_alloc.h"
29
30 #include "m68k.h"
31 #include "memory.h"
32 #include "readcpu.h"
33 #include "newcpu.h"
34 #include "comptbl.h"
35 #include "compiler/compemu.h"
36 #include "fpu/fpu.h"
37 #include "fpu/flags.h"
38
39 #define DEBUG 1
40 #include "debug.h"
41
42 #ifdef ENABLE_MON
43 #include "mon.h"
44 #endif
45
46 #ifndef WIN32
47 #define PROFILE_COMPILE_TIME 1
48 #endif
49
50 #ifdef WIN32
51 #undef write_log
52 #define write_log dummy_write_log
53 static void dummy_write_log(const char *, ...) { }
54 #endif
55
56 #if JIT_DEBUG
57 #undef abort
58 #define abort() do { \
59 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
60 exit(EXIT_FAILURE); \
61 } while (0)
62 #endif
63
64 #if PROFILE_COMPILE_TIME
65 #include <time.h>
66 static uae_u32 compile_count = 0;
67 static clock_t compile_time = 0;
68 static clock_t emul_start_time = 0;
69 static clock_t emul_end_time = 0;
70 #endif
71
72 compop_func *compfunctbl[65536];
73 compop_func *nfcompfunctbl[65536];
74 cpuop_func *nfcpufunctbl[65536];
75 uae_u8* comp_pc_p;
76
77 // From newcpu.cpp
78 extern bool quit_program;
79
80 // gb-- Extra data for Basilisk II/JIT
81 #if JIT_DEBUG
82 static bool JITDebug = false; // Enable runtime disassemblers through mon?
83 #else
84 const bool JITDebug = false; // Don't use JIT debug mode at all
85 #endif
86
87 const uae_u32 MIN_CACHE_SIZE = 2048; // Minimal translation cache size (2048 KB)
88 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
89 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
90 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
91 static bool avoid_fpu = true; // Flag: compile FPU instructions?
92 static bool have_cmov = false; // target has CMOV instructions?
93 static bool have_rat_stall = true; // target has partial register stalls?
94 static bool tune_alignment = false; // Tune code alignments for running CPU?
95 static int align_loops = 32; // Align the start of loops
96 static int align_jumps = 32; // Align the start of jumps
97 static int zero_fd = -1;
98 static int optcount[10] = {
99 10, // How often a block has to be executed before it is translated
100 0, // How often to use naive translation
101 0, 0, 0, 0,
102 -1, -1, -1, -1
103 };
104
105 struct op_properties {
106 uae_u8 use_flags;
107 uae_u8 set_flags;
108 uae_u8 is_addx;
109 uae_u8 cflow;
110 };
111 static op_properties prop[65536];
112
113 static inline int end_block(uae_u32 opcode)
114 {
115 return (prop[opcode].cflow & fl_end_block);
116 }
117
118 uae_u8* start_pc_p;
119 uae_u32 start_pc;
120 uae_u32 current_block_pc_p;
121 uae_u32 current_block_start_target;
122 uae_u32 needed_flags;
123 static uae_u32 next_pc_p;
124 static uae_u32 taken_pc_p;
125 static int branch_cc;
126 static int redo_current_block;
127
128 int segvcount=0;
129 int soft_flush_count=0;
130 int hard_flush_count=0;
131 int checksum_count=0;
132 static uae_u8* current_compile_p=NULL;
133 static uae_u8* max_compile_start;
134 static uae_u8* compiled_code=NULL;
135 static uae_s32 reg_alloc_run;
136
137 void* pushall_call_handler=NULL;
138 static void* popall_do_nothing=NULL;
139 static void* popall_exec_nostats=NULL;
140 static void* popall_execute_normal=NULL;
141 static void* popall_cache_miss=NULL;
142 static void* popall_recompile_block=NULL;
143 static void* popall_check_checksum=NULL;
144
145 extern uae_u32 oink;
146 extern unsigned long foink3;
147 extern unsigned long foink;
148
149 /* The 68k only ever executes from even addresses. So right now, we
150 * waste half the entries in this array
151 * UPDATE: We now use those entries to store the start of the linked
152 * lists that we maintain for each hash result.
153 */
154 cacheline cache_tags[TAGSIZE];
155 int letit=0;
156 blockinfo* hold_bi[MAX_HOLD_BI];
157 blockinfo* active;
158 blockinfo* dormant;
159
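/* A layout sketch, inferred from the list functions further below: the
 * entries come in pairs. For a hash value cl produced by cacheline(),
 * the even slot cache_tags[cl].handler caches the code pointer to jump
 * through, while the odd slot cache_tags[cl+1].bi heads the linked list
 * of blockinfos whose pc_p hashes to that cache line. */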
160 /* 68040 */
161 extern struct cputbl op_smalltbl_0_nf[];
162 extern struct comptbl op_smalltbl_0_comp_nf[];
163 extern struct comptbl op_smalltbl_0_comp_ff[];
164
165 /* 68020 + 68881 */
166 extern struct cputbl op_smalltbl_1_nf[];
167
168 /* 68020 */
169 extern struct cputbl op_smalltbl_2_nf[];
170
171 /* 68010 */
172 extern struct cputbl op_smalltbl_3_nf[];
173
174 /* 68000 */
175 extern struct cputbl op_smalltbl_4_nf[];
176
177 /* 68000 slow but compatible. */
178 extern struct cputbl op_smalltbl_5_nf[];
179
180 static void flush_icache_hard(int n);
181 static void flush_icache_lazy(int n);
182 static void flush_icache_none(int n);
183 void (*flush_icache)(int n) = flush_icache_none;
184
185
186
187 bigstate live;
188 smallstate empty_ss;
189 smallstate default_ss;
190 static int optlev;
191
192 static int writereg(int r, int size);
193 static void unlock2(int r);
194 static void setlock(int r);
195 static int readreg_specific(int r, int size, int spec);
196 static int writereg_specific(int r, int size, int spec);
197 static void prepare_for_call_1(void);
198 static void prepare_for_call_2(void);
199 static void align_target(uae_u32 a);
200
201 static uae_s32 nextused[VREGS];
202
203 uae_u32 m68k_pc_offset;
204
205 /* Some arithmetic operations can be optimized away if the operands
206 * are known to be constant. But that's only a good idea when the
207 * side effects they would have on the flags are not important. This
208 * variable indicates whether we need the side effects or not
209 */
210 uae_u32 needflags=0;
211
212 /* Flag handling is complicated.
213 *
214 * x86 instructions create flags, which quite often are exactly what we
215 * want. So at times, the "68k" flags are actually in the x86 flags.
216 *
217 * Then again, sometimes we do x86 instructions that clobber the x86
218 * flags, but don't represent a corresponding m68k instruction. In that
219 * case, we have to save them.
220 *
221 * We used to save them to the stack, but now store them back directly
222 * into the regflags.cznv of the traditional emulation. Thus some odd
223 * names.
224 *
225 * So flags can be in either of two places (used to be three; boy were
226 * things complicated back then!); And either place can contain either
227 * valid flags or invalid trash (and on the stack, there was also the
228 * option of "nothing at all", now gone). A couple of variables keep
229 * track of the respective states.
230 *
231 * To make things worse, we might or might not be interested in the flags.
232 * by default, we are, but a call to dont_care_flags can change that
233 * until the next call to live_flags. If we are not, pretty much whatever
234 * is in the register and/or the native flags is seen as valid.
235 */
236
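/* A short summary of the state machine described above:
 * live.flags_in_flags and live.flags_on_stack are each VALID or TRASH.
 * make_flags_live_internal() reloads valid saved flags from
 * regflags.cznv into the x86 flags; flags_to_stack() saves them the
 * other way; and clobber_flags() must run before emitting anything
 * that trashes the x86 flags while they still hold the live copy. */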
237 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
238 {
239 return cache_tags[cl+1].bi;
240 }
241
242 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
243 {
244 blockinfo* bi=get_blockinfo(cacheline(addr));
245
246 while (bi) {
247 if (bi->pc_p==addr)
248 return bi;
249 bi=bi->next_same_cl;
250 }
251 return NULL;
252 }
253
254
255 /*******************************************************************
256 * All sorts of list related functions for all of the lists *
257 *******************************************************************/
258
259 static __inline__ void remove_from_cl_list(blockinfo* bi)
260 {
261 uae_u32 cl=cacheline(bi->pc_p);
262
263 if (bi->prev_same_cl_p)
264 *(bi->prev_same_cl_p)=bi->next_same_cl;
265 if (bi->next_same_cl)
266 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
267 if (cache_tags[cl+1].bi)
268 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
269 else
270 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
271 }
272
273 static __inline__ void remove_from_list(blockinfo* bi)
274 {
275 if (bi->prev_p)
276 *(bi->prev_p)=bi->next;
277 if (bi->next)
278 bi->next->prev_p=bi->prev_p;
279 }
280
281 static __inline__ void remove_from_lists(blockinfo* bi)
282 {
283 remove_from_list(bi);
284 remove_from_cl_list(bi);
285 }
286
287 static __inline__ void add_to_cl_list(blockinfo* bi)
288 {
289 uae_u32 cl=cacheline(bi->pc_p);
290
291 if (cache_tags[cl+1].bi)
292 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
293 bi->next_same_cl=cache_tags[cl+1].bi;
294
295 cache_tags[cl+1].bi=bi;
296 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
297
298 cache_tags[cl].handler=bi->handler_to_use;
299 }
300
301 static __inline__ void raise_in_cl_list(blockinfo* bi)
302 {
303 remove_from_cl_list(bi);
304 add_to_cl_list(bi);
305 }
306
307 static __inline__ void add_to_active(blockinfo* bi)
308 {
309 if (active)
310 active->prev_p=&(bi->next);
311 bi->next=active;
312
313 active=bi;
314 bi->prev_p=&active;
315 }
316
317 static __inline__ void add_to_dormant(blockinfo* bi)
318 {
319 if (dormant)
320 dormant->prev_p=&(bi->next);
321 bi->next=dormant;
322
323 dormant=bi;
324 bi->prev_p=&dormant;
325 }
326
327 static __inline__ void remove_dep(dependency* d)
328 {
329 if (d->prev_p)
330 *(d->prev_p)=d->next;
331 if (d->next)
332 d->next->prev_p=d->prev_p;
333 d->prev_p=NULL;
334 d->next=NULL;
335 }
336
337 /* This block's code is about to be thrown away, so it no longer
338 depends on anything else */
339 static __inline__ void remove_deps(blockinfo* bi)
340 {
341 remove_dep(&(bi->dep[0]));
342 remove_dep(&(bi->dep[1]));
343 }
344
345 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
346 {
347 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
348 }
349
350 /********************************************************************
351 * Soft flush handling support functions *
352 ********************************************************************/
353
354 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
355 {
356 //write_log("bi is %p\n",bi);
357 if (dh!=bi->direct_handler_to_use) {
358 dependency* x=bi->deplist;
359 //write_log("bi->deplist=%p\n",bi->deplist);
360 while (x) {
361 //write_log("x is %p\n",x);
362 //write_log("x->next is %p\n",x->next);
363 //write_log("x->prev_p is %p\n",x->prev_p);
364
365 if (x->jmp_off) {
366 adjust_jmpdep(x,dh);
367 }
368 x=x->next;
369 }
370 bi->direct_handler_to_use=dh;
371 }
372 }
373
374 static __inline__ void invalidate_block(blockinfo* bi)
375 {
376 int i;
377
378 bi->optlevel=0;
379 bi->count=optcount[0]-1;
380 bi->handler=NULL;
381 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
382 bi->direct_handler=NULL;
383 set_dhtu(bi,bi->direct_pen);
384 bi->needed_flags=0xff;
385 bi->status=BI_INVALID;
386 for (i=0;i<2;i++) {
387 bi->dep[i].jmp_off=NULL;
388 bi->dep[i].target=NULL;
389 }
390 remove_deps(bi);
391 }
392
393 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
394 {
395 blockinfo* tbi=get_blockinfo_addr((void*)target);
396
397 Dif(!tbi) {
398 write_log("Could not create jmpdep!\n");
399 abort();
400 }
401 bi->dep[i].jmp_off=jmpaddr;
402 bi->dep[i].source=bi;
403 bi->dep[i].target=tbi;
404 bi->dep[i].next=tbi->deplist;
405 if (bi->dep[i].next)
406 bi->dep[i].next->prev_p=&(bi->dep[i].next);
407 bi->dep[i].prev_p=&(tbi->deplist);
408 tbi->deplist=&(bi->dep[i]);
409 }
410
411 static __inline__ void block_need_recompile(blockinfo * bi)
412 {
413 uae_u32 cl = cacheline(bi->pc_p);
414
415 set_dhtu(bi, bi->direct_pen);
416 bi->direct_handler = bi->direct_pen;
417
418 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
419 bi->handler = (cpuop_func *)popall_execute_normal;
420 if (bi == cache_tags[cl + 1].bi)
421 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
422 bi->status = BI_NEED_RECOMP;
423 }
424
425 static __inline__ void mark_callers_recompile(blockinfo * bi)
426 {
427 dependency *x = bi->deplist;
428
429 while (x) {
430 dependency *next = x->next; /* This disappears when we mark for
431 * recompilation and thus remove the
432 * blocks from the lists */
433 if (x->jmp_off) {
434 blockinfo *cbi = x->source;
435
436 Dif(cbi->status == BI_INVALID) {
437 // write_log("invalid block in dependency list\n"); // FIXME?
438 // abort();
439 }
440 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
441 block_need_recompile(cbi);
442 mark_callers_recompile(cbi);
443 }
444 else if (cbi->status == BI_COMPILING) {
445 redo_current_block = 1;
446 }
447 else if (cbi->status == BI_NEED_RECOMP) {
448 /* nothing */
449 }
450 else {
451 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
452 }
453 }
454 x = next;
455 }
456 }
457
458 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
459 {
460 blockinfo* bi=get_blockinfo_addr(addr);
461 int i;
462
463 if (!bi) {
464 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
465 if (hold_bi[i]) {
466 uae_u32 cl=cacheline(addr);
467
468 bi=hold_bi[i];
469 hold_bi[i]=NULL;
470 bi->pc_p=(uae_u8 *)addr;
471 invalidate_block(bi);
472 add_to_active(bi);
473 add_to_cl_list(bi);
474
475 }
476 }
477 }
478 if (!bi) {
479 write_log("Looking for blockinfo, can't find free one\n");
480 abort();
481 }
482 return bi;
483 }
484
485 static void prepare_block(blockinfo* bi);
486
487 /* Management of blockinfos.
488 
489 A blockinfo struct is allocated whenever a new block has to be
490 compiled. If the list of free blockinfos is empty, we allocate a new
491 pool of blockinfos and link the newly created blockinfos together
492 into the list of free blockinfos. Otherwise, we simply pop a structure
493 off the free list.
494 
495 Blockinfos are lazily deallocated, i.e. chained back into the
496 list of free blockinfos whenever a translation cache flush (hard or
497 soft) request occurs.
498 */
499
500 #if USE_SEPARATE_BIA
501 const int BLOCKINFO_POOL_SIZE = 128;
502 struct blockinfo_pool {
503 blockinfo bi[BLOCKINFO_POOL_SIZE];
504 blockinfo_pool *next;
505 };
506 static blockinfo_pool * blockinfo_pools = 0;
507 static blockinfo * free_blockinfos = 0;
508 #endif
509
510 static __inline__ blockinfo *alloc_blockinfo(void)
511 {
512 #if USE_SEPARATE_BIA
513 if (!free_blockinfos) {
514 // There is no blockinfo struct left, allocate a new
515 // pool and link the chunks into the free list
516 blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
517 for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
518 bi->next = free_blockinfos;
519 free_blockinfos = bi;
520 }
521 bi_pool->next = blockinfo_pools;
522 blockinfo_pools = bi_pool;
523 }
524 blockinfo *bi = free_blockinfos;
525 free_blockinfos = bi->next;
526 #else
527 blockinfo *bi = (blockinfo*)current_compile_p;
528 current_compile_p += sizeof(blockinfo);
529 #endif
530 return bi;
531 }
532
533 static __inline__ void free_blockinfo(blockinfo *bi)
534 {
535 #if USE_SEPARATE_BIA
536 bi->next = free_blockinfos;
537 free_blockinfos = bi;
538 #endif
539 }
540
541 static void free_blockinfo_pools(void)
542 {
543 #if USE_SEPARATE_BIA
544 int blockinfo_pool_count = 0;
545 blockinfo_pool *curr_pool = blockinfo_pools;
546 while (curr_pool) {
547 blockinfo_pool_count++;
548 blockinfo_pool *dead_pool = curr_pool;
549 curr_pool = curr_pool->next;
550 free(dead_pool);
551 }
552
553 uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
554 write_log("### Blockinfo allocation statistics\n");
555 write_log("Number of blockinfo pools : %d\n", blockinfo_pool_count);
556 write_log("Total number of blockinfos : %d (%d KB)\n",
557 blockinfo_pool_count * BLOCKINFO_POOL_SIZE,
558 blockinfo_pools_size / 1024);
559 write_log("\n");
560 #endif
561 }
562
563 static __inline__ void alloc_blockinfos(void)
564 {
565 int i;
566 blockinfo* bi;
567
568 for (i=0;i<MAX_HOLD_BI;i++) {
569 if (hold_bi[i])
570 return;
571 bi=hold_bi[i]=alloc_blockinfo();
572 prepare_block(bi);
573 }
574 }
575
576 /********************************************************************
577 * Functions to emit data into memory, and other general support *
578 ********************************************************************/
579
580 static uae_u8* target;
581
582 static void emit_init(void)
583 {
584 }
585
586 static __inline__ void emit_byte(uae_u8 x)
587 {
588 *target++=x;
589 }
590
591 static __inline__ void emit_word(uae_u16 x)
592 {
593 *((uae_u16*)target)=x;
594 target+=2;
595 }
596
597 static __inline__ void emit_long(uae_u32 x)
598 {
599 *((uae_u32*)target)=x;
600 target+=4;
601 }
602
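/* A minimal usage sketch (hypothetical helper, not from this file):
   hand-emitting the x86 instruction "mov eax, 0x12345678", encoded as
   opcode B8 followed by a 32-bit little-endian immediate. */
static __inline__ void emit_example_mov_eax_imm(void)
{
	emit_byte(0xB8);       /* B8+rd: mov eax, imm32 */
	emit_long(0x12345678); /* immediate, stored in native byte order */
}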
603 static __inline__ uae_u32 reverse32(uae_u32 v)
604 {
605 #if 1
606 // gb-- We have specialized byteswapping functions, just use them
607 return do_byteswap_32(v);
608 #else
609 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
610 #endif
611 }
612
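/* Example: reverse32(0x12345678) == 0x78563412, i.e. a full 32-bit
   byte swap. */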
613 /********************************************************************
614 * Getting the information about the target CPU *
615 ********************************************************************/
616
617 #include "codegen_x86.cpp"
618
619 void set_target(uae_u8* t)
620 {
621 target=t;
622 }
623
624 static __inline__ uae_u8* get_target_noopt(void)
625 {
626 return target;
627 }
628
629 __inline__ uae_u8* get_target(void)
630 {
631 return get_target_noopt();
632 }
633
634
635 /********************************************************************
636 * Flags status handling. EMIT TIME! *
637 ********************************************************************/
638
639 static void bt_l_ri_noclobber(R4 r, IMM i);
640
641 static void make_flags_live_internal(void)
642 {
643 if (live.flags_in_flags==VALID)
644 return;
645 Dif (live.flags_on_stack==TRASH) {
646 write_log("Want flags, got something on stack, but it is TRASH\n");
647 abort();
648 }
649 if (live.flags_on_stack==VALID) {
650 int tmp;
651 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
652 raw_reg_to_flags(tmp);
653 unlock2(tmp);
654
655 live.flags_in_flags=VALID;
656 return;
657 }
658 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
659 live.flags_in_flags,live.flags_on_stack);
660 abort();
661 }
662
663 static void flags_to_stack(void)
664 {
665 if (live.flags_on_stack==VALID)
666 return;
667 if (!live.flags_are_important) {
668 live.flags_on_stack=VALID;
669 return;
670 }
671 Dif (live.flags_in_flags!=VALID)
672 abort();
673 else {
674 int tmp;
675 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
676 raw_flags_to_reg(tmp);
677 unlock2(tmp);
678 }
679 live.flags_on_stack=VALID;
680 }
681
682 static __inline__ void clobber_flags(void)
683 {
684 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
685 flags_to_stack();
686 live.flags_in_flags=TRASH;
687 }
688
689 /* Prepare for leaving the compiled stuff */
690 static __inline__ void flush_flags(void)
691 {
692 flags_to_stack();
693 return;
694 }
695
696 int touchcnt;
697
698 /********************************************************************
699 * register allocation per block logging *
700 ********************************************************************/
701
702 static uae_s8 vstate[VREGS];
703 static uae_s8 vwritten[VREGS];
704 static uae_s8 nstate[N_REGS];
705
706 #define L_UNKNOWN -127
707 #define L_UNAVAIL -1
708 #define L_NEEDED -2
709 #define L_UNNEEDED -3
710
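/* How the codes above are used by the log_* functions below:
 * vstate[r] starts as L_UNKNOWN and becomes L_NEEDED if the vreg is
 * read before being written, or L_UNNEEDED if it is clobbered first.
 * nstate[n] starts as L_UNKNOWN and becomes L_UNAVAIL once the nreg
 * is used as a temporary; a value >= 0 records which vreg it holds. */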
711 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
712 {
713 int i;
714
715 for (i = 0; i < VREGS; i++)
716 s->virt[i] = vstate[i];
717 for (i = 0; i < N_REGS; i++)
718 s->nat[i] = nstate[i];
719 }
720
721 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
722 {
723 int i;
724 int reverse = 0;
725
726 for (i = 0; i < VREGS; i++) {
727 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
728 return 1;
729 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
730 reverse++;
731 }
732 for (i = 0; i < N_REGS; i++) {
733 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
734 return 1;
735 if (nstate[i] < 0 && s->nat[i] >= 0)
736 reverse++;
737 }
738 if (reverse >= 2 && USE_MATCH)
739 return 1; /* In this case, it might be worth recompiling the
740 * callers */
741 return 0;
742 }
743
744 static __inline__ void log_startblock(void)
745 {
746 int i;
747
748 for (i = 0; i < VREGS; i++) {
749 vstate[i] = L_UNKNOWN;
750 vwritten[i] = 0;
751 }
752 for (i = 0; i < N_REGS; i++)
753 nstate[i] = L_UNKNOWN;
754 }
755
756 /* Using an n-reg for a temp variable */
757 static __inline__ void log_isused(int n)
758 {
759 if (nstate[n] == L_UNKNOWN)
760 nstate[n] = L_UNAVAIL;
761 }
762
763 static __inline__ void log_visused(int r)
764 {
765 if (vstate[r] == L_UNKNOWN)
766 vstate[r] = L_NEEDED;
767 }
768
769 static __inline__ void do_load_reg(int n, int r)
770 {
771 if (r == FLAGTMP)
772 raw_load_flagreg(n, r);
773 else if (r == FLAGX)
774 raw_load_flagx(n, r);
775 else
776 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
777 }
778
779 static __inline__ void check_load_reg(int n, int r)
780 {
781 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
782 }
783
784 static __inline__ void log_vwrite(int r)
785 {
786 vwritten[r] = 1;
787 }
788
789 /* Using an n-reg to hold a v-reg */
790 static __inline__ void log_isreg(int n, int r)
791 {
792 static int count = 0;
793
794 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
795 nstate[n] = r;
796 else {
797 do_load_reg(n, r);
798 if (nstate[n] == L_UNKNOWN)
799 nstate[n] = L_UNAVAIL;
800 }
801 if (vstate[r] == L_UNKNOWN)
802 vstate[r] = L_NEEDED;
803 }
804
805 static __inline__ void log_clobberreg(int r)
806 {
807 if (vstate[r] == L_UNKNOWN)
808 vstate[r] = L_UNNEEDED;
809 }
810
811 /* This ends all possibility of clever register allocation */
812
813 static __inline__ void log_flush(void)
814 {
815 int i;
816
817 for (i = 0; i < VREGS; i++)
818 if (vstate[i] == L_UNKNOWN)
819 vstate[i] = L_NEEDED;
820 for (i = 0; i < N_REGS; i++)
821 if (nstate[i] == L_UNKNOWN)
822 nstate[i] = L_UNAVAIL;
823 }
824
825 static __inline__ void log_dump(void)
826 {
827 int i;
828
829 return;
830
831 write_log("----------------------\n");
832 for (i = 0; i < N_REGS; i++) {
833 switch (nstate[i]) {
834 case L_UNKNOWN:
835 write_log("Nat %d : UNKNOWN\n", i);
836 break;
837 case L_UNAVAIL:
838 write_log("Nat %d : UNAVAIL\n", i);
839 break;
840 default:
841 write_log("Nat %d : %d\n", i, nstate[i]);
842 break;
843 }
844 }
845 for (i = 0; i < VREGS; i++) {
846 if (vstate[i] == L_UNNEEDED)
847 write_log("Virt %d: UNNEEDED\n", i);
848 }
849 }
850
851 /********************************************************************
852 * register status handling. EMIT TIME! *
853 ********************************************************************/
854
855 static __inline__ void set_status(int r, int status)
856 {
857 if (status == ISCONST)
858 log_clobberreg(r);
859 live.state[r].status=status;
860 }
861
862 static __inline__ int isinreg(int r)
863 {
864 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
865 }
866
867 static __inline__ void adjust_nreg(int r, uae_u32 val)
868 {
869 if (!val)
870 return;
871 raw_lea_l_brr(r,r,val);
872 }
873
874 static void tomem(int r)
875 {
876 int rr=live.state[r].realreg;
877
878 if (isinreg(r)) {
879 if (live.state[r].val && live.nat[rr].nholds==1
880 && !live.nat[rr].locked) {
881 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
882 // live.state[r].val,r,rr,target);
883 adjust_nreg(rr,live.state[r].val);
884 live.state[r].val=0;
885 live.state[r].dirtysize=4;
886 set_status(r,DIRTY);
887 }
888 }
889
890 if (live.state[r].status==DIRTY) {
891 switch (live.state[r].dirtysize) {
892 case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
893 case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
894 case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
895 default: abort();
896 }
897 log_vwrite(r);
898 set_status(r,CLEAN);
899 live.state[r].dirtysize=0;
900 }
901 }
902
903 static __inline__ int isconst(int r)
904 {
905 return live.state[r].status==ISCONST;
906 }
907
908 int is_const(int r)
909 {
910 return isconst(r);
911 }
912
913 static __inline__ void writeback_const(int r)
914 {
915 if (!isconst(r))
916 return;
917 Dif (live.state[r].needflush==NF_HANDLER) {
918 write_log("Trying to write back constant NF_HANDLER!\n");
919 abort();
920 }
921
922 raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
923 log_vwrite(r);
924 live.state[r].val=0;
925 set_status(r,INMEM);
926 }
927
928 static __inline__ void tomem_c(int r)
929 {
930 if (isconst(r)) {
931 writeback_const(r);
932 }
933 else
934 tomem(r);
935 }
936
937 static void evict(int r)
938 {
939 int rr;
940
941 if (!isinreg(r))
942 return;
943 tomem(r);
944 rr=live.state[r].realreg;
945
946 Dif (live.nat[rr].locked &&
947 live.nat[rr].nholds==1) {
948 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
949 abort();
950 }
951
952 live.nat[rr].nholds--;
953 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
954 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
955 int thisind=live.state[r].realind;
956
957 live.nat[rr].holds[thisind]=topreg;
958 live.state[topreg].realind=thisind;
959 }
960 live.state[r].realreg=-1;
961 set_status(r,INMEM);
962 }
963
964 static __inline__ void free_nreg(int r)
965 {
966 int i=live.nat[r].nholds;
967
968 while (i) {
969 int vr;
970
971 --i;
972 vr=live.nat[r].holds[i];
973 evict(vr);
974 }
975 Dif (live.nat[r].nholds!=0) {
976 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
977 abort();
978 }
979 }
980
981 /* Use with care! */
982 static __inline__ void isclean(int r)
983 {
984 if (!isinreg(r))
985 return;
986 live.state[r].validsize=4;
987 live.state[r].dirtysize=0;
988 live.state[r].val=0;
989 set_status(r,CLEAN);
990 }
991
992 static __inline__ void disassociate(int r)
993 {
994 isclean(r);
995 evict(r);
996 }
997
998 static __inline__ void set_const(int r, uae_u32 val)
999 {
1000 disassociate(r);
1001 live.state[r].val=val;
1002 set_status(r,ISCONST);
1003 }
1004
1005 static __inline__ uae_u32 get_offset(int r)
1006 {
1007 return live.state[r].val;
1008 }
1009
1010 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1011 {
1012 int bestreg;
1013 uae_s32 when;
1014 int i;
1015 uae_s32 badness=0; /* to shut up gcc */
1016 bestreg=-1;
1017 when=2000000000;
1018
1019 for (i=N_REGS;i--;) {
1020 badness=live.nat[i].touched;
1021 if (live.nat[i].nholds==0)
1022 badness=0;
1023 if (i==hint)
1024 badness-=200000000;
1025 if (!live.nat[i].locked && badness<when) {
1026 if ((size==1 && live.nat[i].canbyte) ||
1027 (size==2 && live.nat[i].canword) ||
1028 (size==4)) {
1029 bestreg=i;
1030 when=badness;
1031 if (live.nat[i].nholds==0 && hint<0)
1032 break;
1033 if (i==hint)
1034 break;
1035 }
1036 }
1037 }
1038 Dif (bestreg==-1)
1039 abort();
1040
1041 if (live.nat[bestreg].nholds>0) {
1042 free_nreg(bestreg);
1043 }
1044 if (isinreg(r)) {
1045 int rr=live.state[r].realreg;
1046 /* This will happen if we read a partially dirty register at a
1047 bigger size */
1048 Dif (willclobber || live.state[r].validsize>=size)
1049 abort();
1050 Dif (live.nat[rr].nholds!=1)
1051 abort();
1052 if (size==4 && live.state[r].validsize==2) {
1053 log_isused(bestreg);
1054 log_visused(r);
1055 raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
1056 raw_bswap_32(bestreg);
1057 raw_zero_extend_16_rr(rr,rr);
1058 raw_zero_extend_16_rr(bestreg,bestreg);
1059 raw_bswap_32(bestreg);
1060 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1061 live.state[r].validsize=4;
1062 live.nat[rr].touched=touchcnt++;
1063 return rr;
1064 }
1065 if (live.state[r].validsize==1) {
1066 /* Nothing yet */
1067 }
1068 evict(r);
1069 }
1070
1071 if (!willclobber) {
1072 if (live.state[r].status!=UNDEF) {
1073 if (isconst(r)) {
1074 raw_mov_l_ri(bestreg,live.state[r].val);
1075 live.state[r].val=0;
1076 live.state[r].dirtysize=4;
1077 set_status(r,DIRTY);
1078 log_isused(bestreg);
1079 }
1080 else {
1081 log_isreg(bestreg, r); /* This will also load it! */
1082 live.state[r].dirtysize=0;
1083 set_status(r,CLEAN);
1084 }
1085 }
1086 else {
1087 live.state[r].val=0;
1088 live.state[r].dirtysize=0;
1089 set_status(r,CLEAN);
1090 log_isused(bestreg);
1091 }
1092 live.state[r].validsize=4;
1093 }
1094 else { /* this is the easiest way, but not optimal. FIXME! */
1095 /* Now it's trickier, but hopefully still OK */
1096 if (!isconst(r) || size==4) {
1097 live.state[r].validsize=size;
1098 live.state[r].dirtysize=size;
1099 live.state[r].val=0;
1100 set_status(r,DIRTY);
1101 if (size == 4) {
1102 log_clobberreg(r);
1103 log_isused(bestreg);
1104 }
1105 else {
1106 log_visused(r);
1107 log_isused(bestreg);
1108 }
1109 }
1110 else {
1111 if (live.state[r].status!=UNDEF)
1112 raw_mov_l_ri(bestreg,live.state[r].val);
1113 live.state[r].val=0;
1114 live.state[r].validsize=4;
1115 live.state[r].dirtysize=4;
1116 set_status(r,DIRTY);
1117 log_isused(bestreg);
1118 }
1119 }
1120 live.state[r].realreg=bestreg;
1121 live.state[r].realind=live.nat[bestreg].nholds;
1122 live.nat[bestreg].touched=touchcnt++;
1123 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1124 live.nat[bestreg].nholds++;
1125
1126 return bestreg;
1127 }
1128
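/* A note on the heuristic above: each native register is scored by
 * "badness" -- its last-touched timestamp, forced to 0 if it currently
 * holds nothing, minus a large bonus if it matches the hint. The
 * unlocked register with the lowest badness that can hold the requested
 * size wins, so empty registers and the hint are strongly preferred
 * over evicting the least recently used holder. */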
1129 static int alloc_reg(int r, int size, int willclobber)
1130 {
1131 return alloc_reg_hinted(r,size,willclobber,-1);
1132 }
1133
1134 static void unlock2(int r)
1135 {
1136 Dif (!live.nat[r].locked)
1137 abort();
1138 live.nat[r].locked--;
1139 }
1140
1141 static void setlock(int r)
1142 {
1143 live.nat[r].locked++;
1144 }
1145
1146
1147 static void mov_nregs(int d, int s)
1148 {
1149 int ns=live.nat[s].nholds;
1150 int nd=live.nat[d].nholds;
1151 int i;
1152
1153 if (s==d)
1154 return;
1155
1156 if (nd>0)
1157 free_nreg(d);
1158
1159 log_isused(d);
1160 raw_mov_l_rr(d,s);
1161
1162 for (i=0;i<live.nat[s].nholds;i++) {
1163 int vs=live.nat[s].holds[i];
1164
1165 live.state[vs].realreg=d;
1166 live.state[vs].realind=i;
1167 live.nat[d].holds[i]=vs;
1168 }
1169 live.nat[d].nholds=live.nat[s].nholds;
1170
1171 live.nat[s].nholds=0;
1172 }
1173
1174
1175 static __inline__ void make_exclusive(int r, int size, int spec)
1176 {
1177 int clobber;
1178 reg_status oldstate;
1179 int rr=live.state[r].realreg;
1180 int nr;
1181 int nind;
1182 int ndirt=0;
1183 int i;
1184
1185 if (!isinreg(r))
1186 return;
1187 if (live.nat[rr].nholds==1)
1188 return;
1189 for (i=0;i<live.nat[rr].nholds;i++) {
1190 int vr=live.nat[rr].holds[i];
1191 if (vr!=r &&
1192 (live.state[vr].status==DIRTY || live.state[vr].val))
1193 ndirt++;
1194 }
1195 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1196 /* Everything else is clean, so let's keep this register */
1197 for (i=0;i<live.nat[rr].nholds;i++) {
1198 int vr=live.nat[rr].holds[i];
1199 if (vr!=r) {
1200 evict(vr);
1201 i--; /* Try that index again! */
1202 }
1203 }
1204 Dif (live.nat[rr].nholds!=1) {
1205 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1206 rr,live.nat[rr].nholds,r);
1207 abort();
1208 }
1209 return;
1210 }
1211
1212 /* We have to split the register */
1213 oldstate=live.state[r];
1214
1215 setlock(rr); /* Make sure this doesn't go away */
1216 /* Forget about r being in the register rr */
1217 disassociate(r);
1218 /* Get a new register, that we will clobber completely */
1219 if (oldstate.status==DIRTY) {
1220 /* If dirtysize is <4, we need a register that can handle the
1221 eventual smaller memory store! Thanks to Quake68k for exposing
1222 this detail ;-) */
1223 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1224 }
1225 else {
1226 nr=alloc_reg_hinted(r,4,1,spec);
1227 }
1228 nind=live.state[r].realind;
1229 live.state[r]=oldstate; /* Keep all the old state info */
1230 live.state[r].realreg=nr;
1231 live.state[r].realind=nind;
1232
1233 if (size<live.state[r].validsize) {
1234 if (live.state[r].val) {
1235 /* Might as well compensate for the offset now */
1236 raw_lea_l_brr(nr,rr,oldstate.val);
1237 live.state[r].val=0;
1238 live.state[r].dirtysize=4;
1239 set_status(r,DIRTY);
1240 }
1241 else
1242 raw_mov_l_rr(nr,rr); /* Make another copy */
1243 }
1244 unlock2(rr);
1245 }
1246
1247 static __inline__ void add_offset(int r, uae_u32 off)
1248 {
1249 live.state[r].val+=off;
1250 }
1251
1252 static __inline__ void remove_offset(int r, int spec)
1253 {
1254 reg_status oldstate;
1255 int rr;
1256
1257 if (isconst(r))
1258 return;
1259 if (live.state[r].val==0)
1260 return;
1261 if (isinreg(r) && live.state[r].validsize<4)
1262 evict(r);
1263
1264 if (!isinreg(r))
1265 alloc_reg_hinted(r,4,0,spec);
1266
1267 Dif (live.state[r].validsize!=4) {
1268 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1269 abort();
1270 }
1271 make_exclusive(r,0,-1);
1272 /* make_exclusive might have done the job already */
1273 if (live.state[r].val==0)
1274 return;
1275
1276 rr=live.state[r].realreg;
1277
1278 if (live.nat[rr].nholds==1) {
1279 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1280 // live.state[r].val,r,rr,target);
1281 adjust_nreg(rr,live.state[r].val);
1282 live.state[r].dirtysize=4;
1283 live.state[r].val=0;
1284 set_status(r,DIRTY);
1285 return;
1286 }
1287 write_log("Failed in remove_offset\n");
1288 abort();
1289 }
1290
1291 static __inline__ void remove_all_offsets(void)
1292 {
1293 int i;
1294
1295 for (i=0;i<VREGS;i++)
1296 remove_offset(i,-1);
1297 }
1298
1299 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1300 {
1301 int n;
1302 int answer=-1;
1303
1304 if (live.state[r].status==UNDEF) {
1305 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1306 }
1307 if (!can_offset)
1308 remove_offset(r,spec);
1309
1310 if (isinreg(r) && live.state[r].validsize>=size) {
1311 n=live.state[r].realreg;
1312 switch(size) {
1313 case 1:
1314 if (live.nat[n].canbyte || spec>=0) {
1315 answer=n;
1316 }
1317 break;
1318 case 2:
1319 if (live.nat[n].canword || spec>=0) {
1320 answer=n;
1321 }
1322 break;
1323 case 4:
1324 answer=n;
1325 break;
1326 default: abort();
1327 }
1328 if (answer<0)
1329 evict(r);
1330 }
1331 /* either the value was in memory to start with, or it was evicted and
1332 is in memory now */
1333 if (answer<0) {
1334 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1335 }
1336
1337 if (spec>=0 && spec!=answer) {
1338 /* Too bad */
1339 mov_nregs(spec,answer);
1340 answer=spec;
1341 }
1342 live.nat[answer].locked++;
1343 live.nat[answer].touched=touchcnt++;
1344 return answer;
1345 }
1346
1347
1348
1349 static int readreg(int r, int size)
1350 {
1351 return readreg_general(r,size,-1,0);
1352 }
1353
1354 static int readreg_specific(int r, int size, int spec)
1355 {
1356 return readreg_general(r,size,spec,0);
1357 }
1358
1359 static int readreg_offset(int r, int size)
1360 {
1361 return readreg_general(r,size,-1,1);
1362 }
1363
1364 /* writereg_general(r, size, spec)
1365 *
1366 * INPUT
1367 * - r : mid-layer register
1368 * - size : requested size (1/2/4)
1369 * - spec : -1 if find or make a register free, otherwise specifies
1370 * the physical register to use in any case
1371 *
1372 * OUTPUT
1373 * - hard (physical, x86 here) register allocated to virtual register r
1374 */
1375 static __inline__ int writereg_general(int r, int size, int spec)
1376 {
1377 int n;
1378 int answer=-1;
1379
1380 if (size<4) {
1381 remove_offset(r,spec);
1382 }
1383
1384 make_exclusive(r,size,spec);
1385 if (isinreg(r)) {
1386 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1387 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1388 n=live.state[r].realreg;
1389
1390 Dif (live.nat[n].nholds!=1)
1391 abort();
1392 switch(size) {
1393 case 1:
1394 if (live.nat[n].canbyte || spec>=0) {
1395 live.state[r].dirtysize=ndsize;
1396 live.state[r].validsize=nvsize;
1397 answer=n;
1398 }
1399 break;
1400 case 2:
1401 if (live.nat[n].canword || spec>=0) {
1402 live.state[r].dirtysize=ndsize;
1403 live.state[r].validsize=nvsize;
1404 answer=n;
1405 }
1406 break;
1407 case 4:
1408 live.state[r].dirtysize=ndsize;
1409 live.state[r].validsize=nvsize;
1410 answer=n;
1411 break;
1412 default: abort();
1413 }
1414 if (answer<0)
1415 evict(r);
1416 }
1417 /* either the value was in memory to start with, or it was evicted and
1418 is in memory now */
1419 if (answer<0) {
1420 answer=alloc_reg_hinted(r,size,1,spec);
1421 }
1422 if (spec>=0 && spec!=answer) {
1423 mov_nregs(spec,answer);
1424 answer=spec;
1425 }
1426 if (live.state[r].status==UNDEF)
1427 live.state[r].validsize=4;
1428 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1429 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1430
1431 live.nat[answer].locked++;
1432 live.nat[answer].touched=touchcnt++;
1433 if (size==4) {
1434 live.state[r].val=0;
1435 }
1436 else {
1437 Dif (live.state[r].val) {
1438 write_log("Problem with val\n");
1439 abort();
1440 }
1441 }
1442 set_status(r,DIRTY);
1443 return answer;
1444 }
1445
1446 static int writereg(int r, int size)
1447 {
1448 return writereg_general(r,size,-1);
1449 }
1450
1451 static int writereg_specific(int r, int size, int spec)
1452 {
1453 return writereg_general(r,size,spec);
1454 }
1455
1456 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1457 {
1458 int n;
1459 int answer=-1;
1460
1461 if (live.state[r].status==UNDEF) {
1462 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1463 }
1464 remove_offset(r,spec);
1465 make_exclusive(r,0,spec);
1466
1467 Dif (wsize<rsize) {
1468 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1469 abort();
1470 }
1471 if (isinreg(r) && live.state[r].validsize>=rsize) {
1472 n=live.state[r].realreg;
1473 Dif (live.nat[n].nholds!=1)
1474 abort();
1475
1476 switch(rsize) {
1477 case 1:
1478 if (live.nat[n].canbyte || spec>=0) {
1479 answer=n;
1480 }
1481 break;
1482 case 2:
1483 if (live.nat[n].canword || spec>=0) {
1484 answer=n;
1485 }
1486 break;
1487 case 4:
1488 answer=n;
1489 break;
1490 default: abort();
1491 }
1492 if (answer<0)
1493 evict(r);
1494 }
1495 /* either the value was in memory to start with, or it was evicted and
1496 is in memory now */
1497 if (answer<0) {
1498 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1499 }
1500
1501 if (spec>=0 && spec!=answer) {
1502 /* Too bad */
1503 mov_nregs(spec,answer);
1504 answer=spec;
1505 }
1506 if (wsize>live.state[r].dirtysize)
1507 live.state[r].dirtysize=wsize;
1508 if (wsize>live.state[r].validsize)
1509 live.state[r].validsize=wsize;
1510 set_status(r,DIRTY);
1511
1512 live.nat[answer].locked++;
1513 live.nat[answer].touched=touchcnt++;
1514
1515 Dif (live.state[r].val) {
1516 write_log("Problem with val(rmw)\n");
1517 abort();
1518 }
1519 return answer;
1520 }
1521
1522 static int rmw(int r, int wsize, int rsize)
1523 {
1524 return rmw_general(r,wsize,rsize,-1);
1525 }
1526
1527 static int rmw_specific(int r, int wsize, int rsize, int spec)
1528 {
1529 return rmw_general(r,wsize,rsize,spec);
1530 }
1531
1532
1533 /* needed for restoring the carry flag on non-P6 cores */
1534 static void bt_l_ri_noclobber(R4 r, IMM i)
1535 {
1536 int size=4;
1537 if (i<16)
1538 size=2;
1539 r=readreg(r,size);
1540 raw_bt_l_ri(r,i);
1541 unlock2(r);
1542 }
1543
1544 /********************************************************************
1545 * FPU register status handling. EMIT TIME! *
1546 ********************************************************************/
1547
1548 static void f_tomem(int r)
1549 {
1550 if (live.fate[r].status==DIRTY) {
1551 #if USE_LONG_DOUBLE
1552 raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1553 #else
1554 raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1555 #endif
1556 live.fate[r].status=CLEAN;
1557 }
1558 }
1559
1560 static void f_tomem_drop(int r)
1561 {
1562 if (live.fate[r].status==DIRTY) {
1563 #if USE_LONG_DOUBLE
1564 raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1565 #else
1566 raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1567 #endif
1568 live.fate[r].status=INMEM;
1569 }
1570 }
1571
1572
1573 static __inline__ int f_isinreg(int r)
1574 {
1575 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1576 }
1577
1578 static void f_evict(int r)
1579 {
1580 int rr;
1581
1582 if (!f_isinreg(r))
1583 return;
1584 rr=live.fate[r].realreg;
1585 if (live.fat[rr].nholds==1)
1586 f_tomem_drop(r);
1587 else
1588 f_tomem(r);
1589
1590 Dif (live.fat[rr].locked &&
1591 live.fat[rr].nholds==1) {
1592 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
1593 abort();
1594 }
1595
1596 live.fat[rr].nholds--;
1597 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
1598 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
1599 int thisind=live.fate[r].realind;
1600 live.fat[rr].holds[thisind]=topreg;
1601 live.fate[topreg].realind=thisind;
1602 }
1603 live.fate[r].status=INMEM;
1604 live.fate[r].realreg=-1;
1605 }
1606
1607 static __inline__ void f_free_nreg(int r)
1608 {
1609 int i=live.fat[r].nholds;
1610
1611 while (i) {
1612 int vr;
1613
1614 --i;
1615 vr=live.fat[r].holds[i];
1616 f_evict(vr);
1617 }
1618 Dif (live.fat[r].nholds!=0) {
1619 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1620 abort();
1621 }
1622 }
1623
1624
1625 /* Use with care! */
1626 static __inline__ void f_isclean(int r)
1627 {
1628 if (!f_isinreg(r))
1629 return;
1630 live.fate[r].status=CLEAN;
1631 }
1632
1633 static __inline__ void f_disassociate(int r)
1634 {
1635 f_isclean(r);
1636 f_evict(r);
1637 }
1638
1639
1640
1641 static int f_alloc_reg(int r, int willclobber)
1642 {
1643 int bestreg;
1644 uae_s32 when;
1645 int i;
1646 uae_s32 badness;
1647 bestreg=-1;
1648 when=2000000000;
1649 for (i=N_FREGS;i--;) {
1650 badness=live.fat[i].touched;
1651 if (live.fat[i].nholds==0)
1652 badness=0;
1653
1654 if (!live.fat[i].locked && badness<when) {
1655 bestreg=i;
1656 when=badness;
1657 if (live.fat[i].nholds==0)
1658 break;
1659 }
1660 }
1661 Dif (bestreg==-1)
1662 abort();
1663
1664 if (live.fat[bestreg].nholds>0) {
1665 f_free_nreg(bestreg);
1666 }
1667 if (f_isinreg(r)) {
1668 f_evict(r);
1669 }
1670
1671 if (!willclobber) {
1672 if (live.fate[r].status!=UNDEF) {
1673 #if USE_LONG_DOUBLE
1674 raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
1675 #else
1676 raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
1677 #endif
1678 }
1679 live.fate[r].status=CLEAN;
1680 }
1681 else {
1682 live.fate[r].status=DIRTY;
1683 }
1684 live.fate[r].realreg=bestreg;
1685 live.fate[r].realind=live.fat[bestreg].nholds;
1686 live.fat[bestreg].touched=touchcnt++;
1687 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
1688 live.fat[bestreg].nholds++;
1689
1690 return bestreg;
1691 }
1692
1693 static void f_unlock(int r)
1694 {
1695 Dif (!live.fat[r].locked)
1696 abort();
1697 live.fat[r].locked--;
1698 }
1699
1700 static void f_setlock(int r)
1701 {
1702 live.fat[r].locked++;
1703 }
1704
1705 static __inline__ int f_readreg(int r)
1706 {
1707 int n;
1708 int answer=-1;
1709
1710 if (f_isinreg(r)) {
1711 n=live.fate[r].realreg;
1712 answer=n;
1713 }
1714 /* either the value was in memory to start with, or it was evicted and
1715 is in memory now */
1716 if (answer<0)
1717 answer=f_alloc_reg(r,0);
1718
1719 live.fat[answer].locked++;
1720 live.fat[answer].touched=touchcnt++;
1721 return answer;
1722 }
1723
1724 static __inline__ void f_make_exclusive(int r, int clobber)
1725 {
1726 freg_status oldstate;
1727 int rr=live.fate[r].realreg;
1728 int nr;
1729 int nind;
1730 int ndirt=0;
1731 int i;
1732
1733 if (!f_isinreg(r))
1734 return;
1735 if (live.fat[rr].nholds==1)
1736 return;
1737 for (i=0;i<live.fat[rr].nholds;i++) {
1738 int vr=live.fat[rr].holds[i];
1739 if (vr!=r && live.fate[vr].status==DIRTY)
1740 ndirt++;
1741 }
1742 if (!ndirt && !live.fat[rr].locked) {
1743 /* Everything else is clean, so let's keep this register */
1744 for (i=0;i<live.fat[rr].nholds;i++) {
1745 int vr=live.fat[rr].holds[i];
1746 if (vr!=r) {
1747 f_evict(vr);
1748 i--; /* Try that index again! */
1749 }
1750 }
1751 Dif (live.fat[rr].nholds!=1) {
1752 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
1753 for (i=0;i<live.fat[rr].nholds;i++) {
1754 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
1755 live.fate[live.fat[rr].holds[i]].realreg,
1756 live.fate[live.fat[rr].holds[i]].realind);
1757 }
1758 write_log("\n");
1759 abort();
1760 }
1761 return;
1762 }
1763
1764 /* We have to split the register */
1765 oldstate=live.fate[r];
1766
1767 f_setlock(rr); /* Make sure this doesn't go away */
1768 /* Forget about r being in the register rr */
1769 f_disassociate(r);
1770 /* Get a new register, that we will clobber completely */
1771 nr=f_alloc_reg(r,1);
1772 nind=live.fate[r].realind;
1773 if (!clobber)
1774 raw_fmov_rr(nr,rr); /* Make another copy */
1775 live.fate[r]=oldstate; /* Keep all the old state info */
1776 live.fate[r].realreg=nr;
1777 live.fate[r].realind=nind;
1778 f_unlock(rr);
1779 }
1780
1781
1782 static __inline__ int f_writereg(int r)
1783 {
1784 int n;
1785 int answer=-1;
1786
1787 f_make_exclusive(r,1);
1788 if (f_isinreg(r)) {
1789 n=live.fate[r].realreg;
1790 answer=n;
1791 }
1792 if (answer<0) {
1793 answer=f_alloc_reg(r,1);
1794 }
1795 live.fate[r].status=DIRTY;
1796 live.fat[answer].locked++;
1797 live.fat[answer].touched=touchcnt++;
1798 return answer;
1799 }
1800
1801 static int f_rmw(int r)
1802 {
1803 int n;
1804
1805 f_make_exclusive(r,0);
1806 if (f_isinreg(r)) {
1807 n=live.fate[r].realreg;
1808 }
1809 else
1810 n=f_alloc_reg(r,0);
1811 live.fate[r].status=DIRTY;
1812 live.fat[n].locked++;
1813 live.fat[n].touched=touchcnt++;
1814 return n;
1815 }
1816
1817 static void fflags_into_flags_internal(uae_u32 tmp)
1818 {
1819 int r;
1820
1821 clobber_flags();
1822 r=f_readreg(FP_RESULT);
1823 if (FFLAG_NREG_CLOBBER_CONDITION) {
1824 int tmp2=tmp;
1825 tmp=writereg_specific(tmp,4,FFLAG_NREG);
1826 raw_fflags_into_flags(r);
1827 unlock2(tmp);
1828 forget_about(tmp2);
1829 }
1830 else
1831 raw_fflags_into_flags(r);
1832 f_unlock(r);
1833 }
1834
1835
1836
1837
1838 /********************************************************************
1839 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
1840 ********************************************************************/
1841
1842 /*
1843 * RULES FOR HANDLING REGISTERS:
1844 *
1845 * * In the function headers, order the parameters
1846 * - 1st registers written to
1847 * - 2nd read/modify/write registers
1848 * - 3rd registers read from
1849 * * Before calling raw_*, you must call readreg, writereg or rmw for
1850 * each register
1851 * * The order for this is
1852 * - 1st call remove_offset for all registers written to with size<4
1853 * - 2nd call readreg for all registers read without offset
1854 * - 3rd call rmw for all rmw registers
1855 * - 4th call readreg_offset for all registers that can handle offsets
1856 * - 5th call get_offset for all the registers from the previous step
1857 * - 6th call writereg for all written-to registers
1858 * - 7th call raw_*
1859 * - 8th unlock2 all registers that were locked
1860 */
1861
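/* A hypothetical MIDFUNC following the rules above -- an illustrative
   sketch only; it is not part of the original file, and raw_add_l_rr /
   CLOBBER_ADD are assumed from codegen_x86.cpp and compemu.h. */
MIDFUNC(2,example_add_l,(RW4 d, R4 s))
{
	CLOBBER_ADD;
	s=readreg(s,4);    /* registers read without offset first */
	d=rmw(d,4,4);      /* then read/modify/write registers */
	raw_add_l_rr(d,s); /* now the raw_* emitter may run */
	unlock2(d);        /* finally unlock everything locked above */
	unlock2(s);
}
MENDFUNC(2,example_add_l,(RW4 d, R4 s))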
1862 MIDFUNC(0,live_flags,(void))
1863 {
1864 live.flags_on_stack=TRASH;
1865 live.flags_in_flags=VALID;
1866 live.flags_are_important=1;
1867 }
1868 MENDFUNC(0,live_flags,(void))
1869
1870 MIDFUNC(0,dont_care_flags,(void))
1871 {
1872 live.flags_are_important=0;
1873 }
1874 MENDFUNC(0,dont_care_flags,(void))
1875
1876
1877 MIDFUNC(0,duplicate_carry,(void))
1878 {
1879 evict(FLAGX);
1880 make_flags_live_internal();
1881 COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
1882 log_vwrite(FLAGX);
1883 }
1884 MENDFUNC(0,duplicate_carry,(void))
1885
1886 MIDFUNC(0,restore_carry,(void))
1887 {
1888 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
1889 bt_l_ri_noclobber(FLAGX,0);
1890 }
1891 else { /* Avoid the stall the above creates.
1892 This is slow on non-P6, though.
1893 */
1894 COMPCALL(rol_b_ri(FLAGX,8));
1895 isclean(FLAGX);
1896 }
1897 }
1898 MENDFUNC(0,restore_carry,(void))
1899
1900 MIDFUNC(0,start_needflags,(void))
1901 {
1902 needflags=1;
1903 }
1904 MENDFUNC(0,start_needflags,(void))
1905
1906 MIDFUNC(0,end_needflags,(void))
1907 {
1908 needflags=0;
1909 }
1910 MENDFUNC(0,end_needflags,(void))
1911
1912 MIDFUNC(0,make_flags_live,(void))
1913 {
1914 make_flags_live_internal();
1915 }
1916 MENDFUNC(0,make_flags_live,(void))
1917
1918 MIDFUNC(1,fflags_into_flags,(W2 tmp))
1919 {
1920 clobber_flags();
1921 fflags_into_flags_internal(tmp);
1922 }
1923 MENDFUNC(1,fflags_into_flags,(W2 tmp))
1924
1925
1926 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
1927 {
1928 int size=4;
1929 if (i<16)
1930 size=2;
1931 CLOBBER_BT;
1932 r=readreg(r,size);
1933 raw_bt_l_ri(r,i);
1934 unlock2(r);
1935 }
1936 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
1937
1938 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
1939 {
1940 CLOBBER_BT;
1941 r=readreg(r,4);
1942 b=readreg(b,4);
1943 raw_bt_l_rr(r,b);
1944 unlock2(r);
1945 unlock2(b);
1946 }
1947 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
1948
1949 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
1950 {
1951 int size=4;
1952 if (i<16)
1953 size=2;
1954 CLOBBER_BT;
1955 r=rmw(r,size,size);
1956 raw_btc_l_ri(r,i);
1957 unlock2(r);
1958 }
1959 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
1960
1961 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
1962 {
1963 CLOBBER_BT;
1964 b=readreg(b,4);
1965 r=rmw(r,4,4);
1966 raw_btc_l_rr(r,b);
1967 unlock2(r);
1968 unlock2(b);
1969 }
1970 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
1971
1972
1973 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
1974 {
1975 int size=4;
1976 if (i<16)
1977 size=2;
1978 CLOBBER_BT;
1979 r=rmw(r,size,size);
1980 raw_btr_l_ri(r,i);
1981 unlock2(r);
1982 }
1983 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
1984
1985 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
1986 {
1987 CLOBBER_BT;
1988 b=readreg(b,4);
1989 r=rmw(r,4,4);
1990 raw_btr_l_rr(r,b);
1991 unlock2(r);
1992 unlock2(b);
1993 }
1994 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
1995
1996
1997 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
1998 {
1999 int size=4;
2000 if (i<16)
2001 size=2;
2002 CLOBBER_BT;
2003 r=rmw(r,size,size);
2004 raw_bts_l_ri(r,i);
2005 unlock2(r);
2006 }
2007 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2008
2009 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2010 {
2011 CLOBBER_BT;
2012 b=readreg(b,4);
2013 r=rmw(r,4,4);
2014 raw_bts_l_rr(r,b);
2015 unlock2(r);
2016 unlock2(b);
2017 }
2018 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2019
2020 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2021 {
2022 CLOBBER_MOV;
2023 d=writereg(d,4);
2024 raw_mov_l_rm(d,s);
2025 unlock2(d);
2026 }
2027 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2028
2029
2030 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2031 {
2032 r=readreg(r,4);
2033 raw_call_r(r);
2034 unlock2(r);
2035 }
2036 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2037
2038 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2039 {
2040 CLOBBER_SUB;
2041 raw_sub_l_mi(d,s) ;
2042 }
2043 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2044
2045 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2046 {
2047 CLOBBER_MOV;
2048 raw_mov_l_mi(d,s) ;
2049 }
2050 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2051
2052 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2053 {
2054 CLOBBER_MOV;
2055 raw_mov_w_mi(d,s) ;
2056 }
2057 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2058
2059 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2060 {
2061 CLOBBER_MOV;
2062 raw_mov_b_mi(d,s) ;
2063 }
2064 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2065
2066 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2067 {
2068 if (!i && !needflags)
2069 return;
2070 CLOBBER_ROL;
2071 r=rmw(r,1,1);
2072 raw_rol_b_ri(r,i);
2073 unlock2(r);
2074 }
2075 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2076
2077 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2078 {
2079 if (!i && !needflags)
2080 return;
2081 CLOBBER_ROL;
2082 r=rmw(r,2,2);
2083 raw_rol_w_ri(r,i);
2084 unlock2(r);
2085 }
2086 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2087
2088 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2089 {
2090 if (!i && !needflags)
2091 return;
2092 CLOBBER_ROL;
2093 r=rmw(r,4,4);
2094 raw_rol_l_ri(r,i);
2095 unlock2(r);
2096 }
2097 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2098
2099 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2100 {
2101 if (isconst(r)) {
2102 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2103 return;
2104 }
2105 CLOBBER_ROL;
2106 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2107 d=rmw(d,4,4);
2108 Dif (r!=1) {
2109 write_log("Illegal register %d in raw_rol_l\n",r);
2110 abort();
2111 }
2112 raw_rol_l_rr(d,r) ;
2113 unlock2(r);
2114 unlock2(d);
2115 }
2116 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2117
2118 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2119 { /* Can only do this with r==1, i.e. cl */
2120
2121 if (isconst(r)) {
2122 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2123 return;
2124 }
2125 CLOBBER_ROL;
2126 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2127 d=rmw(d,2,2);
2128 Dif (r!=1) {
2129 write_log("Illegal register %d in raw_rol_w\n",r);
2130 abort();
2131 }
2132 raw_rol_w_rr(d,r) ;
2133 unlock2(r);
2134 unlock2(d);
2135 }
2136 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2137
2138 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2139 { /* Can only do this with r==1, i.e. cl */
2140
2141 if (isconst(r)) {
2142 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2143 return;
2144 }
2145
2146 CLOBBER_ROL;
2147 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2148 d=rmw(d,1,1);
2149 Dif (r!=1) {
2150 write_log("Illegal register %d in raw_rol_b\n",r);
2151 abort();
2152 }
2153 raw_rol_b_rr(d,r) ;
2154 unlock2(r);
2155 unlock2(d);
2156 }
2157 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2158
2159
2160 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2161 {
2162 if (isconst(r)) {
2163 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2164 return;
2165 }
2166 CLOBBER_SHLL;
2167 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2168 d=rmw(d,4,4);
2169 Dif (r!=1) {
2170 write_log("Illegal register %d in raw_shll_l\n",r);
2171 abort();
2172 }
2173 raw_shll_l_rr(d,r) ;
2174 unlock2(r);
2175 unlock2(d);
2176 }
2177 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2178
2179 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2180 { /* Can only do this with r==1, i.e. cl */
2181
2182 if (isconst(r)) {
2183 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2184 return;
2185 }
2186 CLOBBER_SHLL;
2187 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2188 d=rmw(d,2,2);
2189 Dif (r!=1) {
2190 write_log("Illegal register %d in raw_shll_w\n",r);
2191 abort();
2192 }
2193 raw_shll_w_rr(d,r) ;
2194 unlock2(r);
2195 unlock2(d);
2196 }
2197 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2198
2199 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2200 { /* Can only do this with r==1, i.e. cl */
2201
2202 if (isconst(r)) {
2203 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2204 return;
2205 }
2206
2207 CLOBBER_SHLL;
2208 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2209 d=rmw(d,1,1);
2210 Dif (r!=1) {
2211 write_log("Illegal register %d in raw_shll_b\n",r);
2212 abort();
2213 }
2214 raw_shll_b_rr(d,r) ;
2215 unlock2(r);
2216 unlock2(d);
2217 }
2218 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2219
2220
2221 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2222 {
2223 if (!i && !needflags)
2224 return;
2225 CLOBBER_ROR;
2226 r=rmw(r,1,1);
2227 raw_ror_b_ri(r,i);
2228 unlock2(r);
2229 }
2230 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2231
2232 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2233 {
2234 if (!i && !needflags)
2235 return;
2236 CLOBBER_ROR;
2237 r=rmw(r,2,2);
2238 raw_ror_w_ri(r,i);
2239 unlock2(r);
2240 }
2241 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2242
2243 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2244 {
2245 if (!i && !needflags)
2246 return;
2247 CLOBBER_ROR;
2248 r=rmw(r,4,4);
2249 raw_ror_l_ri(r,i);
2250 unlock2(r);
2251 }
2252 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2253
2254 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2255 {
2256 if (isconst(r)) {
2257 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2258 return;
2259 }
2260 CLOBBER_ROR;
2261 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2262 d=rmw(d,4,4);
2263 raw_ror_l_rr(d,r) ;
2264 unlock2(r);
2265 unlock2(d);
2266 }
2267 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2268
2269 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2270 {
2271 if (isconst(r)) {
2272 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2273 return;
2274 }
2275 CLOBBER_ROR;
2276 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2277 d=rmw(d,2,2);
2278 raw_ror_w_rr(d,r) ;
2279 unlock2(r);
2280 unlock2(d);
2281 }
2282 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2283
2284 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2285 {
2286 if (isconst(r)) {
2287 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2288 return;
2289 }
2290
2291 CLOBBER_ROR;
2292 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2293 d=rmw(d,1,1);
2294 raw_ror_b_rr(d,r) ;
2295 unlock2(r);
2296 unlock2(d);
2297 }
2298 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2299
2300 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2301 {
2302 if (isconst(r)) {
2303 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2304 return;
2305 }
2306 CLOBBER_SHRL;
2307 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2308 d=rmw(d,4,4);
2309 Dif (r!=1) {
2310 	write_log("Illegal register %d in raw_shrl_l\n",r);
2311 abort();
2312 }
2313 raw_shrl_l_rr(d,r) ;
2314 unlock2(r);
2315 unlock2(d);
2316 }
2317 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2318
2319 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2320 { /* Can only do this with r==1, i.e. cl */
2321
2322 if (isconst(r)) {
2323 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2324 return;
2325 }
2326 CLOBBER_SHRL;
2327 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2328 d=rmw(d,2,2);
2329 Dif (r!=1) {
2330 	write_log("Illegal register %d in raw_shrl_w\n",r);
2331 abort();
2332 }
2333 raw_shrl_w_rr(d,r) ;
2334 unlock2(r);
2335 unlock2(d);
2336 }
2337 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2338
2339 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2340 { /* Can only do this with r==1, i.e. cl */
2341
2342 if (isconst(r)) {
2343 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2344 return;
2345 }
2346
2347 CLOBBER_SHRL;
2348 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2349 d=rmw(d,1,1);
2350 Dif (r!=1) {
2351 write_log("Illegal register %d in raw_shrl_b\n",r);
2352 abort();
2353 }
2354 raw_shrl_b_rr(d,r) ;
2355 unlock2(r);
2356 unlock2(d);
2357 }
2358 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2359
2360
2361
2362 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2363 {
2364 if (!i && !needflags)
2365 return;
2366 if (isconst(r) && !needflags) {
2367 live.state[r].val<<=i;
2368 return;
2369 }
2370 CLOBBER_SHLL;
2371 r=rmw(r,4,4);
2372 raw_shll_l_ri(r,i);
2373 unlock2(r);
2374 }
2375 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2376
2377 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2378 {
2379 if (!i && !needflags)
2380 return;
2381 CLOBBER_SHLL;
2382 r=rmw(r,2,2);
2383 raw_shll_w_ri(r,i);
2384 unlock2(r);
2385 }
2386 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2387
2388 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2389 {
2390 if (!i && !needflags)
2391 return;
2392 CLOBBER_SHLL;
2393 r=rmw(r,1,1);
2394 raw_shll_b_ri(r,i);
2395 unlock2(r);
2396 }
2397 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2398
2399 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2400 {
2401 if (!i && !needflags)
2402 return;
2403 if (isconst(r) && !needflags) {
2404 live.state[r].val>>=i;
2405 return;
2406 }
2407 CLOBBER_SHRL;
2408 r=rmw(r,4,4);
2409 raw_shrl_l_ri(r,i);
2410 unlock2(r);
2411 }
2412 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2413
2414 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2415 {
2416 if (!i && !needflags)
2417 return;
2418 CLOBBER_SHRL;
2419 r=rmw(r,2,2);
2420 raw_shrl_w_ri(r,i);
2421 unlock2(r);
2422 }
2423 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2424
2425 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2426 {
2427 if (!i && !needflags)
2428 return;
2429 CLOBBER_SHRL;
2430 r=rmw(r,1,1);
2431 raw_shrl_b_ri(r,i);
2432 unlock2(r);
2433 }
2434 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2435
2436 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2437 {
2438 if (!i && !needflags)
2439 return;
2440 CLOBBER_SHRA;
2441 r=rmw(r,4,4);
2442 raw_shra_l_ri(r,i);
2443 unlock2(r);
2444 }
2445 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2446
2447 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2448 {
2449 if (!i && !needflags)
2450 return;
2451 CLOBBER_SHRA;
2452 r=rmw(r,2,2);
2453 raw_shra_w_ri(r,i);
2454 unlock2(r);
2455 }
2456 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2457
2458 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2459 {
2460 if (!i && !needflags)
2461 return;
2462 CLOBBER_SHRA;
2463 r=rmw(r,1,1);
2464 raw_shra_b_ri(r,i);
2465 unlock2(r);
2466 }
2467 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2468
2469 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2470 {
2471 if (isconst(r)) {
2472 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2473 return;
2474 }
2475 CLOBBER_SHRA;
2476 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2477 d=rmw(d,4,4);
2478 Dif (r!=1) {
2479 	write_log("Illegal register %d in raw_shra_l\n",r);
2480 abort();
2481 }
2482 raw_shra_l_rr(d,r) ;
2483 unlock2(r);
2484 unlock2(d);
2485 }
2486 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2487
2488 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2489 { /* Can only do this with r==1, i.e. cl */
2490
2491 if (isconst(r)) {
2492 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2493 return;
2494 }
2495 CLOBBER_SHRA;
2496 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2497 d=rmw(d,2,2);
2498 Dif (r!=1) {
2499 	write_log("Illegal register %d in raw_shra_w\n",r);
2500 abort();
2501 }
2502 raw_shra_w_rr(d,r) ;
2503 unlock2(r);
2504 unlock2(d);
2505 }
2506 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2507
2508 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2509 { /* Can only do this with r==1, i.e. cl */
2510
2511 if (isconst(r)) {
2512 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2513 return;
2514 }
2515
2516 CLOBBER_SHRA;
2517 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2518 d=rmw(d,1,1);
2519 Dif (r!=1) {
2520 write_log("Illegal register %d in raw_shra_b\n",r);
2521 abort();
2522 }
2523 raw_shra_b_rr(d,r) ;
2524 unlock2(r);
2525 unlock2(d);
2526 }
2527 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2528
2529
2530 MIDFUNC(2,setcc,(W1 d, IMM cc))
2531 {
2532 CLOBBER_SETCC;
2533 d=writereg(d,1);
2534 raw_setcc(d,cc);
2535 unlock2(d);
2536 }
2537 MENDFUNC(2,setcc,(W1 d, IMM cc))
2538
2539 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2540 {
2541 CLOBBER_SETCC;
2542 raw_setcc_m(d,cc);
2543 }
2544 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2545
2546 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2547 {
2548 if (d==s)
2549 return;
2550 CLOBBER_CMOV;
2551 s=readreg(s,4);
2552 d=rmw(d,4,4);
2553 raw_cmov_l_rr(d,s,cc);
2554 unlock2(s);
2555 unlock2(d);
2556 }
2557 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2558
2559 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2560 {
2561 CLOBBER_CMOV;
2562 d=rmw(d,4,4);
2563 raw_cmov_l_rm(d,s,cc);
2564 unlock2(d);
2565 }
2566 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2567
2568 MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2569 {
2570 CLOBBER_BSF;
2571 s=readreg(s,4);
2572 d=writereg(d,4);
2573 raw_bsf_l_rr(d,s);
2574 unlock2(s);
2575 unlock2(d);
2576 }
2577 MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2578
2579 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2580 {
2581 CLOBBER_MUL;
2582 s=readreg(s,4);
2583 d=rmw(d,4,4);
2584 raw_imul_32_32(d,s);
2585 unlock2(s);
2586 unlock2(d);
2587 }
2588 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2589
2590 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2591 {
2592 CLOBBER_MUL;
2593 s=rmw_specific(s,4,4,MUL_NREG2);
2594 d=rmw_specific(d,4,4,MUL_NREG1);
2595 raw_imul_64_32(d,s);
2596 unlock2(s);
2597 unlock2(d);
2598 }
2599 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
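/* rmw_specific() is needed above because the one-operand IMUL/MUL forms on
   IA-32 deliver their 64-bit product into the fixed EDX:EAX pair, so both
   virtual registers must be pinned to those hard registers (MUL_NREG1 and
   MUL_NREG2). Sketch of the arithmetic, assuming d receives the low half
   and s the high half (illustrative only): */
static inline void imul_64_32_sketch(uae_u32 *d, uae_u32 *s)
{
	uae_s64 p = (uae_s64)(uae_s32)*d * (uae_s32)*s;
	*d = (uae_u32)p;			/* low 32 bits of the product */
	*s = (uae_u32)((uae_u64)p >> 32);	/* high 32 bits */
}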
2600
2601 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2602 {
2603 CLOBBER_MUL;
2604 s=rmw_specific(s,4,4,MUL_NREG2);
2605 d=rmw_specific(d,4,4,MUL_NREG1);
2606 raw_mul_64_32(d,s);
2607 unlock2(s);
2608 unlock2(d);
2609 }
2610 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2611
2612 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
2613 {
2614 CLOBBER_MUL;
2615 s=readreg(s,4);
2616 d=rmw(d,4,4);
2617 raw_mul_32_32(d,s);
2618 unlock2(s);
2619 unlock2(d);
2620 }
2621 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2622
2623 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
2624 {
2625 int isrmw;
2626
2627 if (isconst(s)) {
2628 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
2629 return;
2630 }
2631
2632 CLOBBER_SE16;
2633 isrmw=(s==d);
2634 if (!isrmw) {
2635 s=readreg(s,2);
2636 d=writereg(d,4);
2637 }
2638 else { /* If we try to lock this twice, with different sizes, we
2639 	are in trouble! */
2640 s=d=rmw(s,4,2);
2641 }
2642 raw_sign_extend_16_rr(d,s);
2643 if (!isrmw) {
2644 unlock2(d);
2645 unlock2(s);
2646 }
2647 else {
2648 unlock2(s);
2649 }
2650 }
2651 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
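/* The isconst() fast path above folds the extension at compile time via the
   C cast chain, e.g. (uae_s32)(uae_s16)0x00008000 == 0xFFFF8000. The s==d
   branch must take the single rmw(s,4,2) lock because locking one virtual
   register twice at different sizes is invalid, as the comment warns.
   Fold sketch (illustrative only): */
static inline uae_u32 se16_fold_sketch(uae_u32 v)
{
	return (uae_u32)(uae_s32)(uae_s16)v;	/* 0x00008000 -> 0xFFFF8000 */
}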
2652
2653 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
2654 {
2655 int isrmw;
2656
2657 if (isconst(s)) {
2658 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
2659 return;
2660 }
2661
2662 isrmw=(s==d);
2663 CLOBBER_SE8;
2664 if (!isrmw) {
2665 s=readreg(s,1);
2666 d=writereg(d,4);
2667 }
2668 else { /* If we try to lock this twice, with different sizes, we
2669 	are in trouble! */
2670 s=d=rmw(s,4,1);
2671 }
2672
2673 raw_sign_extend_8_rr(d,s);
2674
2675 if (!isrmw) {
2676 unlock2(d);
2677 unlock2(s);
2678 }
2679 else {
2680 unlock2(s);
2681 }
2682 }
2683 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
2684
2685
2686 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
2687 {
2688 int isrmw;
2689
2690 if (isconst(s)) {
2691 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
2692 return;
2693 }
2694
2695 isrmw=(s==d);
2696 CLOBBER_ZE16;
2697 if (!isrmw) {
2698 s=readreg(s,2);
2699 d=writereg(d,4);
2700 }
2701 else { /* If we try to lock this twice, with different sizes, we
2702 	are in trouble! */
2703 s=d=rmw(s,4,2);
2704 }
2705 raw_zero_extend_16_rr(d,s);
2706 if (!isrmw) {
2707 unlock2(d);
2708 unlock2(s);
2709 }
2710 else {
2711 unlock2(s);
2712 }
2713 }
2714 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
2715
2716 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
2717 {
2718 int isrmw;
2719 if (isconst(s)) {
2720 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
2721 return;
2722 }
2723
2724 isrmw=(s==d);
2725 CLOBBER_ZE8;
2726 if (!isrmw) {
2727 s=readreg(s,1);
2728 d=writereg(d,4);
2729 }
2730 else { /* If we try to lock this twice, with different sizes, we
2731 	are in trouble! */
2732 s=d=rmw(s,4,1);
2733 }
2734
2735 raw_zero_extend_8_rr(d,s);
2736
2737 if (!isrmw) {
2738 unlock2(d);
2739 unlock2(s);
2740 }
2741 else {
2742 unlock2(s);
2743 }
2744 }
2745 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
2746
2747 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
2748 {
2749 if (d==s)
2750 return;
2751 if (isconst(s)) {
2752 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
2753 return;
2754 }
2755
2756 CLOBBER_MOV;
2757 s=readreg(s,1);
2758 d=writereg(d,1);
2759 raw_mov_b_rr(d,s);
2760 unlock2(d);
2761 unlock2(s);
2762 }
2763 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
2764
2765 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
2766 {
2767 if (d==s)
2768 return;
2769 if (isconst(s)) {
2770 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
2771 return;
2772 }
2773
2774 CLOBBER_MOV;
2775 s=readreg(s,2);
2776 d=writereg(d,2);
2777 raw_mov_w_rr(d,s);
2778 unlock2(d);
2779 unlock2(s);
2780 }
2781 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
2782
2783
2784 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
2785 {
2786 CLOBBER_MOV;
2787 baser=readreg(baser,4);
2788 index=readreg(index,4);
2789 d=writereg(d,4);
2790
2791 raw_mov_l_rrm_indexed(d,baser,index,factor);
2792 unlock2(d);
2793 unlock2(baser);
2794 unlock2(index);
2795 }
2796 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
2797
2798 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
2799 {
2800 CLOBBER_MOV;
2801 baser=readreg(baser,4);
2802 index=readreg(index,4);
2803 d=writereg(d,2);
2804
2805 raw_mov_w_rrm_indexed(d,baser,index,factor);
2806 unlock2(d);
2807 unlock2(baser);
2808 unlock2(index);
2809 }
2810 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
2811
2812 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
2813 {
2814 CLOBBER_MOV;
2815 baser=readreg(baser,4);
2816 index=readreg(index,4);
2817 d=writereg(d,1);
2818
2819 raw_mov_b_rrm_indexed(d,baser,index,factor);
2820
2821 unlock2(d);
2822 unlock2(baser);
2823 unlock2(index);
2824 }
2825 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
2826
2827
2828 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
2829 {
2830 CLOBBER_MOV;
2831 baser=readreg(baser,4);
2832 index=readreg(index,4);
2833 s=readreg(s,4);
2834
2835 Dif (baser==s || index==s)
2836 abort();
2837
2838
2839 raw_mov_l_mrr_indexed(baser,index,factor,s);
2840 unlock2(s);
2841 unlock2(baser);
2842 unlock2(index);
2843 }
2844 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
2845
2846 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
2847 {
2848 CLOBBER_MOV;
2849 baser=readreg(baser,4);
2850 index=readreg(index,4);
2851 s=readreg(s,2);
2852
2853 raw_mov_w_mrr_indexed(baser,index,factor,s);
2854 unlock2(s);
2855 unlock2(baser);
2856 unlock2(index);
2857 }
2858 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
2859
2860 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2861 {
2862 CLOBBER_MOV;
2863 s=readreg(s,1);
2864 baser=readreg(baser,4);
2865 index=readreg(index,4);
2866
2867 raw_mov_b_mrr_indexed(baser,index,factor,s);
2868 unlock2(s);
2869 unlock2(baser);
2870 unlock2(index);
2871 }
2872 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2873
2874
2875 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2876 {
2877 int basereg=baser;
2878 int indexreg=index;
2879
2880 CLOBBER_MOV;
2881 s=readreg(s,4);
2882 baser=readreg_offset(baser,4);
2883 index=readreg_offset(index,4);
2884
2885 base+=get_offset(basereg);
2886 base+=factor*get_offset(indexreg);
2887
2888 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
2889 unlock2(s);
2890 unlock2(baser);
2891 unlock2(index);
2892 }
2893 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2894
2895 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2896 {
2897 int basereg=baser;
2898 int indexreg=index;
2899
2900 CLOBBER_MOV;
2901 s=readreg(s,2);
2902 baser=readreg_offset(baser,4);
2903 index=readreg_offset(index,4);
2904
2905 base+=get_offset(basereg);
2906 base+=factor*get_offset(indexreg);
2907
2908 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
2909 unlock2(s);
2910 unlock2(baser);
2911 unlock2(index);
2912 }
2913 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2914
2915 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2916 {
2917 int basereg=baser;
2918 int indexreg=index;
2919
2920 CLOBBER_MOV;
2921 s=readreg(s,1);
2922 baser=readreg_offset(baser,4);
2923 index=readreg_offset(index,4);
2924
2925 base+=get_offset(basereg);
2926 base+=factor*get_offset(indexreg);
2927
2928 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
2929 unlock2(s);
2930 unlock2(baser);
2931 unlock2(index);
2932 }
2933 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2934
2935
2936
2937 /* Read a long from base+baser+factor*index */
2938 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2939 {
2940 int basereg=baser;
2941 int indexreg=index;
2942
2943 CLOBBER_MOV;
2944 baser=readreg_offset(baser,4);
2945 index=readreg_offset(index,4);
2946 base+=get_offset(basereg);
2947 base+=factor*get_offset(indexreg);
2948 d=writereg(d,4);
2949 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
2950 unlock2(d);
2951 unlock2(baser);
2952 unlock2(index);
2953 }
2954 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
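/* The readreg_offset()/get_offset() pairs above fold any pending lazy
   constant offsets on baser and index into the immediate displacement, so
   the emitted load effectively computes the following (host-side sketch,
   assuming direct addressing; illustrative only): */
static inline uae_u32 load_brrm_indexed_sketch(uae_u32 base, uae_u32 baser,
					       uae_u32 index, uae_u32 factor)
{
	/* base has already absorbed offset(baser) + factor*offset(index) */
	return *(uae_u32 *)(uintptr)(base + baser + factor * index);
}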
2955
2956
2957 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2958 {
2959 int basereg=baser;
2960 int indexreg=index;
2961
2962 CLOBBER_MOV;
2963 remove_offset(d,-1);
2964 baser=readreg_offset(baser,4);
2965 index=readreg_offset(index,4);
2966 base+=get_offset(basereg);
2967 base+=factor*get_offset(indexreg);
2968 d=writereg(d,2);
2969 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
2970 unlock2(d);
2971 unlock2(baser);
2972 unlock2(index);
2973 }
2974 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
2975
2976
2977 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2978 {
2979 int basereg=baser;
2980 int indexreg=index;
2981
2982 CLOBBER_MOV;
2983 remove_offset(d,-1);
2984 baser=readreg_offset(baser,4);
2985 index=readreg_offset(index,4);
2986 base+=get_offset(basereg);
2987 base+=factor*get_offset(indexreg);
2988 d=writereg(d,1);
2989 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
2990 unlock2(d);
2991 unlock2(baser);
2992 unlock2(index);
2993 }
2994 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
2995
2996 /* Read a long from base+factor*index */
2997 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2998 {
2999 int indexreg=index;
3000
3001 if (isconst(index)) {
3002 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3003 return;
3004 }
3005
3006 CLOBBER_MOV;
3007 index=readreg_offset(index,4);
3008 base+=get_offset(indexreg)*factor;
3009 d=writereg(d,4);
3010
3011 raw_mov_l_rm_indexed(d,base,index,factor);
3012 unlock2(index);
3013 unlock2(d);
3014 }
3015 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3016
3017
3018 /* read the long at the address contained in s+offset and store in d */
3019 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3020 {
3021 if (isconst(s)) {
3022 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3023 return;
3024 }
3025 CLOBBER_MOV;
3026 s=readreg(s,4);
3027 d=writereg(d,4);
3028
3029 raw_mov_l_rR(d,s,offset);
3030 unlock2(d);
3031 unlock2(s);
3032 }
3033 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3034
3035 /* read the word at the address contained in s+offset and store in d */
3036 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3037 {
3038 if (isconst(s)) {
3039 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3040 return;
3041 }
3042 CLOBBER_MOV;
3043 s=readreg(s,4);
3044 d=writereg(d,2);
3045
3046 raw_mov_w_rR(d,s,offset);
3047 unlock2(d);
3048 unlock2(s);
3049 }
3050 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3051
3052 /* read the byte at the address contained in s+offset and store in d */
3053 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3054 {
3055 if (isconst(s)) {
3056 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3057 return;
3058 }
3059 CLOBBER_MOV;
3060 s=readreg(s,4);
3061 d=writereg(d,1);
3062
3063 raw_mov_b_rR(d,s,offset);
3064 unlock2(d);
3065 unlock2(s);
3066 }
3067 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3068
3069 /* read the long at the address contained in s+offset and store in d */
3070 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3071 {
3072 int sreg=s;
3073 if (isconst(s)) {
3074 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3075 return;
3076 }
3077 CLOBBER_MOV;
3078 s=readreg_offset(s,4);
3079 offset+=get_offset(sreg);
3080 d=writereg(d,4);
3081
3082 raw_mov_l_brR(d,s,offset);
3083 unlock2(d);
3084 unlock2(s);
3085 }
3086 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3087
3088 /* read the word at the address contained in s+offset and store in d */
3089 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3090 {
3091 int sreg=s;
3092 if (isconst(s)) {
3093 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3094 return;
3095 }
3096 CLOBBER_MOV;
3097 remove_offset(d,-1);
3098 s=readreg_offset(s,4);
3099 offset+=get_offset(sreg);
3100 d=writereg(d,2);
3101
3102 raw_mov_w_brR(d,s,offset);
3103 unlock2(d);
3104 unlock2(s);
3105 }
3106 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3107
3108 /* read the byte at the address contained in s+offset and store in d */
3109 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3110 {
3111 int sreg=s;
3112 if (isconst(s)) {
3113 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3114 return;
3115 }
3116 CLOBBER_MOV;
3117 remove_offset(d,-1);
3118 s=readreg_offset(s,4);
3119 offset+=get_offset(sreg);
3120 d=writereg(d,1);
3121
3122 raw_mov_b_brR(d,s,offset);
3123 unlock2(d);
3124 unlock2(s);
3125 }
3126 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3127
3128 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3129 {
3130 int dreg=d;
3131 if (isconst(d)) {
3132 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3133 return;
3134 }
3135
3136 CLOBBER_MOV;
3137 d=readreg_offset(d,4);
3138 offset+=get_offset(dreg);
3139 raw_mov_l_Ri(d,i,offset);
3140 unlock2(d);
3141 }
3142 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3143
3144 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3145 {
3146 int dreg=d;
3147 if (isconst(d)) {
3148 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3149 return;
3150 }
3151
3152 CLOBBER_MOV;
3153 d=readreg_offset(d,4);
3154 offset+=get_offset(dreg);
3155 raw_mov_w_Ri(d,i,offset);
3156 unlock2(d);
3157 }
3158 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3159
3160 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3161 {
3162 int dreg=d;
3163 if (isconst(d)) {
3164 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3165 return;
3166 }
3167
3168 CLOBBER_MOV;
3169 d=readreg_offset(d,4);
3170 offset+=get_offset(dreg);
3171 raw_mov_b_Ri(d,i,offset);
3172 unlock2(d);
3173 }
3174 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3175
3176 /* Warning! OFFSET is byte sized only! */
3177 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3178 {
3179 if (isconst(d)) {
3180 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3181 return;
3182 }
3183 if (isconst(s)) {
3184 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3185 return;
3186 }
3187
3188 CLOBBER_MOV;
3189 s=readreg(s,4);
3190 d=readreg(d,4);
3191
3192 raw_mov_l_Rr(d,s,offset);
3193 unlock2(d);
3194 unlock2(s);
3195 }
3196 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3197
3198 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3199 {
3200 if (isconst(d)) {
3201 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3202 return;
3203 }
3204 if (isconst(s)) {
3205 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3206 return;
3207 }
3208
3209 CLOBBER_MOV;
3210 s=readreg(s,2);
3211 d=readreg(d,4);
3212 raw_mov_w_Rr(d,s,offset);
3213 unlock2(d);
3214 unlock2(s);
3215 }
3216 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3217
3218 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3219 {
3220 if (isconst(d)) {
3221 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3222 return;
3223 }
3224 if (isconst(s)) {
3225 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3226 return;
3227 }
3228
3229 CLOBBER_MOV;
3230 s=readreg(s,1);
3231 d=readreg(d,4);
3232 raw_mov_b_Rr(d,s,offset);
3233 unlock2(d);
3234 unlock2(s);
3235 }
3236 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3237
3238 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3239 {
3240 if (isconst(s)) {
3241 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3242 return;
3243 }
3244 #if USE_OFFSET
3245 if (d==s) {
3246 add_offset(d,offset);
3247 return;
3248 }
3249 #endif
3250 CLOBBER_LEA;
3251 s=readreg(s,4);
3252 d=writereg(d,4);
3253 raw_lea_l_brr(d,s,offset);
3254 unlock2(d);
3255 unlock2(s);
3256 }
3257 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3258
3259 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3260 {
3261 if (!offset) {
3262 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3263 return;
3264 }
3265 CLOBBER_LEA;
3266 s=readreg(s,4);
3267 index=readreg(index,4);
3268 d=writereg(d,4);
3269
3270 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3271 unlock2(d);
3272 unlock2(index);
3273 unlock2(s);
3274 }
3275 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3276
3277 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3278 {
3279 CLOBBER_LEA;
3280 s=readreg(s,4);
3281 index=readreg(index,4);
3282 d=writereg(d,4);
3283
3284 raw_lea_l_rr_indexed(d,s,index,factor);
3285 unlock2(d);
3286 unlock2(index);
3287 unlock2(s);
3288 }
3289 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3290
3291 /* write s to the long at the address contained in d+offset */
3292 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3293 {
3294 int dreg=d;
3295 if (isconst(d)) {
3296 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3297 return;
3298 }
3299
3300 CLOBBER_MOV;
3301 s=readreg(s,4);
3302 d=readreg_offset(d,4);
3303 offset+=get_offset(dreg);
3304
3305 raw_mov_l_bRr(d,s,offset);
3306 unlock2(d);
3307 unlock2(s);
3308 }
3309 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3310
3311 /* write s to the word at the address contained in d+offset */
3312 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3313 {
3314 int dreg=d;
3315
3316 if (isconst(d)) {
3317 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3318 return;
3319 }
3320
3321 CLOBBER_MOV;
3322 s=readreg(s,2);
3323 d=readreg_offset(d,4);
3324 offset+=get_offset(dreg);
3325 raw_mov_w_bRr(d,s,offset);
3326 unlock2(d);
3327 unlock2(s);
3328 }
3329 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3330
3331 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3332 {
3333 int dreg=d;
3334 if (isconst(d)) {
3335 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3336 return;
3337 }
3338
3339 CLOBBER_MOV;
3340 s=readreg(s,1);
3341 d=readreg_offset(d,4);
3342 offset+=get_offset(dreg);
3343 raw_mov_b_bRr(d,s,offset);
3344 unlock2(d);
3345 unlock2(s);
3346 }
3347 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3348
3349 MIDFUNC(1,bswap_32,(RW4 r))
3350 {
3351 int reg=r;
3352
3353 if (isconst(r)) {
3354 uae_u32 oldv=live.state[r].val;
3355 live.state[r].val=reverse32(oldv);
3356 return;
3357 }
3358
3359 CLOBBER_SW32;
3360 r=rmw(r,4,4);
3361 raw_bswap_32(r);
3362 unlock2(r);
3363 }
3364 MENDFUNC(1,bswap_32,(RW4 r))
3365
3366 MIDFUNC(1,bswap_16,(RW2 r))
3367 {
3368 if (isconst(r)) {
3369 uae_u32 oldv=live.state[r].val;
3370 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3371 (oldv&0xffff0000);
3372 return;
3373 }
3374
3375 CLOBBER_SW16;
3376 r=rmw(r,2,2);
3377
3378 raw_bswap_16(r);
3379 unlock2(r);
3380 }
3381 MENDFUNC(1,bswap_16,(RW2 r))
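/* Worked example of the constant fold above: oldv == 0x12345678 yields
   ((0x12345678>>8)&0xff) | ((0x12345678<<8)&0xff00) | 0x12340000
   == 0x56 | 0x7800 | 0x12340000 == 0x12347856 -- only the low word's bytes
   swap; the upper half is preserved. Equivalent helper (illustrative only): */
static inline uae_u32 bswap16_fold_sketch(uae_u32 v)
{
	return ((v >> 8) & 0xff) | ((v << 8) & 0xff00) | (v & 0xffff0000);
}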
3382
3383
3384
3385 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3386 {
3387 int olds;
3388
3389 if (d==s) { /* How pointless! */
3390 return;
3391 }
3392 if (isconst(s)) {
3393 COMPCALL(mov_l_ri)(d,live.state[s].val);
3394 return;
3395 }
3396 olds=s;
3397 disassociate(d);
3398 s=readreg_offset(s,4);
3399 live.state[d].realreg=s;
3400 live.state[d].realind=live.nat[s].nholds;
3401 live.state[d].val=live.state[olds].val;
3402 live.state[d].validsize=4;
3403 live.state[d].dirtysize=4;
3404 set_status(d,DIRTY);
3405
3406 live.nat[s].holds[live.nat[s].nholds]=d;
3407 live.nat[s].nholds++;
3408 log_clobberreg(d);
3409 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3410 d,s,live.state[d].realind,live.nat[s].nholds); */
3411 unlock2(s);
3412 }
3413 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
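/* mov_l_rr above is register aliasing rather than code generation: no host
   MOV is emitted; d is merely recorded as one more holder of s's native
   register and marked DIRTY so it is written back on eviction. A minimal
   standalone model of the bookkeeping (hypothetical types, illustration
   only): */
struct nat_sketch {
	int holds[16];		/* virtual regs currently aliased to this nreg */
	int nholds;
};
static inline void alias_add_sketch(struct nat_sketch *n, int vreg)
{
	n->holds[n->nholds++] = vreg;	/* what mov_l_rr records for d */
}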
3414
3415 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3416 {
3417 if (isconst(s)) {
3418 COMPCALL(mov_l_mi)(d,live.state[s].val);
3419 return;
3420 }
3421 CLOBBER_MOV;
3422 s=readreg(s,4);
3423
3424 raw_mov_l_mr(d,s);
3425 unlock2(s);
3426 }
3427 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3428
3429
3430 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3431 {
3432 if (isconst(s)) {
3433 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3434 return;
3435 }
3436 CLOBBER_MOV;
3437 s=readreg(s,2);
3438
3439 raw_mov_w_mr(d,s);
3440 unlock2(s);
3441 }
3442 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3443
3444 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3445 {
3446 CLOBBER_MOV;
3447 d=writereg(d,2);
3448
3449 raw_mov_w_rm(d,s);
3450 unlock2(d);
3451 }
3452 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3453
3454 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3455 {
3456 if (isconst(s)) {
3457 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3458 return;
3459 }
3460
3461 CLOBBER_MOV;
3462 s=readreg(s,1);
3463
3464 raw_mov_b_mr(d,s);
3465 unlock2(s);
3466 }
3467 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3468
3469 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3470 {
3471 CLOBBER_MOV;
3472 d=writereg(d,1);
3473
3474 raw_mov_b_rm(d,s);
3475 unlock2(d);
3476 }
3477 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3478
3479 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3480 {
3481 set_const(d,s);
3482 return;
3483 }
3484 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3485
3486 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3487 {
3488 CLOBBER_MOV;
3489 d=writereg(d,2);
3490
3491 raw_mov_w_ri(d,s);
3492 unlock2(d);
3493 }
3494 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3495
3496 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3497 {
3498 CLOBBER_MOV;
3499 d=writereg(d,1);
3500
3501 raw_mov_b_ri(d,s);
3502 unlock2(d);
3503 }
3504 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3505
3506
3507 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3508 {
3509 CLOBBER_ADD;
3510 raw_add_l_mi(d,s) ;
3511 }
3512 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3513
3514 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3515 {
3516 CLOBBER_ADD;
3517 raw_add_w_mi(d,s) ;
3518 }
3519 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3520
3521 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3522 {
3523 CLOBBER_ADD;
3524 raw_add_b_mi(d,s) ;
3525 }
3526 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3527
3528
3529 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3530 {
3531 CLOBBER_TEST;
3532 d=readreg(d,4);
3533
3534 raw_test_l_ri(d,i);
3535 unlock2(d);
3536 }
3537 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3538
3539 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3540 {
3541 CLOBBER_TEST;
3542 d=readreg(d,4);
3543 s=readreg(s,4);
3544
3545 	raw_test_l_rr(d,s);
3546 unlock2(d);
3547 unlock2(s);
3548 }
3549 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3550
3551 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
3552 {
3553 CLOBBER_TEST;
3554 d=readreg(d,2);
3555 s=readreg(s,2);
3556
3557 raw_test_w_rr(d,s);
3558 unlock2(d);
3559 unlock2(s);
3560 }
3561 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
3562
3563 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
3564 {
3565 CLOBBER_TEST;
3566 d=readreg(d,1);
3567 s=readreg(s,1);
3568
3569 raw_test_b_rr(d,s);
3570 unlock2(d);
3571 unlock2(s);
3572 }
3573 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3574
3575
3576 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
3577 {
3578 if (isconst(d) && !needflags) {
3579 live.state[d].val &= i;
3580 return;
3581 }
3582
3583 CLOBBER_AND;
3584 d=rmw(d,4,4);
3585
3586 raw_and_l_ri(d,i);
3587 unlock2(d);
3588 }
3589 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
3590
3591 MIDFUNC(2,and_l,(RW4 d, R4 s))
3592 {
3593 CLOBBER_AND;
3594 s=readreg(s,4);
3595 d=rmw(d,4,4);
3596
3597 raw_and_l(d,s);
3598 unlock2(d);
3599 unlock2(s);
3600 }
3601 MENDFUNC(2,and_l,(RW4 d, R4 s))
3602
3603 MIDFUNC(2,and_w,(RW2 d, R2 s))
3604 {
3605 CLOBBER_AND;
3606 s=readreg(s,2);
3607 d=rmw(d,2,2);
3608
3609 raw_and_w(d,s);
3610 unlock2(d);
3611 unlock2(s);
3612 }
3613 MENDFUNC(2,and_w,(RW2 d, R2 s))
3614
3615 MIDFUNC(2,and_b,(RW1 d, R1 s))
3616 {
3617 CLOBBER_AND;
3618 s=readreg(s,1);
3619 d=rmw(d,1,1);
3620
3621 raw_and_b(d,s);
3622 unlock2(d);
3623 unlock2(s);
3624 }
3625 MENDFUNC(2,and_b,(RW1 d, R1 s))
3626
3627 // gb-- used for making an fpcr value in compemu_fpp.cpp
3628 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
3629 {
3630 CLOBBER_OR;
3631 d=rmw(d,4,4);
3632
3633 raw_or_l_rm(d,s);
3634 unlock2(d);
3635 }
3636 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
3637
3638 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
3639 {
3640 if (isconst(d) && !needflags) {
3641 live.state[d].val|=i;
3642 return;
3643 }
3644 CLOBBER_OR;
3645 d=rmw(d,4,4);
3646
3647 raw_or_l_ri(d,i);
3648 unlock2(d);
3649 }
3650 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
3651
3652 MIDFUNC(2,or_l,(RW4 d, R4 s))
3653 {
3654 if (isconst(d) && isconst(s) && !needflags) {
3655 live.state[d].val|=live.state[s].val;
3656 return;
3657 }
3658 CLOBBER_OR;
3659 s=readreg(s,4);
3660 d=rmw(d,4,4);
3661
3662 raw_or_l(d,s);
3663 unlock2(d);
3664 unlock2(s);
3665 }
3666 MENDFUNC(2,or_l,(RW4 d, R4 s))
3667
3668 MIDFUNC(2,or_w,(RW2 d, R2 s))
3669 {
3670 CLOBBER_OR;
3671 s=readreg(s,2);
3672 d=rmw(d,2,2);
3673
3674 raw_or_w(d,s);
3675 unlock2(d);
3676 unlock2(s);
3677 }
3678 MENDFUNC(2,or_w,(RW2 d, R2 s))
3679
3680 MIDFUNC(2,or_b,(RW1 d, R1 s))
3681 {
3682 CLOBBER_OR;
3683 s=readreg(s,1);
3684 d=rmw(d,1,1);
3685
3686 raw_or_b(d,s);
3687 unlock2(d);
3688 unlock2(s);
3689 }
3690 MENDFUNC(2,or_b,(RW1 d, R1 s))
3691
3692 MIDFUNC(2,adc_l,(RW4 d, R4 s))
3693 {
3694 CLOBBER_ADC;
3695 s=readreg(s,4);
3696 d=rmw(d,4,4);
3697
3698 raw_adc_l(d,s);
3699
3700 unlock2(d);
3701 unlock2(s);
3702 }
3703 MENDFUNC(2,adc_l,(RW4 d, R4 s))
3704
3705 MIDFUNC(2,adc_w,(RW2 d, R2 s))
3706 {
3707 CLOBBER_ADC;
3708 s=readreg(s,2);
3709 d=rmw(d,2,2);
3710
3711 raw_adc_w(d,s);
3712 unlock2(d);
3713 unlock2(s);
3714 }
3715 MENDFUNC(2,adc_w,(RW2 d, R2 s))
3716
3717 MIDFUNC(2,adc_b,(RW1 d, R1 s))
3718 {
3719 CLOBBER_ADC;
3720 s=readreg(s,1);
3721 d=rmw(d,1,1);
3722
3723 raw_adc_b(d,s);
3724 unlock2(d);
3725 unlock2(s);
3726 }
3727 MENDFUNC(2,adc_b,(RW1 d, R1 s))
3728
3729 MIDFUNC(2,add_l,(RW4 d, R4 s))
3730 {
3731 if (isconst(s)) {
3732 COMPCALL(add_l_ri)(d,live.state[s].val);
3733 return;
3734 }
3735
3736 CLOBBER_ADD;
3737 s=readreg(s,4);
3738 d=rmw(d,4,4);
3739
3740 raw_add_l(d,s);
3741
3742 unlock2(d);
3743 unlock2(s);
3744 }
3745 MENDFUNC(2,add_l,(RW4 d, R4 s))
3746
3747 MIDFUNC(2,add_w,(RW2 d, R2 s))
3748 {
3749 if (isconst(s)) {
3750 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
3751 return;
3752 }
3753
3754 CLOBBER_ADD;
3755 s=readreg(s,2);
3756 d=rmw(d,2,2);
3757
3758 raw_add_w(d,s);
3759 unlock2(d);
3760 unlock2(s);
3761 }
3762 MENDFUNC(2,add_w,(RW2 d, R2 s))
3763
3764 MIDFUNC(2,add_b,(RW1 d, R1 s))
3765 {
3766 if (isconst(s)) {
3767 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
3768 return;
3769 }
3770
3771 CLOBBER_ADD;
3772 s=readreg(s,1);
3773 d=rmw(d,1,1);
3774
3775 raw_add_b(d,s);
3776 unlock2(d);
3777 unlock2(s);
3778 }
3779 MENDFUNC(2,add_b,(RW1 d, R1 s))
3780
3781 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
3782 {
3783 if (!i && !needflags)
3784 return;
3785 if (isconst(d) && !needflags) {
3786 live.state[d].val-=i;
3787 return;
3788 }
3789 #if USE_OFFSET
3790 if (!needflags) {
3791 add_offset(d,-i);
3792 return;
3793 }
3794 #endif
3795
3796 CLOBBER_SUB;
3797 d=rmw(d,4,4);
3798
3799 raw_sub_l_ri(d,i);
3800 unlock2(d);
3801 }
3802 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
3803
3804 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
3805 {
3806 if (!i && !needflags)
3807 return;
3808
3809 CLOBBER_SUB;
3810 d=rmw(d,2,2);
3811
3812 raw_sub_w_ri(d,i);
3813 unlock2(d);
3814 }
3815 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
3816
3817 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
3818 {
3819 if (!i && !needflags)
3820 return;
3821
3822 CLOBBER_SUB;
3823 d=rmw(d,1,1);
3824
3825 raw_sub_b_ri(d,i);
3826
3827 unlock2(d);
3828 }
3829 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
3830
3831 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
3832 {
3833 if (!i && !needflags)
3834 return;
3835 if (isconst(d) && !needflags) {
3836 live.state[d].val+=i;
3837 return;
3838 }
3839 #if USE_OFFSET
3840 if (!needflags) {
3841 add_offset(d,i);
3842 return;
3843 }
3844 #endif
3845 CLOBBER_ADD;
3846 d=rmw(d,4,4);
3847 raw_add_l_ri(d,i);
3848 unlock2(d);
3849 }
3850 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
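/* With USE_OFFSET, the add_l_ri/sub_l_ri fast paths above usually emit no
   code at all: when flags are not needed, the constant is accumulated as a
   pending offset on the virtual register (add_offset) and later folded into
   an addressing displacement by a readreg_offset()/get_offset() pair. The
   observable effect, sketched (illustrative only): */
static inline uae_u32 lazy_offset_sketch(uae_u32 reg_value, uae_s32 pending)
{
	/* later memory ops see value+pending although no host ADD ran */
	return reg_value + (uae_u32)pending;
}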
3851
3852 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
3853 {
3854 if (!i && !needflags)
3855 return;
3856
3857 CLOBBER_ADD;
3858 d=rmw(d,2,2);
3859
3860 raw_add_w_ri(d,i);
3861 unlock2(d);
3862 }
3863 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
3864
3865 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
3866 {
3867 if (!i && !needflags)
3868 return;
3869
3870 CLOBBER_ADD;
3871 d=rmw(d,1,1);
3872
3873 raw_add_b_ri(d,i);
3874
3875 unlock2(d);
3876 }
3877 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
3878
3879 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
3880 {
3881 CLOBBER_SBB;
3882 s=readreg(s,4);
3883 d=rmw(d,4,4);
3884
3885 raw_sbb_l(d,s);
3886 unlock2(d);
3887 unlock2(s);
3888 }
3889 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
3890
3891 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
3892 {
3893 CLOBBER_SBB;
3894 s=readreg(s,2);
3895 d=rmw(d,2,2);
3896
3897 raw_sbb_w(d,s);
3898 unlock2(d);
3899 unlock2(s);
3900 }
3901 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
3902
3903 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
3904 {
3905 CLOBBER_SBB;
3906 s=readreg(s,1);
3907 d=rmw(d,1,1);
3908
3909 raw_sbb_b(d,s);
3910 unlock2(d);
3911 unlock2(s);
3912 }
3913 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
3914
3915 MIDFUNC(2,sub_l,(RW4 d, R4 s))
3916 {
3917 if (isconst(s)) {
3918 COMPCALL(sub_l_ri)(d,live.state[s].val);
3919 return;
3920 }
3921
3922 CLOBBER_SUB;
3923 s=readreg(s,4);
3924 d=rmw(d,4,4);
3925
3926 raw_sub_l(d,s);
3927 unlock2(d);
3928 unlock2(s);
3929 }
3930 MENDFUNC(2,sub_l,(RW4 d, R4 s))
3931
3932 MIDFUNC(2,sub_w,(RW2 d, R2 s))
3933 {
3934 if (isconst(s)) {
3935 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
3936 return;
3937 }
3938
3939 CLOBBER_SUB;
3940 s=readreg(s,2);
3941 d=rmw(d,2,2);
3942
3943 raw_sub_w(d,s);
3944 unlock2(d);
3945 unlock2(s);
3946 }
3947 MENDFUNC(2,sub_w,(RW2 d, R2 s))
3948
3949 MIDFUNC(2,sub_b,(RW1 d, R1 s))
3950 {
3951 if (isconst(s)) {
3952 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
3953 return;
3954 }
3955
3956 CLOBBER_SUB;
3957 s=readreg(s,1);
3958 d=rmw(d,1,1);
3959
3960 raw_sub_b(d,s);
3961 unlock2(d);
3962 unlock2(s);
3963 }
3964 MENDFUNC(2,sub_b,(RW1 d, R1 s))
3965
3966 MIDFUNC(2,cmp_l,(R4 d, R4 s))
3967 {
3968 CLOBBER_CMP;
3969 s=readreg(s,4);
3970 d=readreg(d,4);
3971
3972 raw_cmp_l(d,s);
3973 unlock2(d);
3974 unlock2(s);
3975 }
3976 MENDFUNC(2,cmp_l,(R4 d, R4 s))
3977
3978 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
3979 {
3980 CLOBBER_CMP;
3981 r=readreg(r,4);
3982
3983 raw_cmp_l_ri(r,i);
3984 unlock2(r);
3985 }
3986 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
3987
3988 MIDFUNC(2,cmp_w,(R2 d, R2 s))
3989 {
3990 CLOBBER_CMP;
3991 s=readreg(s,2);
3992 d=readreg(d,2);
3993
3994 raw_cmp_w(d,s);
3995 unlock2(d);
3996 unlock2(s);
3997 }
3998 MENDFUNC(2,cmp_w,(R2 d, R2 s))
3999
4000 MIDFUNC(2,cmp_b,(R1 d, R1 s))
4001 {
4002 CLOBBER_CMP;
4003 s=readreg(s,1);
4004 d=readreg(d,1);
4005
4006 raw_cmp_b(d,s);
4007 unlock2(d);
4008 unlock2(s);
4009 }
4010 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4011
4012
4013 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4014 {
4015 CLOBBER_XOR;
4016 s=readreg(s,4);
4017 d=rmw(d,4,4);
4018
4019 raw_xor_l(d,s);
4020 unlock2(d);
4021 unlock2(s);
4022 }
4023 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4024
4025 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4026 {
4027 CLOBBER_XOR;
4028 s=readreg(s,2);
4029 d=rmw(d,2,2);
4030
4031 raw_xor_w(d,s);
4032 unlock2(d);
4033 unlock2(s);
4034 }
4035 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4036
4037 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4038 {
4039 CLOBBER_XOR;
4040 s=readreg(s,1);
4041 d=rmw(d,1,1);
4042
4043 raw_xor_b(d,s);
4044 unlock2(d);
4045 unlock2(s);
4046 }
4047 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4048
4049 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4050 {
4051 clobber_flags();
4052 remove_all_offsets();
4053 if (osize==4) {
4054 if (out1!=in1 && out1!=r) {
4055 COMPCALL(forget_about)(out1);
4056 }
4057 }
4058 else {
4059 tomem_c(out1);
4060 }
4061
4062 in1=readreg_specific(in1,isize,REG_PAR1);
4063 r=readreg(r,4);
4064 prepare_for_call_1(); /* This should ensure that there won't be
4065 any need for swapping nregs in prepare_for_call_2
4066 */
4067 #if USE_NORMAL_CALLING_CONVENTION
4068 raw_push_l_r(in1);
4069 #endif
4070 unlock2(in1);
4071 unlock2(r);
4072
4073 prepare_for_call_2();
4074 raw_call_r(r);
4075
4076 #if USE_NORMAL_CALLING_CONVENTION
4077 raw_inc_sp(4);
4078 #endif
4079
4080
4081 live.nat[REG_RESULT].holds[0]=out1;
4082 live.nat[REG_RESULT].nholds=1;
4083 live.nat[REG_RESULT].touched=touchcnt++;
4084
4085 live.state[out1].realreg=REG_RESULT;
4086 live.state[out1].realind=0;
4087 live.state[out1].val=0;
4088 live.state[out1].validsize=osize;
4089 live.state[out1].dirtysize=osize;
4090 set_status(out1,DIRTY);
4091 }
4092 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
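/* After the raw_call_r above, the result is bound without emitting a move:
   REG_RESULT (EAX on IA-32) is simply recorded as holding out1, dirty at
   osize, so a later flush writes it back to out1's memory slot. */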
4093
4094 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4095 {
4096 clobber_flags();
4097 remove_all_offsets();
4098 in1=readreg_specific(in1,isize1,REG_PAR1);
4099 in2=readreg_specific(in2,isize2,REG_PAR2);
4100 r=readreg(r,4);
4101 prepare_for_call_1(); /* This should ensure that there won't be
4102 any need for swapping nregs in prepare_for_call_2
4103 */
4104 #if USE_NORMAL_CALLING_CONVENTION
4105 raw_push_l_r(in2);
4106 raw_push_l_r(in1);
4107 #endif
4108 unlock2(r);
4109 unlock2(in1);
4110 unlock2(in2);
4111 prepare_for_call_2();
4112 raw_call_r(r);
4113 #if USE_NORMAL_CALLING_CONVENTION
4114 raw_inc_sp(8);
4115 #endif
4116 }
4117 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4118
4119 /* forget_about() takes a mid-layer register */
4120 MIDFUNC(1,forget_about,(W4 r))
4121 {
4122 if (isinreg(r))
4123 disassociate(r);
4124 live.state[r].val=0;
4125 set_status(r,UNDEF);
4126 }
4127 MENDFUNC(1,forget_about,(W4 r))
4128
4129 MIDFUNC(0,nop,(void))
4130 {
4131 raw_nop();
4132 }
4133 MENDFUNC(0,nop,(void))
4134
4135
4136 MIDFUNC(1,f_forget_about,(FW r))
4137 {
4138 if (f_isinreg(r))
4139 f_disassociate(r);
4140 live.fate[r].status=UNDEF;
4141 }
4142 MENDFUNC(1,f_forget_about,(FW r))
4143
4144 MIDFUNC(1,fmov_pi,(FW r))
4145 {
4146 r=f_writereg(r);
4147 raw_fmov_pi(r);
4148 f_unlock(r);
4149 }
4150 MENDFUNC(1,fmov_pi,(FW r))
4151
4152 MIDFUNC(1,fmov_log10_2,(FW r))
4153 {
4154 r=f_writereg(r);
4155 raw_fmov_log10_2(r);
4156 f_unlock(r);
4157 }
4158 MENDFUNC(1,fmov_log10_2,(FW r))
4159
4160 MIDFUNC(1,fmov_log2_e,(FW r))
4161 {
4162 r=f_writereg(r);
4163 raw_fmov_log2_e(r);
4164 f_unlock(r);
4165 }
4166 MENDFUNC(1,fmov_log2_e,(FW r))
4167
4168 MIDFUNC(1,fmov_loge_2,(FW r))
4169 {
4170 r=f_writereg(r);
4171 raw_fmov_loge_2(r);
4172 f_unlock(r);
4173 }
4174 MENDFUNC(1,fmov_loge_2,(FW r))
4175
4176 MIDFUNC(1,fmov_1,(FW r))
4177 {
4178 r=f_writereg(r);
4179 raw_fmov_1(r);
4180 f_unlock(r);
4181 }
4182 MENDFUNC(1,fmov_1,(FW r))
4183
4184 MIDFUNC(1,fmov_0,(FW r))
4185 {
4186 r=f_writereg(r);
4187 raw_fmov_0(r);
4188 f_unlock(r);
4189 }
4190 MENDFUNC(1,fmov_0,(FW r))
4191
4192 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4193 {
4194 r=f_writereg(r);
4195 raw_fmov_rm(r,m);
4196 f_unlock(r);
4197 }
4198 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4199
4200 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4201 {
4202 r=f_writereg(r);
4203 raw_fmovi_rm(r,m);
4204 f_unlock(r);
4205 }
4206 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4207
4208 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4209 {
4210 r=f_readreg(r);
4211 raw_fmovi_mr(m,r);
4212 f_unlock(r);
4213 }
4214 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4215
4216 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4217 {
4218 r=f_writereg(r);
4219 raw_fmovs_rm(r,m);
4220 f_unlock(r);
4221 }
4222 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4223
4224 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4225 {
4226 r=f_readreg(r);
4227 raw_fmovs_mr(m,r);
4228 f_unlock(r);
4229 }
4230 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4231
4232 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4233 {
4234 r=f_readreg(r);
4235 raw_fmov_ext_mr(m,r);
4236 f_unlock(r);
4237 }
4238 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4239
4240 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4241 {
4242 r=f_readreg(r);
4243 raw_fmov_mr(m,r);
4244 f_unlock(r);
4245 }
4246 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4247
4248 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4249 {
4250 r=f_writereg(r);
4251 raw_fmov_ext_rm(r,m);
4252 f_unlock(r);
4253 }
4254 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4255
4256 MIDFUNC(2,fmov_rr,(FW d, FR s))
4257 {
4258 if (d==s) { /* How pointless! */
4259 return;
4260 }
4261 #if USE_F_ALIAS
4262 f_disassociate(d);
4263 s=f_readreg(s);
4264 live.fate[d].realreg=s;
4265 live.fate[d].realind=live.fat[s].nholds;
4266 live.fate[d].status=DIRTY;
4267 live.fat[s].holds[live.fat[s].nholds]=d;
4268 live.fat[s].nholds++;
4269 f_unlock(s);
4270 #else
4271 s=f_readreg(s);
4272 d=f_writereg(d);
4273 raw_fmov_rr(d,s);
4274 f_unlock(s);
4275 f_unlock(d);
4276 #endif
4277 }
4278 MENDFUNC(2,fmov_rr,(FW d, FR s))
4279
4280 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4281 {
4282 index=readreg(index,4);
4283
4284 raw_fldcw_m_indexed(index,base);
4285 unlock2(index);
4286 }
4287 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4288
4289 MIDFUNC(1,ftst_r,(FR r))
4290 {
4291 r=f_readreg(r);
4292 raw_ftst_r(r);
4293 f_unlock(r);
4294 }
4295 MENDFUNC(1,ftst_r,(FR r))
4296
4297 MIDFUNC(0,dont_care_fflags,(void))
4298 {
4299 f_disassociate(FP_RESULT);
4300 }
4301 MENDFUNC(0,dont_care_fflags,(void))
4302
4303 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4304 {
4305 s=f_readreg(s);
4306 d=f_writereg(d);
4307 raw_fsqrt_rr(d,s);
4308 f_unlock(s);
4309 f_unlock(d);
4310 }
4311 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4312
4313 MIDFUNC(2,fabs_rr,(FW d, FR s))
4314 {
4315 s=f_readreg(s);
4316 d=f_writereg(d);
4317 raw_fabs_rr(d,s);
4318 f_unlock(s);
4319 f_unlock(d);
4320 }
4321 MENDFUNC(2,fabs_rr,(FW d, FR s))
4322
4323 MIDFUNC(2,fsin_rr,(FW d, FR s))
4324 {
4325 s=f_readreg(s);
4326 d=f_writereg(d);
4327 raw_fsin_rr(d,s);
4328 f_unlock(s);
4329 f_unlock(d);
4330 }
4331 MENDFUNC(2,fsin_rr,(FW d, FR s))
4332
4333 MIDFUNC(2,fcos_rr,(FW d, FR s))
4334 {
4335 s=f_readreg(s);
4336 d=f_writereg(d);
4337 raw_fcos_rr(d,s);
4338 f_unlock(s);
4339 f_unlock(d);
4340 }
4341 MENDFUNC(2,fcos_rr,(FW d, FR s))
4342
4343 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4344 {
4345 s=f_readreg(s);
4346 d=f_writereg(d);
4347 raw_ftwotox_rr(d,s);
4348 f_unlock(s);
4349 f_unlock(d);
4350 }
4351 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4352
4353 MIDFUNC(2,fetox_rr,(FW d, FR s))
4354 {
4355 s=f_readreg(s);
4356 d=f_writereg(d);
4357 raw_fetox_rr(d,s);
4358 f_unlock(s);
4359 f_unlock(d);
4360 }
4361 MENDFUNC(2,fetox_rr,(FW d, FR s))
4362
4363 MIDFUNC(2,frndint_rr,(FW d, FR s))
4364 {
4365 s=f_readreg(s);
4366 d=f_writereg(d);
4367 raw_frndint_rr(d,s);
4368 f_unlock(s);
4369 f_unlock(d);
4370 }
4371 MENDFUNC(2,frndint_rr,(FW d, FR s))
4372
4373 MIDFUNC(2,flog2_rr,(FW d, FR s))
4374 {
4375 s=f_readreg(s);
4376 d=f_writereg(d);
4377 raw_flog2_rr(d,s);
4378 f_unlock(s);
4379 f_unlock(d);
4380 }
4381 MENDFUNC(2,flog2_rr,(FW d, FR s))
4382
4383 MIDFUNC(2,fneg_rr,(FW d, FR s))
4384 {
4385 s=f_readreg(s);
4386 d=f_writereg(d);
4387 raw_fneg_rr(d,s);
4388 f_unlock(s);
4389 f_unlock(d);
4390 }
4391 MENDFUNC(2,fneg_rr,(FW d, FR s))
4392
4393 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4394 {
4395 s=f_readreg(s);
4396 d=f_rmw(d);
4397 raw_fadd_rr(d,s);
4398 f_unlock(s);
4399 f_unlock(d);
4400 }
4401 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4402
4403 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4404 {
4405 s=f_readreg(s);
4406 d=f_rmw(d);
4407 raw_fsub_rr(d,s);
4408 f_unlock(s);
4409 f_unlock(d);
4410 }
4411 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4412
4413 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4414 {
4415 d=f_readreg(d);
4416 s=f_readreg(s);
4417 raw_fcmp_rr(d,s);
4418 f_unlock(s);
4419 f_unlock(d);
4420 }
4421 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4422
4423 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4424 {
4425 s=f_readreg(s);
4426 d=f_rmw(d);
4427 raw_fdiv_rr(d,s);
4428 f_unlock(s);
4429 f_unlock(d);
4430 }
4431 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4432
4433 MIDFUNC(2,frem_rr,(FRW d, FR s))
4434 {
4435 s=f_readreg(s);
4436 d=f_rmw(d);
4437 raw_frem_rr(d,s);
4438 f_unlock(s);
4439 f_unlock(d);
4440 }
4441 MENDFUNC(2,frem_rr,(FRW d, FR s))
4442
4443 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4444 {
4445 s=f_readreg(s);
4446 d=f_rmw(d);
4447 raw_frem1_rr(d,s);
4448 f_unlock(s);
4449 f_unlock(d);
4450 }
4451 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4452
4453 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4454 {
4455 s=f_readreg(s);
4456 d=f_rmw(d);
4457 raw_fmul_rr(d,s);
4458 f_unlock(s);
4459 f_unlock(d);
4460 }
4461 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4462
4463 /********************************************************************
4464 * Support functions exposed to gencomp. CREATE time *
4465 ********************************************************************/
4466
4467 int kill_rodent(int r)
4468 {
4469 return KILLTHERAT &&
4470 have_rat_stall &&
4471 (live.state[r].status==INMEM ||
4472 live.state[r].status==CLEAN ||
4473 live.state[r].status==ISCONST ||
4474 live.state[r].dirtysize==4);
4475 }
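/* kill_rodent() decides whether a narrow write may be widened to a full
   32-bit register write to dodge partial-register ("RAT") stalls on CPUs
   that exhibit them (have_rat_stall). Widening is safe exactly when the
   full value is reconstructible -- the register is backed by memory, clean,
   or constant -- or when all 4 bytes are dirty anyway. */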
4476
4477 uae_u32 get_const(int r)
4478 {
4479 Dif (!isconst(r)) {
4480 write_log("Register %d should be constant, but isn't\n",r);
4481 abort();
4482 }
4483 return live.state[r].val;
4484 }
4485
4486 void sync_m68k_pc(void)
4487 {
4488 if (m68k_pc_offset) {
4489 add_l_ri(PC_P,m68k_pc_offset);
4490 comp_pc_p+=m68k_pc_offset;
4491 m68k_pc_offset=0;
4492 }
4493 }
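/* sync_m68k_pc() applies the lazy-offset idea to the emulated PC: at
   compile time, translated instructions only bump m68k_pc_offset, and the
   single add_l_ri above materializes the accumulated amount when an exact
   regs.pc_p is required. Typical compile-time pattern (illustrative only): */
static void consume_opcode_word_sketch(void)
{
	m68k_pc_offset += 2;	/* track fetched bytes without emitting code */
}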
4494
4495 /********************************************************************
4496 * Scratch registers management *
4497 ********************************************************************/
4498
4499 struct scratch_t {
4500 uae_u32 regs[VREGS];
4501 fpu_register fregs[VFREGS];
4502 };
4503
4504 static scratch_t scratch;
4505
4506 /********************************************************************
4507 * Support functions exposed to newcpu *
4508 ********************************************************************/
4509
4510 static inline const char *str_on_off(bool b)
4511 {
4512 return b ? "on" : "off";
4513 }
4514
4515 static __inline__ unsigned int cft_map (unsigned int f)
4516 {
4517 #ifndef HAVE_GET_WORD_UNSWAPPED
4518 return f;
4519 #else
4520 return ((f >> 8) & 255) | ((f & 255) << 8);
4521 #endif
4522 }
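/* cft_map() compensates for byte-swapped opcode fetches: with
   HAVE_GET_WORD_UNSWAPPED, the compiler tables are indexed with the same
   swapped value the fetch produces. Worked example (illustrative only): */
static unsigned int cft_map_example(void)
{
	return cft_map(0x4e75);	/* RTS: 0x754e on swapped builds, else 0x4e75 */
}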
4523
4524 void compiler_init(void)
4525 {
4526 static bool initialized = false;
4527 if (initialized)
4528 return;
4529
4530 #ifndef WIN32
4531 // Open /dev/zero
4532 zero_fd = open("/dev/zero", O_RDWR);
4533 if (zero_fd < 0) {
4534 char str[200];
4535 sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
4536 ErrorAlert(str);
4537 QuitEmulator();
4538 }
4539 #endif
4540
4541 #if JIT_DEBUG
4542 // JIT debug mode ?
4543 JITDebug = PrefsFindBool("jitdebug");
4544 #endif
4545 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4546
4547 #ifdef USE_JIT_FPU
4548 // Use JIT compiler for FPU instructions ?
4549 avoid_fpu = !PrefsFindBool("jitfpu");
4550 #else
4551 // JIT FPU is always disabled
4552 avoid_fpu = true;
4553 #endif
4554 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4555
4556 // Get size of the translation cache (in KB)
4557 cache_size = PrefsFindInt32("jitcachesize");
4558 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
4559
4560 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4561 raw_init_cpu();
4562 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4563 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4564 write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4565
4566 // Translation cache flush mechanism
4567 lazy_flush = PrefsFindBool("jitlazyflush");
4568 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
4569 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
4570
4571 // Compiler features
4572 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4573 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4574 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4575 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4576
4577 // Build compiler tables
4578 build_comp();
4579
4580 initialized = true;
4581
4582 #if PROFILE_COMPILE_TIME
4583 write_log("<JIT compiler> : gather statistics on translation time\n");
4584 emul_start_time = clock();
4585 #endif
4586 }
4587
4588 void compiler_exit(void)
4589 {
4590 #if PROFILE_COMPILE_TIME
4591 emul_end_time = clock();
4592 #endif
4593
4594 // Deallocate translation cache
4595 if (compiled_code) {
4596 vm_release(compiled_code, cache_size * 1024);
4597 compiled_code = 0;
4598 }
4599
4600 // Deallocate blockinfo pools
4601 free_blockinfo_pools();
4602
4603 #ifndef WIN32
4604 // Close /dev/zero
4605 	if (zero_fd >= 0)
4606 close(zero_fd);
4607 #endif
4608
4609 #if PROFILE_COMPILE_TIME
4610 write_log("### Compile Block statistics\n");
4611 write_log("Number of calls to compile_block : %d\n", compile_count);
4612 uae_u32 emul_time = emul_end_time - emul_start_time;
4613 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
4614 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
4615 100.0*double(compile_time)/double(emul_time));
4616 write_log("\n");
4617 #endif
4618 }
4619
4620 bool compiler_use_jit(void)
4621 {
4622 // Check for the "jit" prefs item
4623 if (!PrefsFindBool("jit"))
4624 return false;
4625
4626 	// Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
4627 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
4628 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
4629 return false;
4630 }
4631
4632 // FIXME: there are currently problems with JIT compilation and anything below a 68040
4633 if (CPUType < 4) {
4634 write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
4635 return false;
4636 }
4637
4638 return true;
4639 }
4640
4641 void init_comp(void)
4642 {
4643 int i;
4644 uae_s8* cb=can_byte;
4645 uae_s8* cw=can_word;
4646 uae_s8* au=always_used;
4647
4648 for (i=0;i<VREGS;i++) {
4649 live.state[i].realreg=-1;
4650 live.state[i].needflush=NF_SCRATCH;
4651 live.state[i].val=0;
4652 set_status(i,UNDEF);
4653 }
4654
4655 for (i=0;i<VFREGS;i++) {
4656 live.fate[i].status=UNDEF;
4657 live.fate[i].realreg=-1;
4658 live.fate[i].needflush=NF_SCRATCH;
4659 }
4660
4661 for (i=0;i<VREGS;i++) {
4662 if (i<16) { /* First 16 registers map to 68k registers */
4663 live.state[i].mem=((uae_u32*)&regs)+i;
4664 live.state[i].needflush=NF_TOMEM;
4665 set_status(i,INMEM);
4666 }
4667 else
4668 live.state[i].mem=scratch.regs+i;
4669 }
4670 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
4671 live.state[PC_P].needflush=NF_TOMEM;
4672 set_const(PC_P,(uae_u32)comp_pc_p);
4673
4674 live.state[FLAGX].mem=&(regflags.x);
4675 live.state[FLAGX].needflush=NF_TOMEM;
4676 set_status(FLAGX,INMEM);
4677
4678 live.state[FLAGTMP].mem=&(regflags.cznv);
4679 live.state[FLAGTMP].needflush=NF_TOMEM;
4680 set_status(FLAGTMP,INMEM);
4681
4682 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
4683 set_status(NEXT_HANDLER,UNDEF);
4684
4685 for (i=0;i<VFREGS;i++) {
4686 if (i<8) { /* First 8 registers map to 68k FPU registers */
4687 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
4688 live.fate[i].needflush=NF_TOMEM;
4689 live.fate[i].status=INMEM;
4690 }
4691 else if (i==FP_RESULT) {
4692 live.fate[i].mem=(uae_u32*)(&fpu.result);
4693 live.fate[i].needflush=NF_TOMEM;
4694 live.fate[i].status=INMEM;
4695 }
4696 else
4697 live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
4698 }
4699
4700
4701 for (i=0;i<N_REGS;i++) {
4702 live.nat[i].touched=0;
4703 live.nat[i].nholds=0;
4704 live.nat[i].locked=0;
4705 if (*cb==i) {
4706 live.nat[i].canbyte=1; cb++;
4707 } else live.nat[i].canbyte=0;
4708 if (*cw==i) {
4709 live.nat[i].canword=1; cw++;
4710 } else live.nat[i].canword=0;
4711 if (*au==i) {
4712 live.nat[i].locked=1; au++;
4713 }
4714 }
4715
4716 for (i=0;i<N_FREGS;i++) {
4717 live.fat[i].touched=0;
4718 live.fat[i].nholds=0;
4719 live.fat[i].locked=0;
4720 }
4721
4722 touchcnt=1;
4723 m68k_pc_offset=0;
4724 live.flags_in_flags=TRASH;
4725 live.flags_on_stack=VALID;
4726 live.flags_are_important=1;
4727
4728 raw_fp_init();
4729 }
4730
4731 /* Only do this if you really mean it! The next call should be to init!*/
4732 void flush(int save_regs)
4733 {
4734 	int i;
4735
4736 log_flush();
4737 flush_flags(); /* low level */
4738 sync_m68k_pc(); /* mid level */
4739
4740 if (save_regs) {
4741 for (i=0;i<VFREGS;i++) {
4742 if (live.fate[i].needflush==NF_SCRATCH ||
4743 live.fate[i].status==CLEAN) {
4744 f_disassociate(i);
4745 }
4746 }
4747 for (i=0;i<VREGS;i++) {
4748 if (live.state[i].needflush==NF_TOMEM) {
4749 switch(live.state[i].status) {
4750 case INMEM:
4751 if (live.state[i].val) {
4752 raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
4753 log_vwrite(i);
4754 live.state[i].val=0;
4755 }
4756 break;
4757 case CLEAN:
4758 case DIRTY:
4759 remove_offset(i,-1); tomem(i); break;
4760 case ISCONST:
4761 if (i!=PC_P)
4762 writeback_const(i);
4763 break;
4764 default: break;
4765 }
4766 Dif (live.state[i].val && i!=PC_P) {
4767 write_log("Register %d still has val %x\n",
4768 i,live.state[i].val);
4769 }
4770 }
4771 }
4772 for (i=0;i<VFREGS;i++) {
4773 if (live.fate[i].needflush==NF_TOMEM &&
4774 live.fate[i].status==DIRTY) {
4775 f_evict(i);
4776 }
4777 }
4778 raw_fp_cleanup_drop();
4779 }
4780 if (needflags) {
4781 write_log("Warning! flush with needflags=1!\n");
4782 }
4783 }
4784
4785 static void flush_keepflags(void)
4786 {
4787 	int i;
4788
4789 for (i=0;i<VFREGS;i++) {
4790 if (live.fate[i].needflush==NF_SCRATCH ||
4791 live.fate[i].status==CLEAN) {
4792 f_disassociate(i);
4793 }
4794 }
4795 for (i=0;i<VREGS;i++) {
4796 if (live.state[i].needflush==NF_TOMEM) {
4797 switch(live.state[i].status) {
4798 case INMEM:
4799 /* Can't adjust the offset here --- that needs "add" */
4800 break;
4801 case CLEAN:
4802 case DIRTY:
4803 remove_offset(i,-1); tomem(i); break;
4804 case ISCONST:
4805 if (i!=PC_P)
4806 writeback_const(i);
4807 break;
4808 default: break;
4809 }
4810 }
4811 }
4812 for (i=0;i<VFREGS;i++) {
4813 if (live.fate[i].needflush==NF_TOMEM &&
4814 live.fate[i].status==DIRTY) {
4815 f_evict(i);
4816 }
4817 }
4818 raw_fp_cleanup_drop();
4819 }
4820
4821 void freescratch(void)
4822 {
4823 int i;
4824 for (i=0;i<N_REGS;i++)
4825 if (live.nat[i].locked && i!=4)
4826 write_log("Warning! %d is locked\n",i);
4827
4828 for (i=0;i<VREGS;i++)
4829 if (live.state[i].needflush==NF_SCRATCH) {
4830 forget_about(i);
4831 }
4832
4833 for (i=0;i<VFREGS;i++)
4834 if (live.fate[i].needflush==NF_SCRATCH) {
4835 f_forget_about(i);
4836 }
4837 }
4838
4839 /********************************************************************
4840 * Support functions, internal *
4841 ********************************************************************/
4842
4843
4844 static void align_target(uae_u32 a)
4845 {
4846 /* Fill with NOPs --- makes debugging with gdb easier */
4847 while ((uae_u32)target&(a-1))
4848 *target++=0x90;
4849 }
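
/* Note: this only works for power-of-two alignments, since it relies on
   (target & (a-1)) being the distance past the last a-byte boundary.
   E.g. with a==32, between 0 and 31 NOP bytes (0x90) are emitted. */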
4850
4851 static __inline__ int isinrom(uintptr addr)
4852 {
4853 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
4854 }
4855
4856 static void flush_all(void)
4857 {
4858 int i;
4859
4860 log_flush();
4861 for (i=0;i<VREGS;i++)
4862 if (live.state[i].status==DIRTY) {
4863 if (!call_saved[live.state[i].realreg]) {
4864 tomem(i);
4865 }
4866 }
4867 for (i=0;i<VFREGS;i++)
4868 if (f_isinreg(i))
4869 f_evict(i);
4870 raw_fp_cleanup_drop();
4871 }
4872
4873 /* Make sure all registers that will get clobbered by a call are
4874 safe and sound in memory */
4875 static void prepare_for_call_1(void)
4876 {
4877 flush_all(); /* If there are registers that don't get clobbered,
4878 * we should be a bit more selective here */
4879 }
4880
4881 /* We will call a C routine in a moment. That will clobber all registers,
4882 so we need to disassociate everything */
4883 static void prepare_for_call_2(void)
4884 {
4885 int i;
4886 for (i=0;i<N_REGS;i++)
4887 if (!call_saved[i] && live.nat[i].nholds>0)
4888 free_nreg(i);
4889
4890 for (i=0;i<N_FREGS;i++)
4891 if (live.fat[i].nholds>0)
4892 f_free_nreg(i);
4893
4894 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
4895 flags at the very start of the call_r
4896 functions! */
4897 }
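
/* On x86 with the usual C calling conventions, only EBX/ESI/EDI/EBP are
   callee-saved; EAX, ECX and EDX may be clobbered by the callee, which is
   what the call_saved[] table is assumed to encode here. */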
4898
4899 /********************************************************************
4900 * Memory access and related functions, CREATE time *
4901 ********************************************************************/
4902
4903 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
4904 {
4905 next_pc_p=not_taken;
4906 taken_pc_p=taken;
4907 branch_cc=cond;
4908 }
4909
4910
4911 static uae_u32 get_handler_address(uae_u32 addr)
4912 {
4913 uae_u32 cl=cacheline(addr);
4914 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
4915 return (uae_u32)&(bi->direct_handler_to_use);
4916 }
4917
4918 static uae_u32 get_handler(uae_u32 addr)
4919 {
4921 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
4922 return (uae_u32)bi->direct_handler_to_use;
4923 }
4924
4925 static void load_handler(int reg, uae_u32 addr)
4926 {
4927 mov_l_rm(reg,get_handler_address(addr));
4928 }
4929
4930 /* This version assumes that it is writing *real* memory, and *will* fail
4931 * if that assumption is wrong! No branches, no second chances, just
4932 * straight go-for-it attitude */
4933
4934 static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
4935 {
4936 int f=tmp;
4937
4938 if (clobber)
4939 f=source;
4940 switch(size) {
4941 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
4942 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
4943 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
4944 }
4945 forget_about(tmp);
4946 forget_about(f);
4947 }
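
/* Sketch of what the size==4 case emits, assuming MEMBaseDiff maps Mac
   addresses to host addresses by a constant offset:

       mov   f, source                  ; copy so the vreg keeps its value
       bswap f                          ; 68k memory is big-endian, x86 isn't
       mov   [address+MEMBaseDiff], f

   With clobber set, the copy is skipped and source itself gets swapped.
   The offset parameter is a leftover from the banked-memory variant and
   is ignored here. */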
4948
4949 void writebyte(int address, int source, int tmp)
4950 {
4951 writemem_real(address,source,20,1,tmp,0);
4952 }
4953
4954 static __inline__ void writeword_general(int address, int source, int tmp,
4955 int clobber)
4956 {
4957 writemem_real(address,source,16,2,tmp,clobber);
4958 }
4959
4960 void writeword_clobber(int address, int source, int tmp)
4961 {
4962 writeword_general(address,source,tmp,1);
4963 }
4964
4965 void writeword(int address, int source, int tmp)
4966 {
4967 writeword_general(address,source,tmp,0);
4968 }
4969
4970 static __inline__ void writelong_general(int address, int source, int tmp,
4971 int clobber)
4972 {
4973 writemem_real(address,source,12,4,tmp,clobber);
4974 }
4975
4976 void writelong_clobber(int address, int source, int tmp)
4977 {
4978 writelong_general(address,source,tmp,1);
4979 }
4980
4981 void writelong(int address, int source, int tmp)
4982 {
4983 writelong_general(address,source,tmp,0);
4984 }
4985
4986
4987
4988 /* This version assumes that it is reading *real* memory, and *will* fail
4989 * if that assumption is wrong! No branches, no second chances, just
4990 * straight go-for-it attitude */
4991
4992 static void readmem_real(int address, int dest, int offset, int size, int tmp)
4993 {
4998
4999 switch(size) {
5000 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5001 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5002 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5003 }
5004 forget_about(tmp);
5005 }
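
/* The read path mirrors the write path: both rely on the direct mapping
   host_addr = mac_addr + MEMBaseDiff, and only the 16- and 32-bit accesses
   need a byte swap; single bytes do not. */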
5006
5007 void readbyte(int address, int dest, int tmp)
5008 {
5009 readmem_real(address,dest,8,1,tmp);
5010 }
5011
5012 void readword(int address, int dest, int tmp)
5013 {
5014 readmem_real(address,dest,4,2,tmp);
5015 }
5016
5017 void readlong(int address, int dest, int tmp)
5018 {
5019 readmem_real(address,dest,0,4,tmp);
5020 }
5021
5022 void get_n_addr(int address, int dest, int tmp)
5023 {
5024 // With Real or Direct Addressing the host address is computed
5025 // directly from the virtual address below, so no bank offset
5026 // needs to be fetched first.
5036
5037 #if REAL_ADDRESSING
5038 mov_l_rr(dest, address);
5039 #elif DIRECT_ADDRESSING
5040 lea_l_brr(dest,address,MEMBaseDiff);
5041 #endif
5042 forget_about(tmp);
5043 }
5044
5045 void get_n_addr_jmp(int address, int dest, int tmp)
5046 {
5047 /* For this, we need to get the same address as the rest of UAE
5048 would --- otherwise we end up translating everything twice */
5049 get_n_addr(address,dest,tmp);
5050 }
5051
5052
5053 /* base is a register, but dp is an actual value.
5054 target is a register, as is tmp */
5055 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5056 {
5057 int reg = (dp >> 12) & 15;
5058 int regd_shift=(dp >> 9) & 3;
5059
5060 if (dp & 0x100) {
5061 int ignorebase=(dp&0x80);
5062 int ignorereg=(dp&0x40);
5063 int addbase=0;
5064 int outer=0;
5065
5066 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5067 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5068
5069 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5070 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5071
5072 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5073 if (!ignorereg) {
5074 if ((dp & 0x800) == 0)
5075 sign_extend_16_rr(target,reg);
5076 else
5077 mov_l_rr(target,reg);
5078 shll_l_ri(target,regd_shift);
5079 }
5080 else
5081 mov_l_ri(target,0);
5082
5083 /* target is now regd */
5084 if (!ignorebase)
5085 add_l(target,base);
5086 add_l_ri(target,addbase);
5087 if (dp&0x03) readlong(target,target,tmp);
5088 } else { /* do the getlong first, then add regd */
5089 if (!ignorebase) {
5090 mov_l_rr(target,base);
5091 add_l_ri(target,addbase);
5092 }
5093 else
5094 mov_l_ri(target,addbase);
5095 if (dp&0x03) readlong(target,target,tmp);
5096
5097 if (!ignorereg) {
5098 if ((dp & 0x800) == 0)
5099 sign_extend_16_rr(tmp,reg);
5100 else
5101 mov_l_rr(tmp,reg);
5102 shll_l_ri(tmp,regd_shift);
5103 /* tmp is now regd */
5104 add_l(target,tmp);
5105 }
5106 }
5107 add_l_ri(target,outer);
5108 }
5109 else { /* 68000 version */
5110 if ((dp & 0x800) == 0) { /* Sign extend */
5111 sign_extend_16_rr(target,reg);
5112 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5113 }
5114 else {
5115 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5116 }
5117 }
5118 forget_about(tmp);
5119 }
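
/* For reference, the 68020 brief/full extension word as this code decodes
   it (a sketch, not the full 68020 manual):

       bits 15-12  index register number (reg)
       bit  11     index size: 0 = sign-extended word, 1 = long
       bits 10-9   index scale: 1 << regd_shift
       bit  8      0 = brief format (8-bit displacement in the low byte),
                   1 = full format (base/outer displacements follow)
       bits 7/6    full format only: suppress base / suppress index
       bits 5-0    full format only: base/outer displacement sizes and
                   memory-indirect selection (dp & 0x30, dp & 0x3) */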
5120
5121
5122
5123
5124
5125 void set_cache_state(int enabled)
5126 {
5127 if (enabled!=letit)
5128 flush_icache_hard(77);
5129 letit=enabled;
5130 }
5131
5132 int get_cache_state(void)
5133 {
5134 return letit;
5135 }
5136
5137 uae_u32 get_jitted_size(void)
5138 {
5139 if (compiled_code)
5140 return current_compile_p-compiled_code;
5141 return 0;
5142 }
5143
5144 void alloc_cache(void)
5145 {
5146 if (compiled_code) {
5147 flush_icache_hard(6);
5148 vm_release(compiled_code, cache_size * 1024);
5149 compiled_code = 0;
5150 }
5151
5152 if (cache_size == 0)
5153 return;
5154
5155 while (!compiled_code && cache_size) {
5156 if ((compiled_code = (uae_u8 *)vm_acquire(cache_size * 1024)) == VM_MAP_FAILED) {
5157 compiled_code = 0;
5158 cache_size /= 2;
5159 }
5160 }
5161 if (compiled_code) vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5162
5163 if (compiled_code) {
5164 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5165 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5166 current_compile_p = compiled_code;
5167 current_cache_size = 0;
5168 }
5169 }
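
/* If the initial vm_acquire() fails, the requested size is halved until an
   allocation succeeds or cache_size reaches 0, so e.g. a request for
   8192 KB may silently end up as a 4096 KB cache; the write_log above
   reports the size actually obtained. */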
5170
5171
5172
5173 extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5174
5175 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5176 {
5177 uae_u32 k1=0;
5178 uae_u32 k2=0;
5179 uae_s32 len=bi->len;
5180 uae_u32 tmp=bi->min_pcp;
5181 uae_u32* pos;
5182
5183 len+=(tmp&3);
5184 tmp&=(~3);
5185 pos=(uae_u32*)tmp;
5186
5187 if (len<0 || len>MAX_CHECKSUM_LEN) {
5188 *c1=0;
5189 *c2=0;
5190 }
5191 else {
5192 while (len>0) {
5193 k1+=*pos;
5194 k2^=*pos;
5195 pos++;
5196 len-=4;
5197 }
5198 *c1=k1;
5199 *c2=k2;
5200 }
5201 }
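
/* A minimal standalone sketch of the same checksum (a running sum plus a
   running xor over the 32-bit words covering a block's 68k source),
   assuming a word-aligned buffer; the helper name is illustrative only. */
#if 0
static void checksum_sketch(const uae_u32 *p, uae_s32 len, uae_u32 *c1, uae_u32 *c2)
{
	uae_u32 k1 = 0, k2 = 0;
	while (len > 0) {
		k1 += *p;	/* order-sensitive component */
		k2 ^= *p;	/* catches changes the plain sum might miss */
		p++;
		len -= 4;
	}
	*c1 = k1;
	*c2 = k2;
}
#endif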
5202
5203 static void show_checksum(blockinfo* bi)
5204 {
5205 uae_u32 k1=0;
5206 uae_u32 k2=0;
5207 uae_s32 len=bi->len;
5208 uae_u32 tmp=(uae_u32)bi->pc_p;
5209 uae_u32* pos;
5210
5211 len+=(tmp&3);
5212 tmp&=(~3);
5213 pos=(uae_u32*)tmp;
5214
5215 if (len<0 || len>MAX_CHECKSUM_LEN) {
5216 return;
5217 }
5218 else {
5219 while (len>0) {
5220 write_log("%08x ",*pos);
5221 pos++;
5222 len-=4;
5223 }
5224 write_log(" bla\n");
5225 }
5226 }
5227
5228
5229 int check_for_cache_miss(void)
5230 {
5231 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5232
5233 if (bi) {
5234 int cl=cacheline(regs.pc_p);
5235 if (bi!=cache_tags[cl+1].bi) {
5236 raise_in_cl_list(bi);
5237 return 1;
5238 }
5239 }
5240 return 0;
5241 }
5242
5243
5244 static void recompile_block(void)
5245 {
5246 /* An existing block's countdown code has expired. We need to make
5247 sure that execute_normal doesn't refuse to recompile due to a
5248 perceived cache miss... */
5249 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5250
5251 Dif (!bi)
5252 abort();
5253 raise_in_cl_list(bi);
5254 execute_normal();
5255 return;
5256 }
5257 static void cache_miss(void)
5258 {
5259 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5260 uae_u32 cl=cacheline(regs.pc_p);
5261 blockinfo* bi2=get_blockinfo(cl);
5262
5263 if (!bi) {
5264 execute_normal(); /* Compile this block now */
5265 return;
5266 }
5267 Dif (!bi2 || bi==bi2) {
5268 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5269 abort();
5270 }
5271 raise_in_cl_list(bi);
5272 return;
5273 }
5274
5275 static int called_check_checksum(blockinfo* bi);
5276
5277 static inline int block_check_checksum(blockinfo* bi)
5278 {
5279 uae_u32 c1,c2;
5280 int isgood;
5281
5282 if (bi->status!=BI_NEED_CHECK)
5283 return 1; /* This block is in a checked state */
5284
5285 checksum_count++;
5286 if (bi->c1 || bi->c2)
5287 calc_checksum(bi,&c1,&c2);
5288 else {
5289 c1=c2=1; /* Make sure it doesn't match */
5290 }
5291
5292 isgood=(c1==bi->c1 && c2==bi->c2);
5293 if (isgood) {
5294 /* This block is still OK. So we reactivate. Of course, that
5295 means we have to move it into the needs-to-be-flushed list */
5296 bi->handler_to_use=bi->handler;
5297 set_dhtu(bi,bi->direct_handler);
5298 bi->status=BI_CHECKING;
5299 isgood=called_check_checksum(bi);
5300 }
5301 if (isgood) {
5302 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5303 c1,c2,bi->c1,bi->c2);*/
5304 remove_from_list(bi);
5305 add_to_active(bi);
5306 raise_in_cl_list(bi);
5307 bi->status=BI_ACTIVE;
5308 }
5309 else {
5310 /* This block actually changed. We need to invalidate it,
5311 and set it up to be recompiled */
5312 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5313 c1,c2,bi->c1,bi->c2); */
5314 invalidate_block(bi);
5315 raise_in_cl_list(bi);
5316 }
5317 return isgood;
5318 }
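
/* Block status transitions, as implemented above and in the lazy flush
   (a sketch of the lifecycle, not an exhaustive state machine):

     BI_INVALID     --compile-->       BI_ACTIVE
     BI_ACTIVE      --soft flush-->    BI_NEED_CHECK
     BI_NEED_CHECK  --checksum ok-->   BI_ACTIVE   (via BI_CHECKING)
     BI_NEED_CHECK  --checksum bad-->  BI_INVALID  (recompiled on next hit) */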
5319
5320 static int called_check_checksum(blockinfo* bi)
5321 {
5323 int isgood=1;
5324 int i;
5325
5326 for (i=0;i<2 && isgood;i++) {
5327 if (bi->dep[i].jmp_off) {
5328 isgood=block_check_checksum(bi->dep[i].target);
5329 }
5330 }
5331 return isgood;
5332 }
5333
5334 static void check_checksum(void)
5335 {
5336 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5337 uae_u32 cl=cacheline(regs.pc_p);
5338 blockinfo* bi2=get_blockinfo(cl);
5339
5340 /* These are not the droids you are looking for... */
5341 if (!bi) {
5342 /* Whoever is the primary target is in a dormant state, but
5343 calling it was accidental, and we should just compile this
5344 new block */
5345 execute_normal();
5346 return;
5347 }
5348 if (bi!=bi2) {
5349 /* The block was hit accidentally, but it does exist. Cache miss */
5350 cache_miss();
5351 return;
5352 }
5353
5354 if (!block_check_checksum(bi))
5355 execute_normal();
5356 }
5357
5358 static __inline__ void match_states(blockinfo* bi)
5359 {
5360 int i;
5361 smallstate* s=&(bi->env);
5362
5363 if (bi->status==BI_NEED_CHECK) {
5364 block_check_checksum(bi);
5365 }
5366 if (bi->status==BI_ACTIVE ||
5367 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5368 block makes (about not using
5369 certain vregs) */
5370 for (i=0;i<16;i++) {
5371 if (s->virt[i]==L_UNNEEDED) {
5372 // write_log("unneeded reg %d at %p\n",i,target);
5373 COMPCALL(forget_about)(i); // FIXME
5374 }
5375 }
5376 }
5377 flush(1);
5378
5379 /* And now deal with the *demands* the block makes */
5380 for (i=0;i<N_REGS;i++) {
5381 int v=s->nat[i];
5382 if (v>=0) {
5383 // printf("Loading reg %d into %d at %p\n",v,i,target);
5384 readreg_specific(v,4,i);
5385 // do_load_reg(i,v);
5386 // setlock(i);
5387 }
5388 }
5389 for (i=0;i<N_REGS;i++) {
5390 int v=s->nat[i];
5391 if (v>=0) {
5392 unlock2(i);
5393 }
5394 }
5395 }
5396
5397 static uae_u8 popallspace[1024]; /* That should be enough space */
5398
5399 static __inline__ void create_popalls(void)
5400 {
5401 int i,r;
5402
5403 current_compile_p=popallspace;
5404 set_target(current_compile_p);
5405 #if USE_PUSH_POP
5406 /* If we can't use gcc inline assembly, we need to pop some
5407 registers before jumping back to the various get-out routines.
5408 This generates the code for it.
5409 */
5410 align_target(align_jumps);
5411 popall_do_nothing=get_target();
5412 for (i=0;i<N_REGS;i++) {
5413 if (need_to_preserve[i])
5414 raw_pop_l_r(i);
5415 }
5416 raw_jmp((uae_u32)do_nothing);
5417
5418 align_target(align_jumps);
5419 popall_execute_normal=get_target();
5420 for (i=0;i<N_REGS;i++) {
5421 if (need_to_preserve[i])
5422 raw_pop_l_r(i);
5423 }
5424 raw_jmp((uae_u32)execute_normal);
5425
5426 align_target(align_jumps);
5427 popall_cache_miss=get_target();
5428 for (i=0;i<N_REGS;i++) {
5429 if (need_to_preserve[i])
5430 raw_pop_l_r(i);
5431 }
5432 raw_jmp((uae_u32)cache_miss);
5433
5434 align_target(align_jumps);
5435 popall_recompile_block=get_target();
5436 for (i=0;i<N_REGS;i++) {
5437 if (need_to_preserve[i])
5438 raw_pop_l_r(i);
5439 }
5440 raw_jmp((uae_u32)recompile_block);
5441
5442 align_target(align_jumps);
5443 popall_exec_nostats=get_target();
5444 for (i=0;i<N_REGS;i++) {
5445 if (need_to_preserve[i])
5446 raw_pop_l_r(i);
5447 }
5448 raw_jmp((uae_u32)exec_nostats);
5449
5450 align_target(align_jumps);
5451 popall_check_checksum=get_target();
5452 for (i=0;i<N_REGS;i++) {
5453 if (need_to_preserve[i])
5454 raw_pop_l_r(i);
5455 }
5456 raw_jmp((uae_u32)check_checksum);
5457
5458 align_target(align_jumps);
5459 current_compile_p=get_target();
5460 #else
5461 popall_exec_nostats=(void *)exec_nostats;
5462 popall_execute_normal=(void *)execute_normal;
5463 popall_cache_miss=(void *)cache_miss;
5464 popall_recompile_block=(void *)recompile_block;
5465 popall_do_nothing=(void *)do_nothing;
5466 popall_check_checksum=(void *)check_checksum;
5467 #endif
5468
5469 /* And now, the code to do the matching pushes and then jump
5470 into a handler routine */
5471 pushall_call_handler=get_target();
5472 #if USE_PUSH_POP
5473 for (i=N_REGS;i--;) {
5474 if (need_to_preserve[i])
5475 raw_push_l_r(i);
5476 }
5477 #endif
5478 r=REG_PC_TMP;
5479 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5480 raw_and_l_ri(r,TAGMASK);
5481 raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5482
5483 #ifdef X86_ASSEMBLY
5484 align_target(align_jumps);
5485 m68k_compile_execute = (void (*)(void))get_target();
5486 for (i=N_REGS;i--;) {
5487 if (need_to_preserve[i])
5488 raw_push_l_r(i);
5489 }
5490 align_target(align_loops);
5491 uae_u32 dispatch_loop = (uae_u32)get_target();
5492 r=REG_PC_TMP;
5493 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5494 raw_and_l_ri(r,TAGMASK);
5495 raw_call_m_indexed((uae_u32)cache_tags,r,4);
5496 raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5497 raw_jcc_b_oponly(NATIVE_CC_EQ);
5498 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5499 raw_call((uae_u32)m68k_do_specialties);
5500 raw_test_l_rr(REG_RESULT,REG_RESULT);
5501 raw_jcc_b_oponly(NATIVE_CC_EQ);
5502 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5503 raw_cmp_b_mi((uae_u32)&quit_program,0);
5504 raw_jcc_b_oponly(NATIVE_CC_EQ);
5505 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5506 for (i=0;i<N_REGS;i++) {
5507 if (need_to_preserve[i])
5508 raw_pop_l_r(i);
5509 }
5510 raw_ret();
5511 #endif
5512 }
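
/* What the X86_ASSEMBLY dispatcher above boils down to, in pseudo-assembly
   (a sketch; the label is illustrative):

       push  <callee-saved regs>
   loop:
       mov   r, [regs.pc_p]
       and   r, TAGMASK
       call  cache_tags[r*4]            ; block handler or miss stub
       cmp   [regs.spcflags], 0
       je    loop
       call  m68k_do_specialties
       test  eax, eax
       je    loop
       cmp   byte [quit_program], 0
       je    loop
       pop   <callee-saved regs>
       ret                              */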
5513
5514 static __inline__ void reset_lists(void)
5515 {
5516 int i;
5517
5518 for (i=0;i<MAX_HOLD_BI;i++)
5519 hold_bi[i]=NULL;
5520 active=NULL;
5521 dormant=NULL;
5522 }
5523
5524 static void prepare_block(blockinfo* bi)
5525 {
5526 int i;
5527
5528 set_target(current_compile_p);
5529 align_target(align_jumps);
5530 bi->direct_pen=(cpuop_func *)get_target();
5531 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5532 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5533 raw_jmp((uae_u32)popall_execute_normal);
5534
5535 align_target(align_jumps);
5536 bi->direct_pcc=(cpuop_func *)get_target();
5537 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5538 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5539 raw_jmp((uae_u32)popall_check_checksum);
5540 current_compile_p=get_target();
5541
5542 bi->deplist=NULL;
5543 for (i=0;i<2;i++) {
5544 bi->dep[i].prev_p=NULL;
5545 bi->dep[i].next=NULL;
5546 }
5547 bi->env=default_ss;
5548 bi->status=BI_INVALID;
5549 bi->havestate=0;
5550 //bi->env=empty_ss;
5551 }
5552
5553 void build_comp(void)
5554 {
5555 int i;
5556 int jumpcount=0;
5557 unsigned long opcode;
5558 struct comptbl* tbl=op_smalltbl_0_comp_ff;
5559 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
5560 int count;
5561 int cpu_level = 0; // 68000 (default)
5562 if (CPUType == 4)
5563 cpu_level = 4; // 68040 with FPU
5564 else {
5565 if (FPUType)
5566 cpu_level = 3; // 68020 with FPU
5567 else if (CPUType >= 2)
5568 cpu_level = 2; // 68020
5569 else if (CPUType == 1)
5570 cpu_level = 1;
5571 }
5572 struct cputbl *nfctbl = (
5573 cpu_level == 4 ? op_smalltbl_0_nf
5574 : cpu_level == 3 ? op_smalltbl_1_nf
5575 : cpu_level == 2 ? op_smalltbl_2_nf
5576 : cpu_level == 1 ? op_smalltbl_3_nf
5577 : op_smalltbl_4_nf);
5578
5579 write_log ("<JIT compiler> : building compiler function tables\n");
5580
5581 for (opcode = 0; opcode < 65536; opcode++) {
5582 nfcpufunctbl[opcode] = op_illg_1;
5583 compfunctbl[opcode] = NULL;
5584 nfcompfunctbl[opcode] = NULL;
5585 prop[opcode].use_flags = 0x1f;
5586 prop[opcode].set_flags = 0x1f;
5587 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
5588 }
5589
5590 for (i = 0; tbl[i].opcode < 65536; i++) {
5591 int cflow = table68k[tbl[i].opcode].cflow;
5592 prop[cft_map(tbl[i].opcode)].cflow = cflow;
5593
5594 int uses_fpu = tbl[i].specific & 32;
5595 if (uses_fpu && avoid_fpu)
5596 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
5597 else
5598 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
5599 }
5600
5601 for (i = 0; nftbl[i].opcode < 65536; i++) {
5602 int uses_fpu = nftbl[i].specific & 32;
5603 if (uses_fpu && avoid_fpu)
5604 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
5605 else
5606 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
5607
5608 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
5609 }
5610
5611 for (i = 0; nfctbl[i].handler; i++) {
5612 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
5613 }
5614
5615 for (opcode = 0; opcode < 65536; opcode++) {
5616 compop_func *f;
5617 compop_func *nff;
5618 cpuop_func *nfcf;
5619 int isaddx,cflow;
5620
5621 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
5622 continue;
5623
5624 if (table68k[opcode].handler != -1) {
5625 f = compfunctbl[cft_map(table68k[opcode].handler)];
5626 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
5627 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
5628 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
5629 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
5630 prop[cft_map(opcode)].cflow = cflow;
5631 prop[cft_map(opcode)].is_addx = isaddx;
5632 compfunctbl[cft_map(opcode)] = f;
5633 nfcompfunctbl[cft_map(opcode)] = nff;
5634 Dif (nfcf == op_illg_1)
5635 abort();
5636 nfcpufunctbl[cft_map(opcode)] = nfcf;
5637 }
5638 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
5639 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
5640 }
5641 for (i = 0; nfctbl[i].handler != NULL; i++) {
5642 if (nfctbl[i].specific)
5643 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
5644 }
5645
5646 count=0;
5647 for (opcode = 0; opcode < 65536; opcode++) {
5648 if (compfunctbl[cft_map(opcode)])
5649 count++;
5650 }
5651 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
5652
5653 /* Initialise state */
5654 create_popalls();
5655 alloc_cache();
5656 reset_lists();
5657
5658 for (i=0;i<TAGSIZE;i+=2) {
5659 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
5660 cache_tags[i+1].bi=NULL;
5661 }
5662
5663 #if 0
5664 for (i=0;i<N_REGS;i++) {
5665 empty_ss.nat[i].holds=-1;
5666 empty_ss.nat[i].validsize=0;
5667 empty_ss.nat[i].dirtysize=0;
5668 }
5669 #endif
5670 for (i=0;i<VREGS;i++) {
5671 empty_ss.virt[i]=L_NEEDED;
5672 }
5673 for (i=0;i<N_REGS;i++) {
5674 empty_ss.nat[i]=L_UNKNOWN;
5675 }
5676 default_ss=empty_ss;
5677 }
5678
5679
5680 static void flush_icache_none(int n)
5681 {
5682 /* Nothing to do. */
5683 }
5684
5685 static void flush_icache_hard(int n)
5686 {
5688 blockinfo* bi, *dbi;
5689
5690 hard_flush_count++;
5691 #if 0
5692 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
5693 n,regs.pc,regs.pc_p,current_cache_size/1024);
5694 current_cache_size = 0;
5695 #endif
5696 bi=active;
5697 while(bi) {
5698 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5699 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5700 dbi=bi; bi=bi->next;
5701 free_blockinfo(dbi);
5702 }
5703 bi=dormant;
5704 while(bi) {
5705 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5706 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5707 dbi=bi; bi=bi->next;
5708 free_blockinfo(dbi);
5709 }
5710
5711 reset_lists();
5712 if (!compiled_code)
5713 return;
5714 current_compile_p=compiled_code;
5715 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
5716 }
5717
5718
5719 /* "Soft flushing" --- instead of actually throwing everything away,
5720 we simply mark everything as "needs to be checked".
5721 */
5722
5723 static inline void flush_icache_lazy(int n)
5724 {
5726 blockinfo* bi;
5727 blockinfo* bi2;
5728
5729 soft_flush_count++;
5730 if (!active)
5731 return;
5732
5733 bi=active;
5734 while (bi) {
5735 uae_u32 cl=cacheline(bi->pc_p);
5736 if (bi->status==BI_INVALID ||
5737 bi->status==BI_NEED_RECOMP) {
5738 if (bi==cache_tags[cl+1].bi)
5739 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
5740 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
5741 set_dhtu(bi,bi->direct_pen);
5742 bi->status=BI_INVALID;
5743 }
5744 else {
5745 if (bi==cache_tags[cl+1].bi)
5746 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
5747 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
5748 set_dhtu(bi,bi->direct_pcc);
5749 bi->status=BI_NEED_CHECK;
5750 }
5751 bi2=bi;
5752 bi=bi->next;
5753 }
5754 /* bi2 is now the last entry in the active list */
5755 bi2->next=dormant;
5756 if (dormant)
5757 dormant->prev_p=&(bi2->next);
5758
5759 dormant=active;
5760 active->prev_p=&dormant;
5761 active=NULL;
5762 }
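
/* The net effect: nothing is freed on a soft flush. Every active block is
   parked on the dormant list with its handler redirected to the
   check_checksum stub, and blocks whose 68k source is unchanged revalidate
   themselves on their first execution instead of being retranslated. */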
5763
5764 static void catastrophe(void)
5765 {
5766 abort();
5767 }
5768
5769 int failure;
5770
5771 #define TARGET_M68K 0
5772 #define TARGET_POWERPC 1
5773 #define TARGET_X86 2
5774 #if defined(i386) || defined(__i386__)
5775 #define TARGET_NATIVE TARGET_X86
5776 #endif
5777 #if defined(powerpc) || defined(__powerpc__)
5778 #define TARGET_NATIVE TARGET_POWERPC
5779 #endif
5780
5781 #ifdef ENABLE_MON
5782 static uae_u32 mon_read_byte_jit(uae_u32 addr)
5783 {
5784 uae_u8 *m = (uae_u8 *)addr;
5785 return (uae_u32)(*m);
5786 }
5787
5788 static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
5789 {
5790 uae_u8 *m = (uae_u8 *)addr;
5791 *m = b;
5792 }
5793 #endif
5794
5795 void disasm_block(int target, uint8 * start, size_t length)
5796 {
5797 if (!JITDebug)
5798 return;
5799
5800 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
5801 char disasm_str[200];
5802 sprintf(disasm_str, "%s $%x $%x",
5803 target == TARGET_M68K ? "d68" :
5804 target == TARGET_X86 ? "d86" :
5805 target == TARGET_POWERPC ? "d" : "x",
5806 start, start + length - 1);
5807
5808 uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
5809 void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
5810
5811 mon_read_byte = mon_read_byte_jit;
5812 mon_write_byte = mon_write_byte_jit;
5813
5814 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
5815 mon(4, arg);
5816
5817 mon_read_byte = old_mon_read_byte;
5818 mon_write_byte = old_mon_write_byte;
5819 #endif
5820 }
5821
5822 static inline void disasm_native_block(uint8 *start, size_t length)
5823 {
5824 disasm_block(TARGET_NATIVE, start, length);
5825 }
5826
5827 static inline void disasm_m68k_block(uint8 *start, size_t length)
5828 {
5829 disasm_block(TARGET_M68K, start, length);
5830 }
5831
5832 #ifdef HAVE_GET_WORD_UNSWAPPED
5833 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
5834 #else
5835 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
5836 #endif
5837
5838 #if JIT_DEBUG
5839 static uae_u8 *last_regs_pc_p = 0;
5840 static uae_u8 *last_compiled_block_addr = 0;
5841
5842 void compiler_dumpstate(void)
5843 {
5844 if (!JITDebug)
5845 return;
5846
5847 write_log("### Host addresses\n");
5848 write_log("MEM_BASE : %x\n", MEMBaseDiff);
5849 write_log("PC_P : %p\n", &regs.pc_p);
5850 write_log("SPCFLAGS : %p\n", &regs.spcflags);
5851 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
5852 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
5853 write_log("\n");
5854
5855 write_log("### M68k processor state\n");
5856 m68k_dumpstate(0);
5857 write_log("\n");
5858
5859 write_log("### Block in Mac address space\n");
5860 write_log("M68K block : %p\n",
5861 (void *)get_virtual_address(last_regs_pc_p));
5862 write_log("Native block : %p (%d bytes)\n",
5863 (void *)get_virtual_address(last_compiled_block_addr),
5864 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
5865 write_log("\n");
5866 }
5867 #endif
5868
5869 static void compile_block(cpu_history* pc_hist, int blocklen)
5870 {
5871 if (letit && compiled_code) {
5872 #if PROFILE_COMPILE_TIME
5873 compile_count++;
5874 clock_t start_time = clock();
5875 #endif
5876 #if JIT_DEBUG
5877 bool disasm_block = false;
5878 #endif
5879
5880 /* OK, here we need to 'compile' a block */
5881 int i;
5882 int r;
5883 int was_comp=0;
5884 uae_u8 liveflags[MAXRUN+1];
5885 uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
5886 uae_u32 min_pcp=max_pcp;
5887 uae_u32 cl=cacheline(pc_hist[0].location);
5888 void* specflags=(void*)&regs.spcflags;
5889 blockinfo* bi=NULL;
5890 blockinfo* bi2;
5891 int extra_len=0;
5892
5893 redo_current_block=0;
5894 if (current_compile_p>=max_compile_start)
5895 flush_icache_hard(7);
5896
5897 alloc_blockinfos();
5898
5899 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
5900 bi2=get_blockinfo(cl);
5901
5902 optlev=bi->optlevel;
5903 if (bi->status!=BI_INVALID) {
5904 Dif (bi!=bi2) {
5905 /* I don't think it can happen anymore. Shouldn't, in
5906 any case. So let's make sure... */
5907 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
5908 bi->count,bi->optlevel,bi->handler_to_use,
5909 cache_tags[cl].handler);
5910 abort();
5911 }
5912
5913 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
5914 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
5915 /* What the heck? We are not supposed to be here! */
5916 abort();
5917 }
5918 }
5919 if (bi->count==-1) {
5920 optlev++;
5921 while (!optcount[optlev])
5922 optlev++;
5923 bi->count=optcount[optlev]-1;
5924 }
5925 current_block_pc_p=(uae_u32)pc_hist[0].location;
5926
5927 remove_deps(bi); /* We are about to create new code */
5928 bi->optlevel=optlev;
5929 bi->pc_p=(uae_u8*)pc_hist[0].location;
5930
5931 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
5932 i=blocklen;
5933 while (i--) {
5934 uae_u16* currpcp=pc_hist[i].location;
5935 uae_u32 op=DO_GET_OPCODE(currpcp);
5936
5937 if ((uae_u32)currpcp<min_pcp)
5938 min_pcp=(uae_u32)currpcp;
5939 if ((uae_u32)currpcp>max_pcp)
5940 max_pcp=(uae_u32)currpcp;
5941
5942 liveflags[i]=((liveflags[i+1]&
5943 (~prop[op].set_flags))|
5944 prop[op].use_flags);
5945 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
5946 liveflags[i]&= ~FLAG_Z;
5947 }
5948
5949 bi->needed_flags=liveflags[0];
5950
5951 align_target(align_loops);
5952 was_comp=0;
5953
5954 bi->direct_handler=(cpuop_func *)get_target();
5955 set_dhtu(bi,bi->direct_handler);
5956 bi->status=BI_COMPILING;
5957 current_block_start_target=(uae_u32)get_target();
5958
5959 log_startblock();
5960
5961 if (bi->count>=0) { /* Need to generate countdown code */
5962 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
5963 raw_sub_l_mi((uae_u32)&(bi->count),1);
5964 raw_jl((uae_u32)popall_recompile_block);
5965 }
5966 if (optlev==0) { /* No need to actually translate */
5967 /* Execute normally without keeping stats */
5968 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
5969 raw_jmp((uae_u32)popall_exec_nostats);
5970 }
5971 else {
5972 reg_alloc_run=0;
5973 next_pc_p=0;
5974 taken_pc_p=0;
5975 branch_cc=0;
5976
5977 comp_pc_p=(uae_u8*)pc_hist[0].location;
5978 init_comp();
5979 was_comp=1;
5980
5981 #if JIT_DEBUG
5982 if (JITDebug) {
5983 raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
5984 raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
5985 }
5986 #endif
5987
5988 for (i=0;i<blocklen &&
5989 get_target_noopt()<max_compile_start;i++) {
5990 cpuop_func **cputbl;
5991 compop_func **comptbl;
5992 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
5993 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
5994 if (!needed_flags) {
5995 cputbl=nfcpufunctbl;
5996 comptbl=nfcompfunctbl;
5997 }
5998 else {
5999 cputbl=cpufunctbl;
6000 comptbl=compfunctbl;
6001 }
6002
6003 failure = 1; // gb-- defaults to failure state
6004 if (comptbl[opcode] && optlev>1) {
6005 failure=0;
6006 if (!was_comp) {
6007 comp_pc_p=(uae_u8*)pc_hist[i].location;
6008 init_comp();
6009 }
6010 was_comp++;
6011
6012 comptbl[opcode](opcode);
6013 freescratch();
6014 if (!(liveflags[i+1] & FLAG_CZNV)) {
6015 /* We can forget about flags */
6016 dont_care_flags();
6017 }
6018 #if INDIVIDUAL_INST
6019 flush(1);
6020 nop();
6021 flush(1);
6022 was_comp=0;
6023 #endif
6024 }
6025
6026 if (failure) {
6027 if (was_comp) {
6028 flush(1);
6029 was_comp=0;
6030 }
6031 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6032 #if USE_NORMAL_CALLING_CONVENTION
6033 raw_push_l_r(REG_PAR1);
6034 #endif
6035 raw_mov_l_mi((uae_u32)&regs.pc_p,
6036 (uae_u32)pc_hist[i].location);
6037 raw_call((uae_u32)cputbl[opcode]);
6038 //raw_add_l_mi((uae_u32)&oink,1); // FIXME
6039 #if USE_NORMAL_CALLING_CONVENTION
6040 raw_inc_sp(4);
6041 #endif
6042 if (needed_flags) {
6043 //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode+65536);
6044 }
6045 else {
6046 //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode);
6047 }
6048
6049 if (i < blocklen - 1) {
6050 uae_s8* branchadd;
6051
6052 raw_mov_l_rm(0,(uae_u32)specflags);
6053 raw_test_l_rr(0,0);
6054 raw_jz_b_oponly();
6055 branchadd=(uae_s8 *)get_target();
6056 emit_byte(0);
6057 raw_jmp((uae_u32)popall_do_nothing);
6058 *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
6059 }
6060 }
6061 }
6062 #if 1 /* This isn't completely kosher yet; it really needs to
6063 be integrated into a general inter-block-dependency scheme */
6064 if (next_pc_p && taken_pc_p &&
6065 was_comp && taken_pc_p==current_block_pc_p) {
6066 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6067 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6068 uae_u8 x=bi1->needed_flags;
6069
6070 if (x==0xff || 1) { /* To be on the safe side */
6071 uae_u16* next=(uae_u16*)next_pc_p;
6072 uae_u32 op=DO_GET_OPCODE(next);
6073
6074 x=0x1f;
6075 x&=(~prop[op].set_flags);
6076 x|=prop[op].use_flags;
6077 }
6078
6079 x|=bi2->needed_flags;
6080 if (!(x & FLAG_CZNV)) {
6081 /* We can forget about flags */
6082 dont_care_flags();
6083 extra_len+=2; /* The next instruction now is part of this
6084 block */
6085 }
6086
6087 }
6088 #endif
6089 log_flush();
6090
6091 if (next_pc_p) { /* A branch was registered */
6092 uae_u32 t1=next_pc_p;
6093 uae_u32 t2=taken_pc_p;
6094 int cc=branch_cc;
6095
6096 uae_u32* branchadd;
6097 uae_u32* tba;
6098 bigstate tmp;
6099 blockinfo* tbi;
6100
6101 if (taken_pc_p<next_pc_p) {
6102 /* backward branch. Optimize for the "taken" case ---
6103 which means the raw_jcc should fall through when
6104 the 68k branch is taken. */
6105 t1=taken_pc_p;
6106 t2=next_pc_p;
6107 cc=branch_cc^1;
6108 }
6109
6110 tmp=live; /* ouch! This is big... */
6111 raw_jcc_l_oponly(cc);
6112 branchadd=(uae_u32*)get_target();
6113 emit_long(0);
6114
6115 /* predicted outcome */
6116 tbi=get_blockinfo_addr_new((void*)t1,1);
6117 match_states(tbi);
6118 raw_cmp_l_mi((uae_u32)specflags,0);
6119 raw_jcc_l_oponly(NATIVE_CC_EQ);
6120 tba=(uae_u32*)get_target();
6121 emit_long(get_handler(t1)-((uae_u32)tba+4));
6122 raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
6123 raw_jmp((uae_u32)popall_do_nothing);
6124 create_jmpdep(bi,0,tba,t1);
6125
6126 align_target(align_jumps);
6127 /* not-predicted outcome */
6128 *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6129 live=tmp; /* Ouch again */
6130 tbi=get_blockinfo_addr_new((void*)t2,1);
6131 match_states(tbi);
6132
6133 //flush(1); /* Can only get here if was_comp==1 */
6134 raw_cmp_l_mi((uae_u32)specflags,0);
6135 raw_jcc_l_oponly(NATIVE_CC_EQ);
6136 tba=(uae_u32*)get_target();
6137 emit_long(get_handler(t2)-((uae_u32)tba+4));
6138 raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
6139 raw_jmp((uae_u32)popall_do_nothing);
6140 create_jmpdep(bi,1,tba,t2);
6141 }
6142 else
6143 {
6144 if (was_comp) {
6145 flush(1);
6146 }
6147
6148 /* Let's find out where next_handler is... */
6149 if (was_comp && isinreg(PC_P)) {
6150 r=live.state[PC_P].realreg;
6151 raw_and_l_ri(r,TAGMASK);
6152 int r2 = (r==0) ? 1 : 0;
6153 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6154 raw_cmp_l_mi((uae_u32)specflags,0);
6155 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6156 raw_jmp_r(r2);
6157 }
6158 else if (was_comp && isconst(PC_P)) {
6159 uae_u32 v=live.state[PC_P].val;
6160 uae_u32* tba;
6161 blockinfo* tbi;
6162
6163 tbi=get_blockinfo_addr_new((void*)v,1);
6164 match_states(tbi);
6165
6166 raw_cmp_l_mi((uae_u32)specflags,0);
6167 raw_jcc_l_oponly(NATIVE_CC_EQ);
6168 tba=(uae_u32*)get_target();
6169 emit_long(get_handler(v)-((uae_u32)tba+4));
6170 raw_mov_l_mi((uae_u32)&regs.pc_p,v);
6171 raw_jmp((uae_u32)popall_do_nothing);
6172 create_jmpdep(bi,0,tba,v);
6173 }
6174 else {
6175 r=REG_PC_TMP;
6176 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
6177 raw_and_l_ri(r,TAGMASK);
6178 int r2 = (r==0) ? 1 : 0;
6179 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6180 raw_cmp_l_mi((uae_u32)specflags,0);
6181 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6182 raw_jmp_r(r2);
6183 }
6184 }
6185 }
6186
6187 #if USE_MATCH
6188 if (callers_need_recompile(&live,&(bi->env))) {
6189 mark_callers_recompile(bi);
6190 }
6191
6192 big_to_small_state(&live,&(bi->env));
6193 #endif
6194
6195 if (next_pc_p+extra_len>=max_pcp &&
6196 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6197 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6198 else
6199 max_pcp+=LONGEST_68K_INST;
6200 bi->len=max_pcp-min_pcp;
6201 bi->min_pcp=min_pcp;
6202
6203 remove_from_list(bi);
6204 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6205 add_to_dormant(bi); /* No need to checksum it on cache flush.
6206 Please don't start changing ROMs in
6207 flight! */
6208 }
6209 else {
6210 calc_checksum(bi,&(bi->c1),&(bi->c2));
6211 add_to_active(bi);
6212 }
6213
6214 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6215
6216 #if JIT_DEBUG
6217 if (JITDebug)
6218 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6219
6220 if (JITDebug && disasm_block) {
6221 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6222 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6223 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6224 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6225 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6226 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6227 getchar();
6228 }
6229 #endif
6230
6231 log_dump();
6232 align_target(align_jumps);
6233
6234 /* This is the non-direct handler */
6235 bi->handler=
6236 bi->handler_to_use=(cpuop_func *)get_target();
6237 raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6238 raw_jnz((uae_u32)popall_cache_miss);
6239 comp_pc_p=(uae_u8*)pc_hist[0].location;
6240
6241 bi->status=BI_FINALIZING;
6242 init_comp();
6243 match_states(bi);
6244 flush(1);
6245
6246 raw_jmp((uae_u32)bi->direct_handler);
6247
6248 current_compile_p=get_target();
6249 raise_in_cl_list(bi);
6250
6251 /* We will flush soon, anyway, so let's do it now */
6252 if (current_compile_p>=max_compile_start)
6253 flush_icache_hard(7);
6254
6255 bi->status=BI_ACTIVE;
6256 if (redo_current_block)
6257 block_need_recompile(bi);
6258
6259 #if PROFILE_COMPILE_TIME
6260 compile_time += (clock() - start_time);
6261 #endif
6262 }
6263 }
6264
6265 void do_nothing(void)
6266 {
6267 /* What did you expect this to do? */
6268 }
6269
6270 void exec_nostats(void)
6271 {
6272 for (;;) {
6273 uae_u32 opcode = GET_OPCODE;
6274 (*cpufunctbl[opcode])(opcode);
6275 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6276 return; /* We will deal with the spcflags in the caller */
6277 }
6278 }
6279 }
6280
6281 void execute_normal(void)
6282 {
6283 if (!check_for_cache_miss()) {
6284 cpu_history pc_hist[MAXRUN];
6285 int blocklen = 0;
6286 #if REAL_ADDRESSING || DIRECT_ADDRESSING
6287 start_pc_p = regs.pc_p;
6288 start_pc = get_virtual_address(regs.pc_p);
6289 #else
6290 start_pc_p = regs.pc_oldp;
6291 start_pc = regs.pc;
6292 #endif
6293 for (;;) { /* Take note: This is the do-it-normal loop */
6294 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6295 uae_u32 opcode = GET_OPCODE;
6296 #if FLIGHT_RECORDER
6297 m68k_record_step(m68k_getpc());
6298 #endif
6299 (*cpufunctbl[opcode])(opcode);
6300 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6301 compile_block(pc_hist, blocklen);
6302 return; /* We will deal with the spcflags in the caller */
6303 }
6304 /* No need to check regs.spcflags, because if they were set,
6305 we'd have ended up inside that "if" */
6306 }
6307 }
6308 }
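
/* So a block is first interpreted instruction by instruction while its
   addresses are recorded in pc_hist[], and only then handed to
   compile_block(); the per-block countdown emitted there decides whether a
   later pass merely counts executions or actually translates. */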
6309
6310 typedef void (*compiled_handler)(void);
6311
6312 #ifdef X86_ASSEMBLY
6313 void (*m68k_compile_execute)(void) = NULL;
6314 #else
6315 void m68k_do_compile_execute(void)
6316 {
6317 for (;;) {
6318 ((compiled_handler)(pushall_call_handler))();
6319 /* Whenever we return from that, we should check spcflags */
6320 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6321 if (m68k_do_specialties ())
6322 return;
6323 }
6324 }
6325 }
6326 #endif