ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.7
Committed: 2002-10-01T16:22:36Z (21 years, 9 months ago) by gbeauche
Branch: MAIN
Changes since 1.6: +123 -59 lines
Log Message:
- Rewrite blockinfo allocator et al. Use a template class so that this
  can work with other types related to blockinfos.
- Add new method to compute checksums. This should permit code inlining
  and follow-ups of const_jumps without breaking the lazy cache invalidator.
  aka. chain infos for checksuming. TODO: Incomplete support thus disabled.

File Contents

# Content
1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
2 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
3 #endif
4
5 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
6 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
7 #endif
8
9 #define USE_MATCH 0
10
11 /* kludge for Brian, so he can compile under MSVC++ */
12 #define USE_NORMAL_CALLING_CONVENTION 0
13
14 #ifndef WIN32
15 #include <sys/types.h>
16 #include <sys/mman.h>
17 #endif
18
19 #include <stdlib.h>
20 #include <fcntl.h>
21 #include <errno.h>
22
23 #include "sysdeps.h"
24 #include "cpu_emulation.h"
25 #include "main.h"
26 #include "prefs.h"
27 #include "user_strings.h"
28 #include "vm_alloc.h"
29
30 #include "m68k.h"
31 #include "memory.h"
32 #include "readcpu.h"
33 #include "newcpu.h"
34 #include "comptbl.h"
35 #include "compiler/compemu.h"
36 #include "fpu/fpu.h"
37 #include "fpu/flags.h"
38
39 #define DEBUG 1
40 #include "debug.h"
41
42 #ifdef ENABLE_MON
43 #include "mon.h"
44 #endif
45
46 #ifndef WIN32
47 #define PROFILE_COMPILE_TIME 1
48 #endif
49
50 #ifdef WIN32
51 #undef write_log
52 #define write_log dummy_write_log
53 static void dummy_write_log(const char *, ...) { }
54 #endif
55
56 #if JIT_DEBUG
57 #undef abort
58 #define abort() do { \
59 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
60 exit(EXIT_FAILURE); \
61 } while (0)
62 #endif
63
64 #if PROFILE_COMPILE_TIME
65 #include <time.h>
66 static uae_u32 compile_count = 0;
67 static clock_t compile_time = 0;
68 static clock_t emul_start_time = 0;
69 static clock_t emul_end_time = 0;
70 #endif
71
/* Dispatch tables indexed by 68k opcode word: flag-generating and
   no-flag compile handlers, plus no-flag interpretive fallbacks. */
compop_func *compfunctbl[65536];
compop_func *nfcompfunctbl[65536];
cpuop_func *nfcpufunctbl[65536];
uae_u8* comp_pc_p;

// From newcpu.cpp
extern bool quit_program;

// gb-- Extra data for Basilisk II/JIT
#if JIT_DEBUG
static bool JITDebug = false;		// Enable runtime disassemblers through mon?
#else
const bool JITDebug = false;		// Don't use JIT debug mode at all
#endif

const uae_u32 MIN_CACHE_SIZE = 2048;	// Minimal translation cache size (2048 KB)
static uae_u32 cache_size = 0;		// Size of total cache allocated for compiled blocks
static uae_u32 current_cache_size = 0;	// Cache grows upwards: how much has been consumed already
static bool lazy_flush = true;		// Flag: lazy translation cache invalidation
static bool avoid_fpu = true;		// Flag: compile FPU instructions ?
static bool have_cmov = false;		// target has CMOV instructions ?
static bool have_rat_stall = true;	// target has partial register stalls ?
static bool tune_alignment = false;	// Tune code alignments for running CPU ?
static int align_loops = 32;		// Align the start of loops
static int align_jumps = 32;		// Align the start of jumps
static int zero_fd = -1;		// NOTE(review): presumably an fd for /dev/zero mappings - not used in this excerpt
static int optcount[10] = {
	10,	// How often a block has to be executed before it is translated
	0,	// How often to use naive translation
	0, 0, 0, 0,
	-1, -1, -1, -1
};

/* Static per-opcode properties; cflow carries the fl_* control-flow
   classification bits (fl_end_block is tested by end_block()). */
struct op_properties {
	uae_u8 use_flags;
	uae_u8 set_flags;
	uae_u8 is_addx;
	uae_u8 cflow;
};
static op_properties prop[65536];
112
/* Non-zero if this opcode terminates a translation block (its cflow
   classification carries the fl_end_block bit). */
static inline int end_block(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_end_block);
}
117
uae_u8* start_pc_p;
uae_u32 start_pc;
uae_u32 current_block_pc_p;
uae_u32 current_block_start_target;
uae_u32 needed_flags;
static uae_u32 next_pc_p;
static uae_u32 taken_pc_p;
static int branch_cc;
static int redo_current_block;	// set when the block being compiled must be redone (see mark_callers_recompile)

int segvcount=0;
int soft_flush_count=0;
int hard_flush_count=0;
int checksum_count=0;
static uae_u8* current_compile_p=NULL;	// next free byte in the translation cache (bumped by HardBlockAllocator too)
static uae_u8* max_compile_start;
static uae_u8* compiled_code=NULL;
static uae_s32 reg_alloc_run;

/* Entry/exit stubs generated at startup */
void* pushall_call_handler=NULL;
static void* popall_do_nothing=NULL;
static void* popall_exec_nostats=NULL;
static void* popall_execute_normal=NULL;
static void* popall_cache_miss=NULL;
static void* popall_recompile_block=NULL;
static void* popall_check_checksum=NULL;

extern uae_u32 oink;
extern unsigned long foink3;
extern unsigned long foink;

/* The 68k only ever executes from even addresses. So right now, we
 * waste half the entries in this array
 * UPDATE: We now use those entries to store the start of the linked
 * lists that we maintain for each hash result.
 */
cacheline cache_tags[TAGSIZE];
int letit=0;
blockinfo* hold_bi[MAX_HOLD_BI];	// stash of pre-allocated blockinfos (see alloc_blockinfos)
blockinfo* active;	// list head, filled by add_to_active
blockinfo* dormant;	// list head, filled by add_to_dormant
159
/* Opcode tables for the supported CPU models, defined elsewhere
   (generated cpuemu/compemu sources). */

/* 68040 */
extern struct cputbl op_smalltbl_0_nf[];
extern struct comptbl op_smalltbl_0_comp_nf[];
extern struct comptbl op_smalltbl_0_comp_ff[];

/* 68020 + 68881 */
extern struct cputbl op_smalltbl_1_nf[];

/* 68020 */
extern struct cputbl op_smalltbl_2_nf[];

/* 68010 */
extern struct cputbl op_smalltbl_3_nf[];

/* 68000 */
extern struct cputbl op_smalltbl_4_nf[];

/* 68000 slow but compatible. */
extern struct cputbl op_smalltbl_5_nf[];

/* Translation-cache invalidation entry point: starts out as a no-op
   and is later pointed at the hard or lazy implementation. */
static void flush_icache_hard(int n);
static void flush_icache_lazy(int n);
static void flush_icache_none(int n);
void (*flush_icache)(int n) = flush_icache_none;
184
185
186
bigstate live;		// full register-allocation state during code emission
smallstate empty_ss;
smallstate default_ss;
static int optlev;

/* Forward declarations for the register allocator */
static int writereg(int r, int size);
static void unlock2(int r);
static void setlock(int r);
static int readreg_specific(int r, int size, int spec);
static int writereg_specific(int r, int size, int spec);
static void prepare_for_call_1(void);
static void prepare_for_call_2(void);
static void align_target(uae_u32 a);

static uae_s32 nextused[VREGS];

uae_u32 m68k_pc_offset;

/* Some arithmetic operations can be optimized away if the operands
 * are known to be constant. But that's only a good idea when the
 * side effects they would have on the flags are not important. This
 * variable indicates whether we need the side effects or not
 */
uae_u32 needflags=0;
211
212 /* Flag handling is complicated.
213 *
214 * x86 instructions create flags, which quite often are exactly what we
215 * want. So at times, the "68k" flags are actually in the x86 flags.
216 *
217 * Then again, sometimes we do x86 instructions that clobber the x86
218 * flags, but don't represent a corresponding m68k instruction. In that
219 * case, we have to save them.
220 *
221 * We used to save them to the stack, but now store them back directly
222 * into the regflags.cznv of the traditional emulation. Thus some odd
223 * names.
224 *
225 * So flags can be in either of two places (used to be three; boy were
226 * things complicated back then!); And either place can contain either
227 * valid flags or invalid trash (and on the stack, there was also the
228 * option of "nothing at all", now gone). A couple of variables keep
229 * track of the respective states.
230 *
231 * To make things worse, we might or might not be interested in the flags.
232 * by default, we are, but a call to dont_care_flags can change that
233 * until the next call to live_flags. If we are not, pretty much whatever
234 * is in the register and/or the native flags is seen as valid.
235 */
236
/* Return the head of the blockinfo chain for cacheline cl.
   cache_tags is laid out in pairs: [cl] holds the dispatch handler,
   [cl+1] holds the blockinfo chain head (see add_to_cl_list). */
static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
	return cache_tags[cl+1].bi;
}
241
242 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
243 {
244 blockinfo* bi=get_blockinfo(cacheline(addr));
245
246 while (bi) {
247 if (bi->pc_p==addr)
248 return bi;
249 bi=bi->next_same_cl;
250 }
251 return NULL;
252 }
253
254
255 /*******************************************************************
256 * All sorts of list related functions for all of the lists *
257 *******************************************************************/
258
/* Unlink bi from its cacheline chain and refresh that cacheline's
   dispatch handler: the new chain head's handler, or the generic
   "execute normal" stub if the chain is now empty. */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (bi->prev_same_cl_p)
		*(bi->prev_same_cl_p)=bi->next_same_cl;
	if (bi->next_same_cl)
		bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
	if (cache_tags[cl+1].bi)
		cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
	else
		cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}
272
/* Unlink bi from the (active or dormant) doubly-linked list it is on. */
static __inline__ void remove_from_list(blockinfo* bi)
{
	if (bi->prev_p)
		*(bi->prev_p)=bi->next;
	if (bi->next)
		bi->next->prev_p=bi->prev_p;
}
280
/* Take bi out of both its active/dormant list and its cacheline chain. */
static __inline__ void remove_from_lists(blockinfo* bi)
{
	remove_from_list(bi);
	remove_from_cl_list(bi);
}
286
/* Push bi onto the front of its cacheline chain and make its handler
   the one dispatched for that cacheline. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (cache_tags[cl+1].bi)
		cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
	bi->next_same_cl=cache_tags[cl+1].bi;

	cache_tags[cl+1].bi=bi;
	bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

	cache_tags[cl].handler=bi->handler_to_use;
}
300
/* Move bi to the front of its cacheline chain. */
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
	remove_from_cl_list(bi);
	add_to_cl_list(bi);
}
306
/* Push bi onto the front of the active list. */
static __inline__ void add_to_active(blockinfo* bi)
{
	if (active)
		active->prev_p=&(bi->next);
	bi->next=active;

	active=bi;
	bi->prev_p=&active;
}
316
/* Push bi onto the front of the dormant list. */
static __inline__ void add_to_dormant(blockinfo* bi)
{
	if (dormant)
		dormant->prev_p=&(bi->next);
	bi->next=dormant;

	dormant=bi;
	bi->prev_p=&dormant;
}
326
/* Unlink one dependency record from its target's dependency list and
   clear its links so it can safely be re-linked later. */
static __inline__ void remove_dep(dependency* d)
{
	if (d->prev_p)
		*(d->prev_p)=d->next;
	if (d->next)
		d->next->prev_p=d->prev_p;
	d->prev_p=NULL;
	d->next=NULL;
}
336
/* This block's code is about to be thrown away, so it no longer
   depends on anything else: drop both outgoing dependency slots. */
static __inline__ void remove_deps(blockinfo* bi)
{
	remove_dep(&(bi->dep[0]));
	remove_dep(&(bi->dep[1]));
}
344
/* Patch the 4-byte relative displacement at d->jmp_off so the jump
   lands on handler a (x86 rel32 is relative to the byte after the
   displacement, hence the +4). */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
	*(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
349
350 /********************************************************************
351 * Soft flush handling support functions *
352 ********************************************************************/
353
/* Change the direct handler dependent blocks jump to, patching every
   recorded jump site so it points at the new handler dh. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
	if (dh!=bi->direct_handler_to_use) {
		dependency* x=bi->deplist;
		while (x) {
			if (x->jmp_off) {
				adjust_jmpdep(x,dh);
			}
			x=x->next;
		}
		bi->direct_handler_to_use=dh;
	}
}
373
/* Reset bi to the "not compiled" state: generic handlers, a fresh
   execution countdown, and no outgoing dependencies. */
static __inline__ void invalidate_block(blockinfo* bi)
{
	int i;

	bi->optlevel=0;
	bi->count=optcount[0]-1;	// executions left before (re)compilation
	bi->handler=NULL;
	bi->handler_to_use=(cpuop_func *)popall_execute_normal;
	bi->direct_handler=NULL;
	set_dhtu(bi,bi->direct_pen);
	bi->needed_flags=0xff;
	bi->status=BI_INVALID;
	for (i=0;i<2;i++) {
		bi->dep[i].jmp_off=NULL;
		bi->dep[i].target=NULL;
	}
	remove_deps(bi);
}
392
/* Record that slot i of bi jumps directly (via the patchable word at
   jmpaddr) into the block starting at 68k address target, and link
   the record into that target block's dependency list. Aborts if no
   block exists at target. */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
	blockinfo* tbi=get_blockinfo_addr((void*)target);

	Dif(!tbi) {
		write_log("Could not create jmpdep!\n");
		abort();
	}
	bi->dep[i].jmp_off=jmpaddr;
	bi->dep[i].source=bi;
	bi->dep[i].target=tbi;
	bi->dep[i].next=tbi->deplist;
	if (bi->dep[i].next)
		bi->dep[i].next->prev_p=&(bi->dep[i].next);
	bi->dep[i].prev_p=&(tbi->deplist);
	tbi->deplist=&(bi->dep[i]);
}
410
/* Demote bi so its next execution goes through the generic
   compile/execute path again; dependent jumps are repointed at the
   compile stub (direct_pen). */
static __inline__ void block_need_recompile(blockinfo * bi)
{
	uae_u32 cl = cacheline(bi->pc_p);

	set_dhtu(bi, bi->direct_pen);
	bi->direct_handler = bi->direct_pen;

	bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	bi->handler = (cpuop_func *)popall_execute_normal;
	if (bi == cache_tags[cl + 1].bi)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	bi->status = BI_NEED_RECOMP;
}
424
/* Recursively mark every block that jumps directly into bi for
   recompilation. A caller that is itself being compiled right now
   causes the current block to be redone instead. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
	dependency *x = bi->deplist;

	while (x) {
		dependency *next = x->next;	/* This disappears when we mark for
						 * recompilation and thus remove the
						 * blocks from the lists */
		if (x->jmp_off) {
			blockinfo *cbi = x->source;

			Dif(cbi->status == BI_INVALID) {
				// write_log("invalid block in dependency list\n"); // FIXME?
				// abort();
			}
			if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
				block_need_recompile(cbi);
				mark_callers_recompile(cbi);
			}
			else if (cbi->status == BI_COMPILING) {
				redo_current_block = 1;
			}
			else if (cbi->status == BI_NEED_RECOMP) {
				/* nothing */
			}
			else {
				//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
			}
		}
		x = next;
	}
}
457
/* Return the blockinfo for addr, creating one from the hold_bi[]
   stash if none exists yet (setstate is currently unused). Aborts if
   the stash is empty - alloc_blockinfos() must have been called. */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
	blockinfo* bi=get_blockinfo_addr(addr);
	int i;

	if (!bi) {
		for (i=0;i<MAX_HOLD_BI && !bi;i++) {
			if (hold_bi[i]) {
				uae_u32 cl=cacheline(addr);

				bi=hold_bi[i];
				hold_bi[i]=NULL;
				bi->pc_p=(uae_u8 *)addr;
				invalidate_block(bi);
				add_to_active(bi);
				add_to_cl_list(bi);

			}
		}
	}
	if (!bi) {
		write_log("Looking for blockinfo, can't find free one\n");
		abort();
	}
	return bi;
}
484
485 static void prepare_block(blockinfo* bi);
486
/* Management of blockinfos.
488
489 A blockinfo struct is allocated whenever a new block has to be
490 compiled. If the list of free blockinfos is empty, we allocate a new
491 pool of blockinfos and link the newly created blockinfos altogether
492 into the list of free blockinfos. Otherwise, we simply pop a structure
493 off the free list.
494
495 Blockinfo are lazily deallocated, i.e. chained altogether in the
list of free blockinfos whenever a translation cache flush (hard or
497 soft) request occurs.
498 */
499
/* Pool allocator for blockinfo-like structures. Chunks are carved
   out of malloc'd pools (~4KB each) and kept on a free list; pools
   are only returned to the system when the allocator is destroyed.
   T must have a T* member named `next` (used to chain free chunks). */
template< class T >
class LazyBlockAllocator
{
	enum {
		kPoolSize = 1 + 4096 / sizeof(T)
	};
	struct Pool {
		T chunk[kPoolSize];
		Pool * next;
	};
	Pool * mPools;		// every pool ever allocated
	T * mChunks;		// free list, threaded through T::next
public:
	LazyBlockAllocator() : mPools(0), mChunks(0) { }
	~LazyBlockAllocator();
	T * acquire();
	void release(T * const);
};
518
519 template< class T >
520 LazyBlockAllocator<T>::~LazyBlockAllocator()
521 {
522 Pool * currentPool = mPools;
523 while (currentPool) {
524 Pool * deadPool = currentPool;
525 currentPool = currentPool->next;
526 free(deadPool);
527 }
528 }
529
530 template< class T >
531 T * LazyBlockAllocator<T>::acquire()
532 {
533 if (!mChunks) {
534 // There is no chunk left, allocate a new pool and link the
535 // chunks into the free list
536 Pool * newPool = (Pool *)malloc(sizeof(Pool));
537 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
538 chunk->next = mChunks;
539 mChunks = chunk;
540 }
541 newPool->next = mPools;
542 mPools = newPool;
543 }
544 T * chunk = mChunks;
545 mChunks = chunk->next;
546 return chunk;
547 }
548
/* Return a chunk to the free list; the memory is kept for reuse. */
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
	chunk->next = mChunks;
	mChunks = chunk;
}
555
/* Bump allocator that carves structures directly out of the
   translation cache (current_compile_p); released storage is only
   reclaimed when the cache itself is invalidated. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
570
/* Allocator selection: separate lazy pools, or carve straight out of
   the translation cache. */
#if USE_SEPARATE_BIA
static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
#else
static HardBlockAllocator<blockinfo> BlockInfoAllocator;
static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
#endif
578
579
/* Get a fresh blockinfo with an empty checksum-info chain. */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	bi->csi = NULL;
#endif
	return bi;
}
588
/* Release a blockinfo, first returning its whole checksum-info chain
   to its allocator. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	checksum_info *csi = bi->csi;
	while (csi != NULL) {
		checksum_info *csi2 = csi->next;
		ChecksumInfoAllocator.release(csi);
		csi = csi2;
	}
#endif
	BlockInfoAllocator.release(bi);
}
601
/* Top up the hold_bi[] stash of ready-to-use blockinfos, stopping as
   soon as a slot is found still occupied. */
static __inline__ void alloc_blockinfos(void)
{
	int i;
	blockinfo* bi;

	for (i=0;i<MAX_HOLD_BI;i++) {
		if (hold_bi[i])
			return;
		bi=hold_bi[i]=alloc_blockinfo();
		prepare_block(bi);
	}
}
614
615 /********************************************************************
616 * Functions to emit data into memory, and other general support *
617 ********************************************************************/
618
619 static uae_u8* target;
620
/* One-time emitter initialization; nothing needed at present. */
static void emit_init(void)
{
}
624
/* Append one byte at the current emission position. */
static __inline__ void emit_byte(uae_u8 x)
{
	*target++=x;
}
629
/* Append a 16-bit value. NOTE(review): unaligned store through a
   cast pointer - fine on x86, not portable to strict-alignment CPUs. */
static __inline__ void emit_word(uae_u16 x)
{
	*((uae_u16*)target)=x;
	target+=2;
}
635
/* Append a 32-bit value (unaligned store, same caveat as emit_word). */
static __inline__ void emit_long(uae_u32 x)
{
	*((uae_u32*)target)=x;
	target+=4;
}
641
/* Byte-swap a 32-bit value. */
static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
	// gb-- We have specialized byteswapping functions, just use them
	return do_byteswap_32(v);
#else
	return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}
651
652 /********************************************************************
653 * Getting the information about the target CPU *
654 ********************************************************************/
655
656 #include "codegen_x86.cpp"
657
/* Redirect code emission to address t. */
void set_target(uae_u8* t)
{
	target=t;
}
662
/* Return the current code-emission position. */
static __inline__ uae_u8* get_target_noopt(void)
{
	return target;
}
667
/* Public accessor for the current code-emission position. */
__inline__ uae_u8* get_target(void)
{
	return get_target_noopt();
}
672
673
674 /********************************************************************
675 * Flags status handling. EMIT TIME! *
676 ********************************************************************/
677
678 static void bt_l_ri_noclobber(R4 r, IMM i);
679
/* Ensure the emulated 68k flags are live in the real x86 flags. If
   the only valid copy is "on the stack" (the regflags.cznv slot, see
   the comment above), reload it via FLAGTMP. Aborts when no valid
   copy exists anywhere. */
static void make_flags_live_internal(void)
{
	if (live.flags_in_flags==VALID)
		return;
	Dif (live.flags_on_stack==TRASH) {
		write_log("Want flags, got something on stack, but it is TRASH\n");
		abort();
	}
	if (live.flags_on_stack==VALID) {
		int tmp;
		tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
		raw_reg_to_flags(tmp);
		unlock2(tmp);

		live.flags_in_flags=VALID;
		return;
	}
	write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
		live.flags_in_flags,live.flags_on_stack);
	abort();
}
701
/* Save the x86 flags to their memory slot if not already valid
   there. If nobody cares about the flags, just mark the slot valid
   without emitting anything. */
static void flags_to_stack(void)
{
	if (live.flags_on_stack==VALID)
		return;
	if (!live.flags_are_important) {
		live.flags_on_stack=VALID;
		return;
	}
	Dif (live.flags_in_flags!=VALID)
		abort();
	else {
		int tmp;
		tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
		raw_flags_to_reg(tmp);
		unlock2(tmp);
	}
	live.flags_on_stack=VALID;
}
720
/* About to emit an instruction that trashes the x86 flags: save them
   first if they hold the only valid copy. */
static __inline__ void clobber_flags(void)
{
	if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
		flags_to_stack();
	live.flags_in_flags=TRASH;
}
727
728 /* Prepare for leaving the compiled stuff */
729 static __inline__ void flush_flags(void)
730 {
731 flags_to_stack();
732 return;
733 }
734
735 int touchcnt;
736
737 /********************************************************************
738 * register allocation per block logging *
739 ********************************************************************/
740
/* Per-block register-usage log, consulted to decide whether callers
   need recompilation when register assignments change. */
static uae_s8 vstate[VREGS];
static uae_s8 vwritten[VREGS];
static uae_s8 nstate[N_REGS];

#define L_UNKNOWN -127	// no information recorded yet
#define L_UNAVAIL -1	// nreg was used as a temporary
#define L_NEEDED -2	// vreg value was read
#define L_UNNEEDED -3	// vreg value was overwritten without being read
749
750 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
751 {
752 int i;
753
754 for (i = 0; i < VREGS; i++)
755 s->virt[i] = vstate[i];
756 for (i = 0; i < N_REGS; i++)
757 s->nat[i] = nstate[i];
758 }
759
/* Compare the just-logged register state with a caller's recorded
   smallstate. Non-zero means the caller should be recompiled: it
   relies on something that changed, or (with USE_MATCH) enough has
   improved that recompiling might pay off. */
static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
{
	int i;
	int reverse = 0;

	for (i = 0; i < VREGS; i++) {
		if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
			return 1;
		if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
			reverse++;
	}
	for (i = 0; i < N_REGS; i++) {
		if (nstate[i] >= 0 && nstate[i] != s->nat[i])
			return 1;
		if (nstate[i] < 0 && s->nat[i] >= 0)
			reverse++;
	}
	if (reverse >= 2 && USE_MATCH)
		return 1;	/* In this case, it might be worth recompiling the
				 * callers */
	return 0;
}
782
783 static __inline__ void log_startblock(void)
784 {
785 int i;
786
787 for (i = 0; i < VREGS; i++) {
788 vstate[i] = L_UNKNOWN;
789 vwritten[i] = 0;
790 }
791 for (i = 0; i < N_REGS; i++)
792 nstate[i] = L_UNKNOWN;
793 }
794
/* Using an n-reg for a temp variable */
static __inline__ void log_isused(int n)
{
	if (nstate[n] == L_UNKNOWN)
		nstate[n] = L_UNAVAIL;
}
801
/* Record that vreg r's value was read. */
static __inline__ void log_visused(int r)
{
	if (vstate[r] == L_UNKNOWN)
		vstate[r] = L_NEEDED;
}
807
/* Load vreg r into native register n; the flag pseudo-registers
   (FLAGTMP/FLAGX) have dedicated loaders, everything else comes from
   its memory slot. */
static __inline__ void do_load_reg(int n, int r)
{
	if (r == FLAGTMP)
		raw_load_flagreg(n, r);
	else if (r == FLAGX)
		raw_load_flagx(n, r);
	else
		raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
}
817
/* Unconditionally load vreg r's memory slot into native register n
   (no special handling for the flag pseudo-registers). */
static __inline__ void check_load_reg(int n, int r)
{
	raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
}
822
/* Record that vreg r has been written during this block. */
static __inline__ void log_vwrite(int r)
{
	vwritten[r] = 1;
}
827
828 /* Using an n-reg to hold a v-reg */
829 static __inline__ void log_isreg(int n, int r)
830 {
831 static int count = 0;
832
833 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
834 nstate[n] = r;
835 else {
836 do_load_reg(n, r);
837 if (nstate[n] == L_UNKNOWN)
838 nstate[n] = L_UNAVAIL;
839 }
840 if (vstate[r] == L_UNKNOWN)
841 vstate[r] = L_NEEDED;
842 }
843
/* Record that vreg r's previous value was overwritten before use. */
static __inline__ void log_clobberreg(int r)
{
	if (vstate[r] == L_UNKNOWN)
		vstate[r] = L_UNNEEDED;
}
849
/* This ends all possibility of clever register allocation */

static __inline__ void log_flush(void)
{
	int i;

	for (i = 0; i < VREGS; i++)
		if (vstate[i] == L_UNKNOWN)
			vstate[i] = L_NEEDED;
	for (i = 0; i < N_REGS; i++)
		if (nstate[i] == L_UNKNOWN)
			nstate[i] = L_UNAVAIL;
}
863
/* Debug dump of the register-usage log. Intentionally disabled by
   the early return; remove it to enable the output. */
static __inline__ void log_dump(void)
{
	int i;

	return;

	write_log("----------------------\n");
	for (i = 0; i < N_REGS; i++) {
		switch (nstate[i]) {
		case L_UNKNOWN:
			write_log("Nat %d : UNKNOWN\n", i);
			break;
		case L_UNAVAIL:
			write_log("Nat %d : UNAVAIL\n", i);
			break;
		default:
			write_log("Nat %d : %d\n", i, nstate[i]);
			break;
		}
	}
	for (i = 0; i < VREGS; i++) {
		if (vstate[i] == L_UNNEEDED)
			write_log("Virt %d: UNNEEDED\n", i);
	}
}
889
890 /********************************************************************
891 * register status handling. EMIT TIME! *
892 ********************************************************************/
893
/* Update vreg r's status. Becoming ISCONST means the previous value
   can never be read again, so log it as clobbered. */
static __inline__ void set_status(int r, int status)
{
	if (status == ISCONST)
		log_clobberreg(r);
	live.state[r].status=status;
}
900
/* Non-zero if vreg r currently lives in a native register. */
static __inline__ int isinreg(int r)
{
	return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
}
905
/* Fold constant offset val into native register r with a single LEA
   (no-op for 0). */
static __inline__ void adjust_nreg(int r, uae_u32 val)
{
	if (!val)
		return;
	raw_lea_l_brr(r,r,val);
}
912
/* Write vreg r back to its memory slot if dirty. A pending constant
   offset is folded into the nreg first, but only when r is the sole,
   unlocked holder of that register. */
static void tomem(int r)
{
	int rr=live.state[r].realreg;

	if (isinreg(r)) {
		if (live.state[r].val && live.nat[rr].nholds==1
			&& !live.nat[rr].locked) {
			adjust_nreg(rr,live.state[r].val);
			live.state[r].val=0;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
		}
	}

	if (live.state[r].status==DIRTY) {
		switch (live.state[r].dirtysize) {
		case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
		case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
		case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
		default: abort();
		}
		log_vwrite(r);
		set_status(r,CLEAN);
		live.state[r].dirtysize=0;
	}
}
941
/* Non-zero if vreg r holds a known constant. */
static __inline__ int isconst(int r)
{
	return live.state[r].status==ISCONST;
}
946
/* Externally visible wrapper around isconst(). */
int is_const(int r)
{
	return isconst(r);
}
951
/* Write a constant vreg's value straight to its memory slot and mark
   the register as in-memory only. No-op for non-constants. */
static __inline__ void writeback_const(int r)
{
	if (!isconst(r))
		return;
	Dif (live.state[r].needflush==NF_HANDLER) {
		write_log("Trying to write back constant NF_HANDLER!\n");
		abort();
	}

	raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
	log_vwrite(r);
	live.state[r].val=0;
	set_status(r,INMEM);
}
966
967 static __inline__ void tomem_c(int r)
968 {
969 if (isconst(r)) {
970 writeback_const(r);
971 }
972 else
973 tomem(r);
974 }
975
/* Evict vreg r from its native register: write it back if dirty,
   then remove it from the nreg's holds[] array, swapping in the last
   holder to keep the array dense. */
static void evict(int r)
{
	int rr;

	if (!isinreg(r))
		return;
	tomem(r);
	rr=live.state[r].realreg;

	Dif (live.nat[rr].locked &&
		live.nat[rr].nholds==1) {
		write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
		abort();
	}

	live.nat[rr].nholds--;
	if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
		int topreg=live.nat[rr].holds[live.nat[rr].nholds];
		int thisind=live.state[r].realind;

		live.nat[rr].holds[thisind]=topreg;
		live.state[topreg].realind=thisind;
	}
	live.state[r].realreg=-1;
	set_status(r,INMEM);
}
1002
/* Evict every vreg currently held by native register r, leaving it
   empty and available for reallocation. */
static __inline__ void free_nreg(int r)
{
	int i=live.nat[r].nholds;

	while (i) {
		int vr;

		--i;
		vr=live.nat[r].holds[i];
		evict(vr);
	}
	Dif (live.nat[r].nholds!=0) {
		write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
		abort();
	}
}
1019
/* Use with care! */
/* Mark vreg r as clean without writing anything back: dirty data and
   any pending constant offset are simply discarded. */
static __inline__ void isclean(int r)
{
	if (!isinreg(r))
		return;
	live.state[r].validsize=4;
	live.state[r].dirtysize=0;
	live.state[r].val=0;
	set_status(r,CLEAN);
}
1030
/* Detach vreg r from its native register, discarding any dirty data. */
static __inline__ void disassociate(int r)
{
	isclean(r);
	evict(r);
}
1036
/* Make vreg r a known constant with value val (its old contents are
   discarded). */
static __inline__ void set_const(int r, uae_u32 val)
{
	disassociate(r);
	live.state[r].val=val;
	set_status(r,ISCONST);
}
1043
/* Pending constant offset (or constant value) attached to vreg r. */
static __inline__ uae_u32 get_offset(int r)
{
	return live.state[r].val;
}
1048
/* Allocate a native register to back vreg r for an access of the
   given size. willclobber means the old contents need not be
   preserved; hint (if >= 0) names a preferred nreg. The victim is
   the least-recently-touched unlocked nreg that can handle the
   access size. Also handles widening a partially-valid register and
   materializing constants. Returns the chosen nreg. */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
	int bestreg;
	uae_s32 when;
	int i;
	uae_s32 badness=0; /* to shut up gcc */
	bestreg=-1;
	when=2000000000;

	/* Pick the cheapest suitable nreg: empty ones cost nothing, the
	   hinted one gets a huge bonus, otherwise prefer the least
	   recently touched. */
	for (i=N_REGS;i--;) {
		badness=live.nat[i].touched;
		if (live.nat[i].nholds==0)
			badness=0;
		if (i==hint)
			badness-=200000000;
		if (!live.nat[i].locked && badness<when) {
			if ((size==1 && live.nat[i].canbyte) ||
				(size==2 && live.nat[i].canword) ||
				(size==4)) {
				bestreg=i;
				when=badness;
				if (live.nat[i].nholds==0 && hint<0)
					break;
				if (i==hint)
					break;
			}
		}
	}
	Dif (bestreg==-1)
		abort();

	if (live.nat[bestreg].nholds>0) {
		free_nreg(bestreg);
	}
	if (isinreg(r)) {
		int rr=live.state[r].realreg;
		/* This will happen if we read a partially dirty register at a
		   bigger size */
		Dif (willclobber || live.state[r].validsize>=size)
			abort();
		Dif (live.nat[rr].nholds!=1)
			abort();
		if (size==4 && live.state[r].validsize==2) {
			/* Merge the valid low word already in rr with the upper
			   half from memory: the bswap/zero-extend/bswap sequence
			   clears the low 16 bits of the memory copy, then LEA
			   adds the two halves together. */
			log_isused(bestreg);
			log_visused(r);
			raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
			raw_bswap_32(bestreg);
			raw_zero_extend_16_rr(rr,rr);
			raw_zero_extend_16_rr(bestreg,bestreg);
			raw_bswap_32(bestreg);
			raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
			live.state[r].validsize=4;
			live.nat[rr].touched=touchcnt++;
			return rr;
		}
		if (live.state[r].validsize==1) {
			/* Nothing yet */
		}
		evict(r);
	}

	if (!willclobber) {
		if (live.state[r].status!=UNDEF) {
			if (isconst(r)) {
				/* Materialize the constant into the register. */
				raw_mov_l_ri(bestreg,live.state[r].val);
				live.state[r].val=0;
				live.state[r].dirtysize=4;
				set_status(r,DIRTY);
				log_isused(bestreg);
			}
			else {
				log_isreg(bestreg, r); /* This will also load it! */
				live.state[r].dirtysize=0;
				set_status(r,CLEAN);
			}
		}
		else {
			live.state[r].val=0;
			live.state[r].dirtysize=0;
			set_status(r,CLEAN);
			log_isused(bestreg);
		}
		live.state[r].validsize=4;
	}
	else { /* this is the easiest way, but not optimal. FIXME! */
		/* Now it's trickier, but hopefully still OK */
		if (!isconst(r) || size==4) {
			live.state[r].validsize=size;
			live.state[r].dirtysize=size;
			live.state[r].val=0;
			set_status(r,DIRTY);
			if (size == 4) {
				log_clobberreg(r);
				log_isused(bestreg);
			}
			else {
				log_visused(r);
				log_isused(bestreg);
			}
		}
		else {
			if (live.state[r].status!=UNDEF)
				raw_mov_l_ri(bestreg,live.state[r].val);
			live.state[r].val=0;
			live.state[r].validsize=4;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
			log_isused(bestreg);
		}
	}
	/* Register r among bestreg's holders. */
	live.state[r].realreg=bestreg;
	live.state[r].realind=live.nat[bestreg].nholds;
	live.nat[bestreg].touched=touchcnt++;
	live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
	live.nat[bestreg].nholds++;

	return bestreg;
}
1167
/* alloc_reg_hinted() without a preferred register. */
static int alloc_reg(int r, int size, int willclobber)
{
	return alloc_reg_hinted(r,size,willclobber,-1);
}
1172
/* Drop one lock level from native register r (must be locked). */
static void unlock2(int r)
{
	Dif (!live.nat[r].locked)
		abort();
	live.nat[r].locked--;
}
1179
/* Lock native register r against reallocation/eviction. */
static void setlock(int r)
{
	live.nat[r].locked++;
}
1184
1185
1186 static void mov_nregs(int d, int s)
1187 {
1188 int ns=live.nat[s].nholds;
1189 int nd=live.nat[d].nholds;
1190 int i;
1191
1192 if (s==d)
1193 return;
1194
1195 if (nd>0)
1196 free_nreg(d);
1197
1198 log_isused(d);
1199 raw_mov_l_rr(d,s);
1200
1201 for (i=0;i<live.nat[s].nholds;i++) {
1202 int vs=live.nat[s].holds[i];
1203
1204 live.state[vs].realreg=d;
1205 live.state[vs].realind=i;
1206 live.nat[d].holds[i]=vs;
1207 }
1208 live.nat[d].nholds=live.nat[s].nholds;
1209
1210 live.nat[s].nholds=0;
1211 }
1212
1213
1214 static __inline__ void make_exclusive(int r, int size, int spec)
1215 {
1216 int clobber;
1217 reg_status oldstate;
1218 int rr=live.state[r].realreg;
1219 int nr;
1220 int nind;
1221 int ndirt=0;
1222 int i;
1223
1224 if (!isinreg(r))
1225 return;
1226 if (live.nat[rr].nholds==1)
1227 return;
1228 for (i=0;i<live.nat[rr].nholds;i++) {
1229 int vr=live.nat[rr].holds[i];
1230 if (vr!=r &&
1231 (live.state[vr].status==DIRTY || live.state[vr].val))
1232 ndirt++;
1233 }
1234 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1235 /* Everything else is clean, so let's keep this register */
1236 for (i=0;i<live.nat[rr].nholds;i++) {
1237 int vr=live.nat[rr].holds[i];
1238 if (vr!=r) {
1239 evict(vr);
1240 i--; /* Try that index again! */
1241 }
1242 }
1243 Dif (live.nat[rr].nholds!=1) {
1244 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1245 rr,live.nat[rr].nholds,r);
1246 abort();
1247 }
1248 return;
1249 }
1250
1251 /* We have to split the register */
1252 oldstate=live.state[r];
1253
1254 setlock(rr); /* Make sure this doesn't go away */
1255 /* Forget about r being in the register rr */
1256 disassociate(r);
1257 /* Get a new register, that we will clobber completely */
1258 if (oldstate.status==DIRTY) {
1259 /* If dirtysize is <4, we need a register that can handle the
1260 eventual smaller memory store! Thanks to Quake68k for exposing
1261 this detail ;-) */
1262 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1263 }
1264 else {
1265 nr=alloc_reg_hinted(r,4,1,spec);
1266 }
1267 nind=live.state[r].realind;
1268 live.state[r]=oldstate; /* Keep all the old state info */
1269 live.state[r].realreg=nr;
1270 live.state[r].realind=nind;
1271
1272 if (size<live.state[r].validsize) {
1273 if (live.state[r].val) {
1274 /* Might as well compensate for the offset now */
1275 raw_lea_l_brr(nr,rr,oldstate.val);
1276 live.state[r].val=0;
1277 live.state[r].dirtysize=4;
1278 set_status(r,DIRTY);
1279 }
1280 else
1281 raw_mov_l_rr(nr,rr); /* Make another copy */
1282 }
1283 unlock2(rr);
1284 }
1285
1286 static __inline__ void add_offset(int r, uae_u32 off)
1287 {
1288 live.state[r].val+=off;
1289 }
1290
1291 static __inline__ void remove_offset(int r, int spec)
1292 {
1293 reg_status oldstate;
1294 int rr;
1295
1296 if (isconst(r))
1297 return;
1298 if (live.state[r].val==0)
1299 return;
1300 if (isinreg(r) && live.state[r].validsize<4)
1301 evict(r);
1302
1303 if (!isinreg(r))
1304 alloc_reg_hinted(r,4,0,spec);
1305
1306 Dif (live.state[r].validsize!=4) {
1307 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1308 abort();
1309 }
1310 make_exclusive(r,0,-1);
1311 /* make_exclusive might have done the job already */
1312 if (live.state[r].val==0)
1313 return;
1314
1315 rr=live.state[r].realreg;
1316
1317 if (live.nat[rr].nholds==1) {
1318 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1319 // live.state[r].val,r,rr,target);
1320 adjust_nreg(rr,live.state[r].val);
1321 live.state[r].dirtysize=4;
1322 live.state[r].val=0;
1323 set_status(r,DIRTY);
1324 return;
1325 }
1326 write_log("Failed in remove_offset\n");
1327 abort();
1328 }
1329
1330 static __inline__ void remove_all_offsets(void)
1331 {
1332 int i;
1333
1334 for (i=0;i<VREGS;i++)
1335 remove_offset(i,-1);
1336 }
1337
/* Make vreg r readable in a native register with at least `size` valid
   bytes.  spec>=0 forces that particular native register; can_offset
   permits leaving a pending constant offset unresolved.  The returned
   native register is locked — the caller must unlock2() it. */
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
	remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
	n=live.state[r].realreg;
	switch(size) {
	 case 1:
	    /* Byte access needs a byte-addressable host register (or forced spec). */
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	 case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	 case 4:
	    answer=n;
	    break;
	 default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}
1385
1386
1387
1388 static int readreg(int r, int size)
1389 {
1390 return readreg_general(r,size,-1,0);
1391 }
1392
1393 static int readreg_specific(int r, int size, int spec)
1394 {
1395 return readreg_general(r,size,spec,0);
1396 }
1397
1398 static int readreg_offset(int r, int size)
1399 {
1400 return readreg_general(r,size,-1,1);
1401 }
1402
1403 /* writereg_general(r, size, spec)
1404 *
1405 * INPUT
1406 * - r : mid-layer register
1407 * - size : requested size (1/2/4)
1408 * - spec : -1 if find or make a register free, otherwise specifies
1409 * the physical register to use in any case
1410 *
1411 * OUTPUT
1412 * - hard (physical, x86 here) register allocated to virtual register r
1413 */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    /* Partial writes must not coexist with a pending constant offset. */
    if (size<4) {
	remove_offset(r,spec);
    }

    /* r must own its register outright before it can be written. */
    make_exclusive(r,size,spec);
    if (isinreg(r)) {
	int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
	int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	n=live.state[r].realreg;

	Dif (live.nat[n].nholds!=1)
	    abort();
	switch(size) {
	 case 1:
	    /* Byte writes need a byte-addressable host register (or forced spec). */
	    if (live.nat[n].canbyte || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	 case 2:
	    if (live.nat[n].canword || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	 case 4:
	    live.state[r].dirtysize=ndsize;
	    live.state[r].validsize=nvsize;
	    answer=n;
	    break;
	 default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
	mov_nregs(spec,answer);
	answer=spec;
    }
    if (live.state[r].status==UNDEF)
	live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
	/* Full-width write supersedes any pending constant offset. */
	live.state[r].val=0;
    }
    else {
	Dif (live.state[r].val) {
	    write_log("Problem with val\n");
	    abort();
	}
    }
    set_status(r,DIRTY);
    return answer;
}
1484
1485 static int writereg(int r, int size)
1486 {
1487 return writereg_general(r,size,-1);
1488 }
1489
1490 static int writereg_specific(int r, int size, int spec)
1491 {
1492 return writereg_general(r,size,spec);
1493 }
1494
/* Read-modify-write access: r must have rsize valid bytes on entry and
   will have wsize bytes marked dirty on exit (wsize >= rsize required).
   Returned native register is locked; caller must unlock2() it. */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    remove_offset(r,spec);
    make_exclusive(r,0,spec);

    Dif (wsize<rsize) {
	write_log("Cannot handle wsize<rsize in rmw_general()\n");
	abort();
    }
    if (isinreg(r) && live.state[r].validsize>=rsize) {
	n=live.state[r].realreg;
	Dif (live.nat[n].nholds!=1)
	    abort();

	switch(rsize) {
	 case 1:
	    /* Byte access needs a byte-addressable host register (or forced spec). */
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	 case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	 case 4:
	    answer=n;
	    break;
	 default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    if (wsize>live.state[r].dirtysize)
	live.state[r].dirtysize=wsize;
    if (wsize>live.state[r].validsize)
	live.state[r].validsize=wsize;
    set_status(r,DIRTY);

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;

    Dif (live.state[r].val) {
	write_log("Problem with val(rmw)\n");
	abort();
    }
    return answer;
}
1560
1561 static int rmw(int r, int wsize, int rsize)
1562 {
1563 return rmw_general(r,wsize,rsize,-1);
1564 }
1565
1566 static int rmw_specific(int r, int wsize, int rsize, int spec)
1567 {
1568 return rmw_general(r,wsize,rsize,spec);
1569 }
1570
1571
1572 /* needed for restoring the carry flag on non-P6 cores */
1573 static void bt_l_ri_noclobber(R4 r, IMM i)
1574 {
1575 int size=4;
1576 if (i<16)
1577 size=2;
1578 r=readreg(r,size);
1579 raw_bt_l_ri(r,i);
1580 unlock2(r);
1581 }
1582
1583 /********************************************************************
1584 * FPU register status handling. EMIT TIME! *
1585 ********************************************************************/
1586
/* Write FPU vreg r back to its memory slot if dirty; the value stays in
   the register (status becomes CLEAN). */
static void f_tomem(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=CLEAN;
    }
}
1598
/* Write FPU vreg r back to memory if dirty, dropping the register copy
   in the process (status becomes INMEM).  Uses the store-and-pop raw op. */
static void f_tomem_drop(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=INMEM;
    }
}
1610
1611
1612 static __inline__ int f_isinreg(int r)
1613 {
1614 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1615 }
1616
/* Push FPU vreg r out of its host register, writing it back to memory if
   dirty, and fix up the holder bookkeeping of the native register. */
static void f_evict(int r)
{
    int rr;

    if (!f_isinreg(r))
	return;
    rr=live.fate[r].realreg;
    /* Last holder: store-and-pop is cheaper than store-and-keep. */
    if (live.fat[rr].nholds==1)
	f_tomem_drop(r);
    else
	f_tomem(r);

    Dif (live.fat[rr].locked &&
	live.fat[rr].nholds==1) {
	write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
	abort();
    }

    live.fat[rr].nholds--;
    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
	/* Swap the last holder into the vacated slot. */
	int topreg=live.fat[rr].holds[live.fat[rr].nholds];
	int thisind=live.fate[r].realind;
	live.fat[rr].holds[thisind]=topreg;
	live.fate[topreg].realind=thisind;
    }
    live.fate[r].status=INMEM;
    live.fate[r].realreg=-1;
}
1645
/* Evict every FPU vreg held by native FPU register r, leaving it empty.
   Walks from the top down so f_evict's swap bookkeeping stays trivial. */
static __inline__ void f_free_nreg(int r)
{
    int i=live.fat[r].nholds;

    while (i) {
	int vr;

	--i;
	vr=live.fat[r].holds[i];
	f_evict(vr);
    }
    Dif (live.fat[r].nholds!=0) {
	write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
	abort();
    }
}
1662
1663
1664 /* Use with care! */
1665 static __inline__ void f_isclean(int r)
1666 {
1667 if (!f_isinreg(r))
1668 return;
1669 live.fate[r].status=CLEAN;
1670 }
1671
/* Drop vreg r's register copy without writing it back: marking it CLEAN
   first makes f_evict() discard rather than store the value. */
static __inline__ void f_disassociate(int r)
{
    f_isclean(r);
    f_evict(r);
}
1677
1678
1679
/* Allocate a native FPU register for vreg r, preferring empty registers
   and otherwise the least-recently-touched unlocked one.  If willclobber
   is false the current value is loaded from memory first. */
static int f_alloc_reg(int r, int willclobber)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness;
    bestreg=-1;
    when=2000000000;
    for (i=N_FREGS;i--;) {
	badness=live.fat[i].touched;
	if (live.fat[i].nholds==0)
	    badness=0;

	if (!live.fat[i].locked && badness<when) {
	    bestreg=i;
	    when=badness;
	    /* An empty register is as good as it gets — stop searching. */
	    if (live.fat[i].nholds==0)
		break;
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.fat[bestreg].nholds>0) {
	f_free_nreg(bestreg);
    }
    if (f_isinreg(r)) {
	f_evict(r);
    }

    if (!willclobber) {
	if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
	    raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
#else
	    raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
#endif
	}
	live.fate[r].status=CLEAN;
    }
    else {
	live.fate[r].status=DIRTY;
    }
    live.fate[r].realreg=bestreg;
    live.fate[r].realind=live.fat[bestreg].nholds;
    live.fat[bestreg].touched=touchcnt++;
    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
    live.fat[bestreg].nholds++;

    return bestreg;
}
1731
1732 static void f_unlock(int r)
1733 {
1734 Dif (!live.fat[r].locked)
1735 abort();
1736 live.fat[r].locked--;
1737 }
1738
1739 static void f_setlock(int r)
1740 {
1741 live.fat[r].locked++;
1742 }
1743
1744 static __inline__ int f_readreg(int r)
1745 {
1746 int n;
1747 int answer=-1;
1748
1749 if (f_isinreg(r)) {
1750 n=live.fate[r].realreg;
1751 answer=n;
1752 }
1753 /* either the value was in memory to start with, or it was evicted and
1754 is in memory now */
1755 if (answer<0)
1756 answer=f_alloc_reg(r,0);
1757
1758 live.fat[answer].locked++;
1759 live.fat[answer].touched=touchcnt++;
1760 return answer;
1761 }
1762
/* Ensure FPU vreg r is the sole occupant of its native register.  If the
   other holders are all clean they are evicted; otherwise r is split into
   a fresh register.  clobber!=0 means the old value need not be copied. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
	return;
    if (live.fat[rr].nholds==1)
	return;
    /* Count sharers whose data would be lost by a plain eviction. */
    for (i=0;i<live.fat[rr].nholds;i++) {
	int vr=live.fat[rr].holds[i];
	if (vr!=r && live.fate[vr].status==DIRTY)
	    ndirt++;
    }
    if (!ndirt && !live.fat[rr].locked) {
	/* Everything else is clean, so let's keep this register */
	for (i=0;i<live.fat[rr].nholds;i++) {
	    int vr=live.fat[rr].holds[i];
	    if (vr!=r) {
		f_evict(vr);
		i--; /* Try that index again! */
	    }
	}
	Dif (live.fat[rr].nholds!=1) {
	    write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
	    for (i=0;i<live.fat[rr].nholds;i++) {
		write_log(" %d(%d,%d)",live.fat[rr].holds[i],
		       live.fate[live.fat[rr].holds[i]].realreg,
		       live.fate[live.fat[rr].holds[i]].realind);
	    }
	    write_log("\n");
	    abort();
	}
	return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
	raw_fmov_rr(nr,rr);  /* Make another copy */
    live.fate[r]=oldstate;   /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
1819
1820
1821 static __inline__ int f_writereg(int r)
1822 {
1823 int n;
1824 int answer=-1;
1825
1826 f_make_exclusive(r,1);
1827 if (f_isinreg(r)) {
1828 n=live.fate[r].realreg;
1829 answer=n;
1830 }
1831 if (answer<0) {
1832 answer=f_alloc_reg(r,1);
1833 }
1834 live.fate[r].status=DIRTY;
1835 live.fat[answer].locked++;
1836 live.fat[answer].touched=touchcnt++;
1837 return answer;
1838 }
1839
1840 static int f_rmw(int r)
1841 {
1842 int n;
1843
1844 f_make_exclusive(r,0);
1845 if (f_isinreg(r)) {
1846 n=live.fate[r].realreg;
1847 }
1848 else
1849 n=f_alloc_reg(r,0);
1850 live.fate[r].status=DIRTY;
1851 live.fat[n].locked++;
1852 live.fat[n].touched=touchcnt++;
1853 return n;
1854 }
1855
/* Transfer the FPU condition codes (from FP_RESULT) into the emulated CPU
   flags.  tmp is a scratch vreg used to claim FFLAG_NREG when the raw op
   would clobber it on this host. */
static void fflags_into_flags_internal(uae_u32 tmp)
{
    int r;

    clobber_flags();
    r=f_readreg(FP_RESULT);
    if (FFLAG_NREG_CLOBBER_CONDITION) {
	int tmp2=tmp;
	/* Reserve the native register the raw op will trash. */
	tmp=writereg_specific(tmp,4,FFLAG_NREG);
	raw_fflags_into_flags(r);
	unlock2(tmp);
	forget_about(tmp2);
    }
    else
	raw_fflags_into_flags(r);
    f_unlock(r);
}
1873
1874
1875
1876
1877 /********************************************************************
1878 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
1879 ********************************************************************/
1880
1881 /*
1882 * RULES FOR HANDLING REGISTERS:
1883 *
1884 * * In the function headers, order the parameters
1885 * - 1st registers written to
1886 * - 2nd read/modify/write registers
1887 * - 3rd registers read from
1888 * * Before calling raw_*, you must call readreg, writereg or rmw for
1889 * each register
1890 * * The order for this is
1891 * - 1st call remove_offset for all registers written to with size<4
1892 * - 2nd call readreg for all registers read without offset
1893 * - 3rd call rmw for all rmw registers
1894 * - 4th call readreg_offset for all registers that can handle offsets
1895 * - 5th call get_offset for all the registers from the previous step
1896 * - 6th call writereg for all written-to registers
1897 * - 7th call raw_*
1898 * - 8th unlock2 all registers that were locked
1899 */
1900
/* The host flags register now holds the authoritative flag state; any
   stacked copy is stale, and the flags must be preserved from here on. */
MIDFUNC(0,live_flags,(void))
{
    live.flags_on_stack=TRASH;
    live.flags_in_flags=VALID;
    live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))
1908
/* The current flag state is dead — subsequent code may trash it freely. */
MIDFUNC(0,dont_care_flags,(void))
{
    live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))
1914
1915
/* Copy the carry flag into the emulated X flag by SETC'ing directly into
   FLAGX's memory slot (cc code 2 = carry). */
MIDFUNC(0,duplicate_carry,(void))
{
    evict(FLAGX);
    make_flags_live_internal();
    COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
    log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))
1924
/* Reload the carry flag from the emulated X flag.  The BT path is cheap
   but causes partial-register stalls on P6-class cores; the ROL path
   avoids the stall at the cost of being slower elsewhere. */
MIDFUNC(0,restore_carry,(void))
{
    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
	bt_l_ri_noclobber(FLAGX,0);
    }
    else { /* Avoid the stall the above creates.
	      This is slow on non-P6, though.
	    */
	COMPCALL(rol_b_ri(FLAGX,8));
	isclean(FLAGX);
    }
}
MENDFUNC(0,restore_carry,(void))
1938
/* Begin a region whose emitted instructions must produce correct flags. */
MIDFUNC(0,start_needflags,(void))
{
    needflags=1;
}
MENDFUNC(0,start_needflags,(void))
1944
/* End the flags-sensitive region started by start_needflags. */
MIDFUNC(0,end_needflags,(void))
{
    needflags=0;
}
MENDFUNC(0,end_needflags,(void))
1950
/* Public wrapper: materialize the emulated flags in the host flags register. */
MIDFUNC(0,make_flags_live,(void))
{
    make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))
1956
/* Translate the FPU result flags into the emulated CPU flags; tmp is a
   scratch vreg.  NOTE(review): clobber_flags() is also called inside
   fflags_into_flags_internal — the double call looks redundant; confirm. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
    clobber_flags();
    fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
1963
1964
/* Bit test with immediate index; indices <16 only need the low word valid. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=readreg(r,size);
    raw_bt_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
1976
/* Bit test with register index. */
MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
    CLOBBER_BT;
    r=readreg(r,4);
    b=readreg(b,4);
    raw_bt_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
1987
/* Bit test-and-complement with immediate index (word access if i<16). */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btc_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
1999
/* Bit test-and-complement with register index. */
MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btc_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2010
2011
/* Bit test-and-reset with immediate index (word access if i<16). */
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btr_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2023
/* Bit test-and-reset with register index. */
MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btr_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2034
2035
/* Bit test-and-set with immediate index (word access if i<16). */
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_bts_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2047
/* Bit test-and-set with register index. */
MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_bts_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2058
/* Load a 32-bit value from fixed memory address s into vreg d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,4);
    raw_mov_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2067
2068
/* Indirect call through the address held in vreg r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
    r=readreg(r,4);
    raw_call_r(r);
    unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2076
/* Subtract immediate s from the 32-bit value at fixed memory address d. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
    CLOBBER_SUB;
    raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2083
/* Store 32-bit immediate s to fixed memory address d. */
MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2090
/* Store 16-bit immediate s to fixed memory address d. */
MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2097
/* Store 8-bit immediate s to fixed memory address d. */
MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2104
/* Rotate byte left by immediate; a zero count is a no-op unless flags are needed. */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,1,1);
    raw_rol_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2115
/* Rotate word left by immediate; zero count is a no-op unless flags are needed. */
MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,2,2);
    raw_rol_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2126
/* Rotate long left by immediate; zero count is a no-op unless flags are needed. */
MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,4,4);
    raw_rol_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2137
2138 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2139 {
2140 if (isconst(r)) {
2141 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2142 return;
2143 }
2144 CLOBBER_ROL;
2145 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2146 d=rmw(d,4,4);
2147 Dif (r!=1) {
2148 write_log("Illegal register %d in raw_rol_b\n",r);
2149 abort();
2150 }
2151 raw_rol_l_rr(d,r) ;
2152 unlock2(r);
2153 unlock2(d);
2154 }
2155 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2156
/* Rotate word left by a register count (count must land in CL). */
MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_rol_b\n",r);
	abort();
    }
    raw_rol_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2176
/* Rotate byte left by a register count (count must land in CL). */
MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_rol_b\n",r);
	abort();
    }
    raw_rol_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2197
2198
2199 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2200 {
2201 if (isconst(r)) {
2202 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2203 return;
2204 }
2205 CLOBBER_SHLL;
2206 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2207 d=rmw(d,4,4);
2208 Dif (r!=1) {
2209 write_log("Illegal register %d in raw_rol_b\n",r);
2210 abort();
2211 }
2212 raw_shll_l_rr(d,r) ;
2213 unlock2(r);
2214 unlock2(d);
2215 }
2216 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2217
/* Shift word left by a register count (count must land in CL). */
MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHLL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shll_b\n",r);
	abort();
    }
    raw_shll_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2237
/* Shift byte left by a register count (count must land in CL). */
MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHLL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shll_b\n",r);
	abort();
    }
    raw_shll_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2258
2259
/* Rotate byte right by immediate; zero count is a no-op unless flags are needed. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,1,1);
    raw_ror_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2270
/* Rotate word right by immediate; zero count is a no-op unless flags are needed. */
MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,2,2);
    raw_ror_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2281
/* Rotate long right by immediate; zero count is a no-op unless flags are needed. */
MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,4,4);
    raw_ror_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2292
/* Rotate long right by a register count (count forced into CL). */
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,4,4);
    raw_ror_l_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2307
/* Rotate word right by a register count (count forced into CL). */
MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    raw_ror_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2322
/* Rotate byte right by a register count (count forced into CL). */
MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    raw_ror_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2338
2339 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2340 {
2341 if (isconst(r)) {
2342 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2343 return;
2344 }
2345 CLOBBER_SHRL;
2346 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2347 d=rmw(d,4,4);
2348 Dif (r!=1) {
2349 write_log("Illegal register %d in raw_rol_b\n",r);
2350 abort();
2351 }
2352 raw_shrl_l_rr(d,r) ;
2353 unlock2(r);
2354 unlock2(d);
2355 }
2356 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2357
/* Logical shift word right by a register count (count must land in CL). */
MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHRL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shrl_b\n",r);
	abort();
    }
    raw_shrl_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2377
/* Logical shift byte right by a register count (count must land in CL). */
MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHRL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shrl_b\n",r);
	abort();
    }
    raw_shrl_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2398
2399
2400
/* Shift long left by immediate; folds into a known-constant vreg when
   flags are not needed. */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val<<=i;
	return;
    }
    CLOBBER_SHLL;
    r=rmw(r,4,4);
    raw_shll_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2415
/* Shift word left by immediate; zero count is a no-op unless flags are needed. */
MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,2,2);
    raw_shll_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2426
/* Shift byte left by immediate; zero count is a no-op unless flags are needed. */
MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,1,1);
    raw_shll_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2437
/* Logical shift long right by immediate; folds into a known-constant vreg
   when flags are not needed. */
MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val>>=i;
	return;
    }
    CLOBBER_SHRL;
    r=rmw(r,4,4);
    raw_shrl_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2452
/* Logical shift word right by immediate; zero count is a no-op unless flags needed. */
MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,2,2);
    raw_shrl_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2463
/* Logical shift byte right by immediate; zero count is a no-op unless flags needed. */
MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,1,1);
    raw_shrl_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2474
/* Arithmetic shift long right by immediate; zero count is a no-op unless flags needed. */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,4,4);
    raw_shra_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2485
/* Arithmetic shift word right by immediate; zero count is a no-op unless flags needed. */
MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,2,2);
    raw_shra_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2496
/* Arithmetic shift byte right by immediate; zero count is a no-op unless flags needed. */
MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,1,1);
    raw_shra_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2507
2508 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2509 {
2510 if (isconst(r)) {
2511 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2512 return;
2513 }
2514 CLOBBER_SHRA;
2515 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2516 d=rmw(d,4,4);
2517 Dif (r!=1) {
2518 write_log("Illegal register %d in raw_rol_b\n",r);
2519 abort();
2520 }
2521 raw_shra_l_rr(d,r) ;
2522 unlock2(r);
2523 unlock2(d);
2524 }
2525 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2526
/* Arithmetic shift word right by a register count (count must land in CL). */
MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shra_b\n",r);
	abort();
    }
    raw_shra_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2546
/* Arithmetic shift byte right by a register count (count must land in CL). */
MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shra_b\n",r);
	abort();
    }
    raw_shra_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2567
2568
/* Set byte vreg d to 0/1 according to condition code cc. */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
    CLOBBER_SETCC;
    d=writereg(d,1);
    raw_setcc(d,cc);
    unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))
2577
/* Set the byte at fixed memory address d according to condition code cc. */
MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
    CLOBBER_SETCC;
    raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2584
/* Conditionally move s into d when condition cc holds; d==s is a no-op. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
    if (d==s)
	return;
    CLOBBER_CMOV;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_cmov_l_rr(d,s,cc);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2597
/* Conditionally load d from fixed memory address s when condition cc holds. */
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
    CLOBBER_CMOV;
    d=rmw(d,4,4);
    raw_cmov_l_rm(d,s,cc);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2606
/* Bit-scan-forward: d = index of lowest set bit in s. */
MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
{
    CLOBBER_BSF;
    s=readreg(s,4);
    d=writereg(d,4);
    raw_bsf_l_rr(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2617
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
    /* Signed 32x32 multiply: d = d * s (low 32 bits). */
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_imul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2628
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
    /* Signed widening multiply; operands are pinned to the native
       registers MUL_NREG1/MUL_NREG2 that the raw op requires. */
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_imul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2639
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
    /* Unsigned widening multiply; operands pinned to MUL_NREG1/MUL_NREG2. */
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_mul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2650
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
    /* Unsigned 32x32 multiply: d = d * s (low 32 bits). */
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_mul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2661
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    /* Sign-extend word s into long d; constants are folded directly. */
    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s16)live.state[s].val);
	return;
    }

    CLOBBER_SE16;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_sign_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
2691
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;

    /* Sign-extend byte s into long d; constants are folded directly. */
    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_SE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_sign_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
2723
2724
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    /* Zero-extend word s into long d; constants are folded directly. */
    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u16)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE16;
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_zero_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
2754
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;
    /* Zero-extend byte s into long d; constants are folded directly. */
    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_zero_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
2785
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
    /* Byte move s->d; no-op when identical, constant source uses mov_b_ri. */
    if (d==s)
	return;
    if (isconst(s)) {
	COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=writereg(d,1);
    raw_mov_b_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
2803
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
    /* Word move s->d; no-op when identical, constant source uses mov_w_ri. */
    if (d==s)
	return;
    if (isconst(s)) {
	COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=writereg(d,2);
    raw_mov_w_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
2821
2822
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
    /* Load long at baser+index*factor into d. */
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_mov_l_rrm_indexed(d,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
2836
MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
    /* Load word at baser+index*factor into d. */
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,2);

    raw_mov_w_rrm_indexed(d,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
2850
MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
    /* Load byte at baser+index*factor into d. */
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,1);

    raw_mov_b_rrm_indexed(d,baser,index,factor);

    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
2865
2866
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
    /* Store long s to baser+index*factor. */
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    s=readreg(s,4);

    /* Debug check: the source must not share a native reg with the
       address operands. */
    Dif (baser==s || index==s)
	abort();


    raw_mov_l_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
2884
MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
    /* Store word s to baser+index*factor. */
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    s=readreg(s,2);

    raw_mov_w_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
2898
MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
    /* Store byte s to baser+index*factor. */
    CLOBBER_MOV;
    s=readreg(s,1);
    baser=readreg(baser,4);
    index=readreg(index,4);

    raw_mov_b_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
2912
2913
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
    int basereg=baser;
    int indexreg=index;

    /* Store long s to base+baser+index*factor; pending offsets of the
       address registers are folded into the constant base. */
    CLOBBER_MOV;
    s=readreg(s,4);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
2933
MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
    int basereg=baser;
    int indexreg=index;

    /* Store word s to base+baser+index*factor (offsets folded into base). */
    CLOBBER_MOV;
    s=readreg(s,2);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
2953
MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
    int basereg=baser;
    int indexreg=index;

    /* Store byte s to base+baser+index*factor (offsets folded into base). */
    CLOBBER_MOV;
    s=readreg(s,1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2973
2974
2975
2976 /* Read a long from base+baser+factor*index */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    /* NOTE(review): unlike the w/b variants below, this one does not call
       remove_offset(d,-1) before writereg — confirm this asymmetry is
       intentional. */
    CLOBBER_MOV;
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,4);
    raw_mov_l_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
2994
2995
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    /* Load word at base+baser+index*factor into d (offsets folded into base). */
    CLOBBER_MOV;
    remove_offset(d,-1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,2);
    raw_mov_w_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3014
3015
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    /* Load byte at base+baser+index*factor into d (offsets folded into base). */
    CLOBBER_MOV;
    remove_offset(d,-1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,1);
    raw_mov_b_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3034
3035 /* Read a long from base+factor*index */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
    int indexreg=index;

    /* Load long at base+index*factor into d; a constant index reduces
       to a plain absolute load. */
    if (isconst(index)) {
	COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
	return;
    }

    CLOBBER_MOV;
    index=readreg_offset(index,4);
    base+=get_offset(indexreg)*factor;
    d=writereg(d,4);

    raw_mov_l_rm_indexed(d,base,index,factor);
    unlock2(index);
    unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3055
3056
3057 /* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
    /* Load long at address s+offset into d; constant s becomes an
       absolute load. */
    if (isconst(s)) {
	COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,4);

    raw_mov_l_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3073
3074 /* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
    /* Load word at address s+offset into d; constant s becomes an
       absolute load. */
    if (isconst(s)) {
	COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,2);

    raw_mov_w_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3090
/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
    /* Load byte at address s+offset into d; constant s becomes an
       absolute load. */
    if (isconst(s)) {
	COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,1);

    raw_mov_b_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3107
3108 /* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
    int sreg=s;
    /* Load long at s+offset into d; the pending offset of s is folded
       into the displacement. */
    if (isconst(s)) {
	COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,4);

    raw_mov_l_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3126
3127 /* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
    int sreg=s;
    /* Load word at s+offset into d (pending offset of s folded in). */
    if (isconst(s)) {
	COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,2);

    raw_mov_w_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3146
/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
    int sreg=s;
    /* Load byte at s+offset into d (pending offset of s folded in). */
    if (isconst(s)) {
	COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,1);

    raw_mov_b_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3166
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    /* Store immediate long i to the address d+offset; constant d becomes
       an absolute store. */
    if (isconst(d)) {
	COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
	return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_l_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3182
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    /* Store immediate word i to the address d+offset. */
    if (isconst(d)) {
	COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
	return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3198
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    /* Store immediate byte i to the address d+offset. */
    if (isconst(d)) {
	COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
	return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3214
3215 /* Warning! OFFSET is byte sized only! */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
    /* Store long s to the address d+offset; constant operands are
       routed to the memory/immediate variants. */
    if (isconst(d)) {
	COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
	return;
    }
    if (isconst(s)) {
	COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_mov_l_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3236
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
    /* Store word s to the address d+offset (constants use m/i variants). */
    if (isconst(d)) {
	COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
	return;
    }
    if (isconst(s)) {
	COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg(d,4);
    raw_mov_w_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3256
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
    /* Store byte s to the address d+offset (constants use m/i variants). */
    if (isconst(d)) {
	COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
	return;
    }
    if (isconst(s)) {
	COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg(d,4);
    raw_mov_b_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3276
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
    /* d = s + offset; folds to a constant, or to a lazy add_offset when
       source and destination coincide. */
    if (isconst(s)) {
	COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
	return;
    }
#if USE_OFFSET
    if (d==s) {
	add_offset(d,offset);
	return;
    }
#endif
    CLOBBER_LEA;
    s=readreg(s,4);
    d=writereg(d,4);
    raw_lea_l_brr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3297
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
    /* d = s + index*factor + offset; a zero offset uses the shorter form. */
    if (!offset) {
	COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
	return;
    }
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_brr_indexed(d,s,index,factor,offset);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3315
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
    /* d = s + index*factor. */
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_rr_indexed(d,s,index,factor);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3329
/* write the long in s to the address contained in d+offset */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
    int dreg=d;
    /* Store long s to d+offset; d's pending offset is folded into the
       displacement, constant d becomes an absolute store. */
    if (isconst(d)) {
	COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);

    raw_mov_l_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3349
/* write the word in s to the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
    int dreg=d;

    /* Store word s to d+offset (pending offset folded in). */
    if (isconst(d)) {
	COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3369
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
    int dreg=d;
    /* Store byte s to d+offset (pending offset folded in). */
    if (isconst(d)) {
	COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3387
MIDFUNC(1,bswap_32,(RW4 r))
{
    int reg=r;

    /* Reverse the byte order of the 32-bit register r; a constant is
       folded with reverse32 without emitting code. */
    if (isconst(r)) {
	uae_u32 oldv=live.state[r].val;
	live.state[r].val=reverse32(oldv);
	return;
    }

    CLOBBER_SW32;
    r=rmw(r,4,4);
    raw_bswap_32(r);
    unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))
3404
MIDFUNC(1,bswap_16,(RW2 r))
{
    /* Swap the two low-order bytes of r, keeping the upper 16 bits;
       constants are folded without emitting code. */
    if (isconst(r)) {
	uae_u32 oldv=live.state[r].val;
	live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
	    (oldv&0xffff0000);
	return;
    }

    CLOBBER_SW16;
    r=rmw(r,2,2);

    raw_bswap_16(r);
    unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3421
3422
3423
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
    int olds;

    /* Long register move s->d. Instead of emitting a move, d is made an
       alias of the native register already holding s: d is added to that
       nreg's holds[] list and marked DIRTY. */
    if (d==s) { /* How pointless! */
	return;
    }
    if (isconst(s)) {
	COMPCALL(mov_l_ri)(d,live.state[s].val);
	return;
    }
    olds=s;
    disassociate(d);
    s=readreg_offset(s,4);
    live.state[d].realreg=s;
    live.state[d].realind=live.nat[s].nholds;
    live.state[d].val=live.state[olds].val;
    live.state[d].validsize=4;
    live.state[d].dirtysize=4;
    set_status(d,DIRTY);

    live.nat[s].holds[live.nat[s].nholds]=d;
    live.nat[s].nholds++;
    log_clobberreg(d);
    /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
       d,s,live.state[d].realind,live.nat[s].nholds); */
    unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3453
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
    /* Store long s to absolute address d; constant s uses mov_l_mi. */
    if (isconst(s)) {
	COMPCALL(mov_l_mi)(d,live.state[s].val);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);

    raw_mov_l_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3467
3468
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
    /* Store word s to absolute address d; constant s uses mov_w_mi. */
    if (isconst(s)) {
	COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,2);

    raw_mov_w_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3482
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
    /* Load word at absolute address s into d. */
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3492
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
    /* Store byte s to absolute address d; constant s uses mov_b_mi. */
    if (isconst(s)) {
	COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);

    raw_mov_b_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3507
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
    /* Load byte at absolute address s into d. */
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3517
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
    /* Load immediate: just record s as d's known constant value;
       no code is emitted. */
    set_const(d,s);
    return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3524
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
    /* Load immediate word s into register d. */
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3534
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
    /* Load immediate byte s into register d. */
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3544
3545
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
    /* Add immediate s to the long at absolute address d. */
    CLOBBER_ADD;
    raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3552
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
    /* Add immediate s to the word at absolute address d. */
    CLOBBER_ADD;
    raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3559
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
    /* Add immediate s to the byte at absolute address d. */
    CLOBBER_ADD;
    raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3566
3567
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
    /* TEST of long d against immediate i (flags only). */
    CLOBBER_TEST;
    d=readreg(d,4);

    raw_test_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3577
3578 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3579 {
3580 CLOBBER_TEST;
3581 d=readreg(d,4);
3582 s=readreg(s,4);
3583
3584 raw_test_l_rr(d,s);;
3585 unlock2(d);
3586 unlock2(s);
3587 }
3588 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3589
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
    /* TEST of word d against s (flags only). */
    CLOBBER_TEST;
    d=readreg(d,2);
    s=readreg(s,2);

    raw_test_w_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))
3601
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
    /* TEST of byte d against s (flags only). */
    CLOBBER_TEST;
    d=readreg(d,1);
    s=readreg(s,1);

    raw_test_b_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3613
3614
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
    /* d &= i (long); folded at compile time when d is a known constant
       and no flags are needed. */
    if (isconst(d) && !needflags) {
	live.state[d].val &= i;
	return;
    }

    CLOBBER_AND;
    d=rmw(d,4,4);

    raw_and_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
3629
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
    /* d &= s (long). */
    CLOBBER_AND;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_and_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))
3641
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
    /* d &= s (word). */
    CLOBBER_AND;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_and_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))
3653
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
    /* d &= s (byte). */
    CLOBBER_AND;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_and_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
3665
3666 // gb-- used for making an fpcr value in compemu_fpp.cpp
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
    /* d |= long at absolute address s. */
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
3676
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
    /* d |= i (long); folded when d is a known constant and flags unused. */
    if (isconst(d) && !needflags) {
	live.state[d].val|=i;
	return;
    }
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
3690
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
    /* d |= s (long); folded when both are known constants and flags unused. */
    if (isconst(d) && isconst(s) && !needflags) {
	live.state[d].val|=live.state[s].val;
	return;
    }
    CLOBBER_OR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_or_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))
3706
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
    /* d |= s (word). */
    CLOBBER_OR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_or_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))
3718
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
    /* d |= s (byte). */
    CLOBBER_OR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_or_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
3730
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
    /* d += s + carry (long). */
    CLOBBER_ADC;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_adc_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))
3743
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
    /* d += s + carry (word). */
    CLOBBER_ADC;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_adc_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))
3755
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
    /* d += s + carry (byte). */
    CLOBBER_ADC;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_adc_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
3767
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
    /* d += s (long); constant source uses the immediate form. */
    if (isconst(s)) {
	COMPCALL(add_l_ri)(d,live.state[s].val);
	return;
    }

    CLOBBER_ADD;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_add_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))
3785
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
    /* d += s (word); constant source uses the immediate form. */
    if (isconst(s)) {
	COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
	return;
    }

    CLOBBER_ADD;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_add_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))
3802
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
    /* d += s (byte); constant source uses the immediate form. */
    if (isconst(s)) {
	COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_ADD;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_add_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
3819
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
    /* d -= i (long). When flags are not needed this is folded into the
       constant or recorded lazily as a negative pending offset. */
    if (!i && !needflags)
	return;
    if (isconst(d) && !needflags) {
	live.state[d].val-=i;
	return;
    }
#if USE_OFFSET
    if (!needflags) {
	add_offset(d,-i);
	return;
    }
#endif

    CLOBBER_SUB;
    d=rmw(d,4,4);

    raw_sub_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
3842
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
    /* d -= i (word); skipped entirely for a zero immediate with no flags. */
    if (!i && !needflags)
	return;

    CLOBBER_SUB;
    d=rmw(d,2,2);

    raw_sub_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
3855
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
    /* d -= i (byte); skipped entirely for a zero immediate with no flags. */
    if (!i && !needflags)
	return;

    CLOBBER_SUB;
    d=rmw(d,1,1);

    raw_sub_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
3869
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
    /* d += i (long); folded into the constant or recorded as a pending
       offset when flags are not needed. */
    if (!i && !needflags)
	return;
    if (isconst(d) && !needflags) {
	live.state[d].val+=i;
	return;
    }
#if USE_OFFSET
    if (!needflags) {
	add_offset(d,i);
	return;
    }
#endif
    CLOBBER_ADD;
    d=rmw(d,4,4);
    raw_add_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
3890
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
    /* d += i (word); skipped for a zero immediate with no flags. */
    if (!i && !needflags)
	return;

    CLOBBER_ADD;
    d=rmw(d,2,2);

    raw_add_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
3903
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
    /* d += i (byte); skipped for a zero immediate with no flags. */
    if (!i && !needflags)
	return;

    CLOBBER_ADD;
    d=rmw(d,1,1);

    raw_add_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
3917
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
    /* d -= s + borrow (long). */
    CLOBBER_SBB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sbb_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))
3929
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
    /* d -= s + borrow (word). */
    CLOBBER_SBB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sbb_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))
3941
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
    /* d -= s + borrow (byte). */
    CLOBBER_SBB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sbb_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
3953
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
    /* d -= s (long); constant source uses the immediate form. */
    if (isconst(s)) {
	COMPCALL(sub_l_ri)(d,live.state[s].val);
	return;
    }

    CLOBBER_SUB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sub_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))
3970
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
    /* d -= s (word); constant source uses the immediate form. */
    if (isconst(s)) {
	COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
	return;
    }

    CLOBBER_SUB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sub_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))
3987
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
    /* d -= s (byte); constant source uses the immediate form. */
    if (isconst(s)) {
	COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_SUB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sub_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4004
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
    /* Compare long d with s (flags only). */
    CLOBBER_CMP;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_cmp_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))
4016
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
    /* Compare long r with immediate i (flags only). */
    CLOBBER_CMP;
    r=readreg(r,4);

    raw_cmp_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4026
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
    /* Compare word d with s (flags only). */
    CLOBBER_CMP;
    s=readreg(s,2);
    d=readreg(d,2);

    raw_cmp_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))
4038
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
    /* Compare byte d with s (flags only). */
    CLOBBER_CMP;
    s=readreg(s,1);
    d=readreg(d,1);

    raw_cmp_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4050
4051
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
    /* d ^= s (long). */
    CLOBBER_XOR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_xor_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))
4063
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
    /* d ^= s (word). */
    CLOBBER_XOR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_xor_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))
4075
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
    /* d ^= s (byte). */
    CLOBBER_XOR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_xor_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4087
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
    /* Indirect call through register r with one argument (in1) and one
       result (out1). Flags and pending offsets are invalidated first;
       afterwards out1 is bound directly to REG_RESULT. */
    clobber_flags();
    remove_all_offsets();
    if (osize==4) {
	if (out1!=in1 && out1!=r) {
	    COMPCALL(forget_about)(out1);
	}
    }
    else {
	/* Partial-size result: flush out1 to memory instead. */
	tomem_c(out1);
    }

    in1=readreg_specific(in1,isize,REG_PAR1);
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
			     any need for swapping nregs in prepare_for_call_2
			  */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in1);
#endif
    unlock2(in1);
    unlock2(r);

    prepare_for_call_2();
    raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(4);
#endif


    /* Record that REG_RESULT now holds out1. */
    live.nat[REG_RESULT].holds[0]=out1;
    live.nat[REG_RESULT].nholds=1;
    live.nat[REG_RESULT].touched=touchcnt++;

    live.state[out1].realreg=REG_RESULT;
    live.state[out1].realind=0;
    live.state[out1].val=0;
    live.state[out1].validsize=osize;
    live.state[out1].dirtysize=osize;
    set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4132
4133 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4134 {
4135 clobber_flags();
4136 remove_all_offsets();
4137 in1=readreg_specific(in1,isize1,REG_PAR1);
4138 in2=readreg_specific(in2,isize2,REG_PAR2);
4139 r=readreg(r,4);
4140 prepare_for_call_1(); /* This should ensure that there won't be
4141 any need for swapping nregs in prepare_for_call_2
4142 */
4143 #if USE_NORMAL_CALLING_CONVENTION
4144 raw_push_l_r(in2);
4145 raw_push_l_r(in1);
4146 #endif
4147 unlock2(r);
4148 unlock2(in1);
4149 unlock2(in2);
4150 prepare_for_call_2();
4151 raw_call_r(r);
4152 #if USE_NORMAL_CALLING_CONVENTION
4153 raw_inc_sp(8);
4154 #endif
4155 }
4156 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4157
4158 /* forget_about() takes a mid-layer register */
4159 MIDFUNC(1,forget_about,(W4 r))
4160 {
4161 if (isinreg(r))
4162 disassociate(r);
4163 live.state[r].val=0;
4164 set_status(r,UNDEF);
4165 }
4166 MENDFUNC(1,forget_about,(W4 r))
4167
4168 MIDFUNC(0,nop,(void))
4169 {
4170 raw_nop();
4171 }
4172 MENDFUNC(0,nop,(void))
4173
4174
4175 MIDFUNC(1,f_forget_about,(FW r))
4176 {
4177 if (f_isinreg(r))
4178 f_disassociate(r);
4179 live.fate[r].status=UNDEF;
4180 }
4181 MENDFUNC(1,f_forget_about,(FW r))
4182
4183 MIDFUNC(1,fmov_pi,(FW r))
4184 {
4185 r=f_writereg(r);
4186 raw_fmov_pi(r);
4187 f_unlock(r);
4188 }
4189 MENDFUNC(1,fmov_pi,(FW r))
4190
4191 MIDFUNC(1,fmov_log10_2,(FW r))
4192 {
4193 r=f_writereg(r);
4194 raw_fmov_log10_2(r);
4195 f_unlock(r);
4196 }
4197 MENDFUNC(1,fmov_log10_2,(FW r))
4198
4199 MIDFUNC(1,fmov_log2_e,(FW r))
4200 {
4201 r=f_writereg(r);
4202 raw_fmov_log2_e(r);
4203 f_unlock(r);
4204 }
4205 MENDFUNC(1,fmov_log2_e,(FW r))
4206
4207 MIDFUNC(1,fmov_loge_2,(FW r))
4208 {
4209 r=f_writereg(r);
4210 raw_fmov_loge_2(r);
4211 f_unlock(r);
4212 }
4213 MENDFUNC(1,fmov_loge_2,(FW r))
4214
4215 MIDFUNC(1,fmov_1,(FW r))
4216 {
4217 r=f_writereg(r);
4218 raw_fmov_1(r);
4219 f_unlock(r);
4220 }
4221 MENDFUNC(1,fmov_1,(FW r))
4222
4223 MIDFUNC(1,fmov_0,(FW r))
4224 {
4225 r=f_writereg(r);
4226 raw_fmov_0(r);
4227 f_unlock(r);
4228 }
4229 MENDFUNC(1,fmov_0,(FW r))
4230
4231 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4232 {
4233 r=f_writereg(r);
4234 raw_fmov_rm(r,m);
4235 f_unlock(r);
4236 }
4237 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4238
4239 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4240 {
4241 r=f_writereg(r);
4242 raw_fmovi_rm(r,m);
4243 f_unlock(r);
4244 }
4245 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4246
4247 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4248 {
4249 r=f_readreg(r);
4250 raw_fmovi_mr(m,r);
4251 f_unlock(r);
4252 }
4253 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4254
4255 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4256 {
4257 r=f_writereg(r);
4258 raw_fmovs_rm(r,m);
4259 f_unlock(r);
4260 }
4261 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4262
4263 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4264 {
4265 r=f_readreg(r);
4266 raw_fmovs_mr(m,r);
4267 f_unlock(r);
4268 }
4269 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4270
4271 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4272 {
4273 r=f_readreg(r);
4274 raw_fmov_ext_mr(m,r);
4275 f_unlock(r);
4276 }
4277 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4278
4279 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4280 {
4281 r=f_readreg(r);
4282 raw_fmov_mr(m,r);
4283 f_unlock(r);
4284 }
4285 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4286
4287 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4288 {
4289 r=f_writereg(r);
4290 raw_fmov_ext_rm(r,m);
4291 f_unlock(r);
4292 }
4293 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4294
4295 MIDFUNC(2,fmov_rr,(FW d, FR s))
4296 {
4297 if (d==s) { /* How pointless! */
4298 return;
4299 }
4300 #if USE_F_ALIAS
4301 f_disassociate(d);
4302 s=f_readreg(s);
4303 live.fate[d].realreg=s;
4304 live.fate[d].realind=live.fat[s].nholds;
4305 live.fate[d].status=DIRTY;
4306 live.fat[s].holds[live.fat[s].nholds]=d;
4307 live.fat[s].nholds++;
4308 f_unlock(s);
4309 #else
4310 s=f_readreg(s);
4311 d=f_writereg(d);
4312 raw_fmov_rr(d,s);
4313 f_unlock(s);
4314 f_unlock(d);
4315 #endif
4316 }
4317 MENDFUNC(2,fmov_rr,(FW d, FR s))
4318
4319 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4320 {
4321 index=readreg(index,4);
4322
4323 raw_fldcw_m_indexed(index,base);
4324 unlock2(index);
4325 }
4326 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4327
4328 MIDFUNC(1,ftst_r,(FR r))
4329 {
4330 r=f_readreg(r);
4331 raw_ftst_r(r);
4332 f_unlock(r);
4333 }
4334 MENDFUNC(1,ftst_r,(FR r))
4335
4336 MIDFUNC(0,dont_care_fflags,(void))
4337 {
4338 f_disassociate(FP_RESULT);
4339 }
4340 MENDFUNC(0,dont_care_fflags,(void))
4341
4342 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4343 {
4344 s=f_readreg(s);
4345 d=f_writereg(d);
4346 raw_fsqrt_rr(d,s);
4347 f_unlock(s);
4348 f_unlock(d);
4349 }
4350 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4351
4352 MIDFUNC(2,fabs_rr,(FW d, FR s))
4353 {
4354 s=f_readreg(s);
4355 d=f_writereg(d);
4356 raw_fabs_rr(d,s);
4357 f_unlock(s);
4358 f_unlock(d);
4359 }
4360 MENDFUNC(2,fabs_rr,(FW d, FR s))
4361
4362 MIDFUNC(2,fsin_rr,(FW d, FR s))
4363 {
4364 s=f_readreg(s);
4365 d=f_writereg(d);
4366 raw_fsin_rr(d,s);
4367 f_unlock(s);
4368 f_unlock(d);
4369 }
4370 MENDFUNC(2,fsin_rr,(FW d, FR s))
4371
4372 MIDFUNC(2,fcos_rr,(FW d, FR s))
4373 {
4374 s=f_readreg(s);
4375 d=f_writereg(d);
4376 raw_fcos_rr(d,s);
4377 f_unlock(s);
4378 f_unlock(d);
4379 }
4380 MENDFUNC(2,fcos_rr,(FW d, FR s))
4381
4382 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4383 {
4384 s=f_readreg(s);
4385 d=f_writereg(d);
4386 raw_ftwotox_rr(d,s);
4387 f_unlock(s);
4388 f_unlock(d);
4389 }
4390 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4391
4392 MIDFUNC(2,fetox_rr,(FW d, FR s))
4393 {
4394 s=f_readreg(s);
4395 d=f_writereg(d);
4396 raw_fetox_rr(d,s);
4397 f_unlock(s);
4398 f_unlock(d);
4399 }
4400 MENDFUNC(2,fetox_rr,(FW d, FR s))
4401
4402 MIDFUNC(2,frndint_rr,(FW d, FR s))
4403 {
4404 s=f_readreg(s);
4405 d=f_writereg(d);
4406 raw_frndint_rr(d,s);
4407 f_unlock(s);
4408 f_unlock(d);
4409 }
4410 MENDFUNC(2,frndint_rr,(FW d, FR s))
4411
4412 MIDFUNC(2,flog2_rr,(FW d, FR s))
4413 {
4414 s=f_readreg(s);
4415 d=f_writereg(d);
4416 raw_flog2_rr(d,s);
4417 f_unlock(s);
4418 f_unlock(d);
4419 }
4420 MENDFUNC(2,flog2_rr,(FW d, FR s))
4421
4422 MIDFUNC(2,fneg_rr,(FW d, FR s))
4423 {
4424 s=f_readreg(s);
4425 d=f_writereg(d);
4426 raw_fneg_rr(d,s);
4427 f_unlock(s);
4428 f_unlock(d);
4429 }
4430 MENDFUNC(2,fneg_rr,(FW d, FR s))
4431
4432 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4433 {
4434 s=f_readreg(s);
4435 d=f_rmw(d);
4436 raw_fadd_rr(d,s);
4437 f_unlock(s);
4438 f_unlock(d);
4439 }
4440 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4441
4442 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4443 {
4444 s=f_readreg(s);
4445 d=f_rmw(d);
4446 raw_fsub_rr(d,s);
4447 f_unlock(s);
4448 f_unlock(d);
4449 }
4450 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4451
4452 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4453 {
4454 d=f_readreg(d);
4455 s=f_readreg(s);
4456 raw_fcmp_rr(d,s);
4457 f_unlock(s);
4458 f_unlock(d);
4459 }
4460 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4461
4462 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4463 {
4464 s=f_readreg(s);
4465 d=f_rmw(d);
4466 raw_fdiv_rr(d,s);
4467 f_unlock(s);
4468 f_unlock(d);
4469 }
4470 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4471
4472 MIDFUNC(2,frem_rr,(FRW d, FR s))
4473 {
4474 s=f_readreg(s);
4475 d=f_rmw(d);
4476 raw_frem_rr(d,s);
4477 f_unlock(s);
4478 f_unlock(d);
4479 }
4480 MENDFUNC(2,frem_rr,(FRW d, FR s))
4481
4482 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4483 {
4484 s=f_readreg(s);
4485 d=f_rmw(d);
4486 raw_frem1_rr(d,s);
4487 f_unlock(s);
4488 f_unlock(d);
4489 }
4490 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4491
4492 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4493 {
4494 s=f_readreg(s);
4495 d=f_rmw(d);
4496 raw_fmul_rr(d,s);
4497 f_unlock(s);
4498 f_unlock(d);
4499 }
4500 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4501
4502 /********************************************************************
4503 * Support functions exposed to gencomp. CREATE time *
4504 ********************************************************************/
4505
4506 int kill_rodent(int r)
4507 {
4508 return KILLTHERAT &&
4509 have_rat_stall &&
4510 (live.state[r].status==INMEM ||
4511 live.state[r].status==CLEAN ||
4512 live.state[r].status==ISCONST ||
4513 live.state[r].dirtysize==4);
4514 }
4515
4516 uae_u32 get_const(int r)
4517 {
4518 Dif (!isconst(r)) {
4519 write_log("Register %d should be constant, but isn't\n",r);
4520 abort();
4521 }
4522 return live.state[r].val;
4523 }
4524
4525 void sync_m68k_pc(void)
4526 {
4527 if (m68k_pc_offset) {
4528 add_l_ri(PC_P,m68k_pc_offset);
4529 comp_pc_p+=m68k_pc_offset;
4530 m68k_pc_offset=0;
4531 }
4532 }
4533
4534 /********************************************************************
4535 * Scratch registers management *
4536 ********************************************************************/
4537
4538 struct scratch_t {
4539 uae_u32 regs[VREGS];
4540 fpu_register fregs[VFREGS];
4541 };
4542
4543 static scratch_t scratch;
4544
4545 /********************************************************************
4546 * Support functions exposed to newcpu *
4547 ********************************************************************/
4548
4549 static inline const char *str_on_off(bool b)
4550 {
4551 return b ? "on" : "off";
4552 }
4553
4554 static __inline__ unsigned int cft_map (unsigned int f)
4555 {
4556 #ifndef HAVE_GET_WORD_UNSWAPPED
4557 return f;
4558 #else
4559 return ((f >> 8) & 255) | ((f & 255) << 8);
4560 #endif
4561 }
4562
4563 void compiler_init(void)
4564 {
4565 static bool initialized = false;
4566 if (initialized)
4567 return;
4568
4569 #ifndef WIN32
4570 // Open /dev/zero
4571 zero_fd = open("/dev/zero", O_RDWR);
4572 if (zero_fd < 0) {
4573 char str[200];
4574 sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
4575 ErrorAlert(str);
4576 QuitEmulator();
4577 }
4578 #endif
4579
4580 #if JIT_DEBUG
4581 // JIT debug mode ?
4582 JITDebug = PrefsFindBool("jitdebug");
4583 #endif
4584 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4585
4586 #ifdef USE_JIT_FPU
4587 // Use JIT compiler for FPU instructions ?
4588 avoid_fpu = !PrefsFindBool("jitfpu");
4589 #else
4590 // JIT FPU is always disabled
4591 avoid_fpu = true;
4592 #endif
4593 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4594
4595 // Get size of the translation cache (in KB)
4596 cache_size = PrefsFindInt32("jitcachesize");
4597 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
4598
4599 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4600 raw_init_cpu();
4601 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4602 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4603 write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4604
4605 // Translation cache flush mechanism
4606 lazy_flush = PrefsFindBool("jitlazyflush");
4607 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
4608 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
4609
4610 // Compiler features
4611 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4612 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4613 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4614 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4615
4616 // Build compiler tables
4617 build_comp();
4618
4619 initialized = true;
4620
4621 #if PROFILE_COMPILE_TIME
4622 write_log("<JIT compiler> : gather statistics on translation time\n");
4623 emul_start_time = clock();
4624 #endif
4625 }
4626
4627 void compiler_exit(void)
4628 {
4629 #if PROFILE_COMPILE_TIME
4630 emul_end_time = clock();
4631 #endif
4632
4633 // Deallocate translation cache
4634 if (compiled_code) {
4635 vm_release(compiled_code, cache_size * 1024);
4636 compiled_code = 0;
4637 }
4638
4639 #ifndef WIN32
4640 // Close /dev/zero
4641 if (zero_fd > 0)
4642 close(zero_fd);
4643 #endif
4644
4645 #if PROFILE_COMPILE_TIME
4646 write_log("### Compile Block statistics\n");
4647 write_log("Number of calls to compile_block : %d\n", compile_count);
4648 uae_u32 emul_time = emul_end_time - emul_start_time;
4649 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
4650 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
4651 100.0*double(compile_time)/double(emul_time));
4652 write_log("\n");
4653 #endif
4654 }
4655
4656 bool compiler_use_jit(void)
4657 {
4658 // Check for the "jit" prefs item
4659 if (!PrefsFindBool("jit"))
4660 return false;
4661
4662 // Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
4663 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
4664 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
4665 return false;
4666 }
4667
4668 // FIXME: there are currently problems with JIT compilation and anything below a 68040
4669 if (CPUType < 4) {
4670 write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
4671 return false;
4672 }
4673
4674 return true;
4675 }
4676
4677 void init_comp(void)
4678 {
4679 int i;
4680 uae_s8* cb=can_byte;
4681 uae_s8* cw=can_word;
4682 uae_s8* au=always_used;
4683
4684 for (i=0;i<VREGS;i++) {
4685 live.state[i].realreg=-1;
4686 live.state[i].needflush=NF_SCRATCH;
4687 live.state[i].val=0;
4688 set_status(i,UNDEF);
4689 }
4690
4691 for (i=0;i<VFREGS;i++) {
4692 live.fate[i].status=UNDEF;
4693 live.fate[i].realreg=-1;
4694 live.fate[i].needflush=NF_SCRATCH;
4695 }
4696
4697 for (i=0;i<VREGS;i++) {
4698 if (i<16) { /* First 16 registers map to 68k registers */
4699 live.state[i].mem=((uae_u32*)&regs)+i;
4700 live.state[i].needflush=NF_TOMEM;
4701 set_status(i,INMEM);
4702 }
4703 else
4704 live.state[i].mem=scratch.regs+i;
4705 }
4706 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
4707 live.state[PC_P].needflush=NF_TOMEM;
4708 set_const(PC_P,(uae_u32)comp_pc_p);
4709
4710 live.state[FLAGX].mem=&(regflags.x);
4711 live.state[FLAGX].needflush=NF_TOMEM;
4712 set_status(FLAGX,INMEM);
4713
4714 live.state[FLAGTMP].mem=&(regflags.cznv);
4715 live.state[FLAGTMP].needflush=NF_TOMEM;
4716 set_status(FLAGTMP,INMEM);
4717
4718 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
4719 set_status(NEXT_HANDLER,UNDEF);
4720
4721 for (i=0;i<VFREGS;i++) {
4722 if (i<8) { /* First 8 registers map to 68k FPU registers */
4723 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
4724 live.fate[i].needflush=NF_TOMEM;
4725 live.fate[i].status=INMEM;
4726 }
4727 else if (i==FP_RESULT) {
4728 live.fate[i].mem=(uae_u32*)(&fpu.result);
4729 live.fate[i].needflush=NF_TOMEM;
4730 live.fate[i].status=INMEM;
4731 }
4732 else
4733 live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
4734 }
4735
4736
4737 for (i=0;i<N_REGS;i++) {
4738 live.nat[i].touched=0;
4739 live.nat[i].nholds=0;
4740 live.nat[i].locked=0;
4741 if (*cb==i) {
4742 live.nat[i].canbyte=1; cb++;
4743 } else live.nat[i].canbyte=0;
4744 if (*cw==i) {
4745 live.nat[i].canword=1; cw++;
4746 } else live.nat[i].canword=0;
4747 if (*au==i) {
4748 live.nat[i].locked=1; au++;
4749 }
4750 }
4751
4752 for (i=0;i<N_FREGS;i++) {
4753 live.fat[i].touched=0;
4754 live.fat[i].nholds=0;
4755 live.fat[i].locked=0;
4756 }
4757
4758 touchcnt=1;
4759 m68k_pc_offset=0;
4760 live.flags_in_flags=TRASH;
4761 live.flags_on_stack=VALID;
4762 live.flags_are_important=1;
4763
4764 raw_fp_init();
4765 }
4766
4767 /* Only do this if you really mean it! The next call should be to init!*/
4768 void flush(int save_regs)
4769 {
4770 int fi,i;
4771
4772 log_flush();
4773 flush_flags(); /* low level */
4774 sync_m68k_pc(); /* mid level */
4775
4776 if (save_regs) {
4777 for (i=0;i<VFREGS;i++) {
4778 if (live.fate[i].needflush==NF_SCRATCH ||
4779 live.fate[i].status==CLEAN) {
4780 f_disassociate(i);
4781 }
4782 }
4783 for (i=0;i<VREGS;i++) {
4784 if (live.state[i].needflush==NF_TOMEM) {
4785 switch(live.state[i].status) {
4786 case INMEM:
4787 if (live.state[i].val) {
4788 raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
4789 log_vwrite(i);
4790 live.state[i].val=0;
4791 }
4792 break;
4793 case CLEAN:
4794 case DIRTY:
4795 remove_offset(i,-1); tomem(i); break;
4796 case ISCONST:
4797 if (i!=PC_P)
4798 writeback_const(i);
4799 break;
4800 default: break;
4801 }
4802 Dif (live.state[i].val && i!=PC_P) {
4803 write_log("Register %d still has val %x\n",
4804 i,live.state[i].val);
4805 }
4806 }
4807 }
4808 for (i=0;i<VFREGS;i++) {
4809 if (live.fate[i].needflush==NF_TOMEM &&
4810 live.fate[i].status==DIRTY) {
4811 f_evict(i);
4812 }
4813 }
4814 raw_fp_cleanup_drop();
4815 }
4816 if (needflags) {
4817 write_log("Warning! flush with needflags=1!\n");
4818 }
4819 }
4820
4821 static void flush_keepflags(void)
4822 {
4823 int fi,i;
4824
4825 for (i=0;i<VFREGS;i++) {
4826 if (live.fate[i].needflush==NF_SCRATCH ||
4827 live.fate[i].status==CLEAN) {
4828 f_disassociate(i);
4829 }
4830 }
4831 for (i=0;i<VREGS;i++) {
4832 if (live.state[i].needflush==NF_TOMEM) {
4833 switch(live.state[i].status) {
4834 case INMEM:
4835 /* Can't adjust the offset here --- that needs "add" */
4836 break;
4837 case CLEAN:
4838 case DIRTY:
4839 remove_offset(i,-1); tomem(i); break;
4840 case ISCONST:
4841 if (i!=PC_P)
4842 writeback_const(i);
4843 break;
4844 default: break;
4845 }
4846 }
4847 }
4848 for (i=0;i<VFREGS;i++) {
4849 if (live.fate[i].needflush==NF_TOMEM &&
4850 live.fate[i].status==DIRTY) {
4851 f_evict(i);
4852 }
4853 }
4854 raw_fp_cleanup_drop();
4855 }
4856
4857 void freescratch(void)
4858 {
4859 int i;
4860 for (i=0;i<N_REGS;i++)
4861 if (live.nat[i].locked && i!=4)
4862 write_log("Warning! %d is locked\n",i);
4863
4864 for (i=0;i<VREGS;i++)
4865 if (live.state[i].needflush==NF_SCRATCH) {
4866 forget_about(i);
4867 }
4868
4869 for (i=0;i<VFREGS;i++)
4870 if (live.fate[i].needflush==NF_SCRATCH) {
4871 f_forget_about(i);
4872 }
4873 }
4874
4875 /********************************************************************
4876 * Support functions, internal *
4877 ********************************************************************/
4878
4879
4880 static void align_target(uae_u32 a)
4881 {
4882 /* Fill with NOPs --- makes debugging with gdb easier */
4883 while ((uae_u32)target&(a-1))
4884 *target++=0x90;
4885 }
4886
4887 static __inline__ int isinrom(uintptr addr)
4888 {
4889 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
4890 }
4891
4892 static void flush_all(void)
4893 {
4894 int i;
4895
4896 log_flush();
4897 for (i=0;i<VREGS;i++)
4898 if (live.state[i].status==DIRTY) {
4899 if (!call_saved[live.state[i].realreg]) {
4900 tomem(i);
4901 }
4902 }
4903 for (i=0;i<VFREGS;i++)
4904 if (f_isinreg(i))
4905 f_evict(i);
4906 raw_fp_cleanup_drop();
4907 }
4908
4909 /* Make sure all registers that will get clobbered by a call are
4910 save and sound in memory */
4911 static void prepare_for_call_1(void)
4912 {
4913 flush_all(); /* If there are registers that don't get clobbered,
4914 * we should be a bit more selective here */
4915 }
4916
4917 /* We will call a C routine in a moment. That will clobber all registers,
4918 so we need to disassociate everything */
4919 static void prepare_for_call_2(void)
4920 {
4921 int i;
4922 for (i=0;i<N_REGS;i++)
4923 if (!call_saved[i] && live.nat[i].nholds>0)
4924 free_nreg(i);
4925
4926 for (i=0;i<N_FREGS;i++)
4927 if (live.fat[i].nholds>0)
4928 f_free_nreg(i);
4929
4930 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
4931 flags at the very start of the call_r
4932 functions! */
4933 }
4934
4935 /********************************************************************
4936 * Memory access and related functions, CREATE time *
4937 ********************************************************************/
4938
4939 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
4940 {
4941 next_pc_p=not_taken;
4942 taken_pc_p=taken;
4943 branch_cc=cond;
4944 }
4945
4946
4947 static uae_u32 get_handler_address(uae_u32 addr)
4948 {
4949 uae_u32 cl=cacheline(addr);
4950 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
4951 return (uae_u32)&(bi->direct_handler_to_use);
4952 }
4953
4954 static uae_u32 get_handler(uae_u32 addr)
4955 {
4956 uae_u32 cl=cacheline(addr);
4957 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
4958 return (uae_u32)bi->direct_handler_to_use;
4959 }
4960
4961 static void load_handler(int reg, uae_u32 addr)
4962 {
4963 mov_l_rm(reg,get_handler_address(addr));
4964 }
4965
4966 /* This version assumes that it is writing *real* memory, and *will* fail
4967 * if that assumption is wrong! No branches, no second chances, just
4968 * straight go-for-it attitude */
4969
4970 static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
4971 {
4972 int f=tmp;
4973
4974 if (clobber)
4975 f=source;
4976 switch(size) {
4977 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
4978 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
4979 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
4980 }
4981 forget_about(tmp);
4982 forget_about(f);
4983 }
4984
4985 void writebyte(int address, int source, int tmp)
4986 {
4987 writemem_real(address,source,20,1,tmp,0);
4988 }
4989
4990 static __inline__ void writeword_general(int address, int source, int tmp,
4991 int clobber)
4992 {
4993 writemem_real(address,source,16,2,tmp,clobber);
4994 }
4995
4996 void writeword_clobber(int address, int source, int tmp)
4997 {
4998 writeword_general(address,source,tmp,1);
4999 }
5000
5001 void writeword(int address, int source, int tmp)
5002 {
5003 writeword_general(address,source,tmp,0);
5004 }
5005
5006 static __inline__ void writelong_general(int address, int source, int tmp,
5007 int clobber)
5008 {
5009 writemem_real(address,source,12,4,tmp,clobber);
5010 }
5011
5012 void writelong_clobber(int address, int source, int tmp)
5013 {
5014 writelong_general(address,source,tmp,1);
5015 }
5016
5017 void writelong(int address, int source, int tmp)
5018 {
5019 writelong_general(address,source,tmp,0);
5020 }
5021
5022
5023
5024 /* This version assumes that it is reading *real* memory, and *will* fail
5025 * if that assumption is wrong! No branches, no second chances, just
5026 * straight go-for-it attitude */
5027
5028 static void readmem_real(int address, int dest, int offset, int size, int tmp)
5029 {
5030 int f=tmp;
5031
5032 if (size==4 && address!=dest)
5033 f=dest;
5034
5035 switch(size) {
5036 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5037 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5038 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5039 }
5040 forget_about(tmp);
5041 }
5042
5043 void readbyte(int address, int dest, int tmp)
5044 {
5045 readmem_real(address,dest,8,1,tmp);
5046 }
5047
5048 void readword(int address, int dest, int tmp)
5049 {
5050 readmem_real(address,dest,4,2,tmp);
5051 }
5052
5053 void readlong(int address, int dest, int tmp)
5054 {
5055 readmem_real(address,dest,0,4,tmp);
5056 }
5057
5058 void get_n_addr(int address, int dest, int tmp)
5059 {
5060 // a is the register containing the virtual address
5061 // after the offset had been fetched
5062 int a=tmp;
5063
5064 // f is the register that will contain the offset
5065 int f=tmp;
5066
5067 // a == f == tmp if (address == dest)
5068 if (address!=dest) {
5069 a=address;
5070 f=dest;
5071 }
5072
5073 #if REAL_ADDRESSING
5074 mov_l_rr(dest, address);
5075 #elif DIRECT_ADDRESSING
5076 lea_l_brr(dest,address,MEMBaseDiff);
5077 #endif
5078 forget_about(tmp);
5079 }
5080
5081 void get_n_addr_jmp(int address, int dest, int tmp)
5082 {
5083 /* For this, we need to get the same address as the rest of UAE
5084 would --- otherwise we end up translating everything twice */
5085 get_n_addr(address,dest,tmp);
5086 }
5087
5088
5089 /* base is a register, but dp is an actual value.
5090 target is a register, as is tmp */
5091 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5092 {
5093 int reg = (dp >> 12) & 15;
5094 int regd_shift=(dp >> 9) & 3;
5095
5096 if (dp & 0x100) {
5097 int ignorebase=(dp&0x80);
5098 int ignorereg=(dp&0x40);
5099 int addbase=0;
5100 int outer=0;
5101
5102 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5103 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5104
5105 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5106 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5107
5108 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5109 if (!ignorereg) {
5110 if ((dp & 0x800) == 0)
5111 sign_extend_16_rr(target,reg);
5112 else
5113 mov_l_rr(target,reg);
5114 shll_l_ri(target,regd_shift);
5115 }
5116 else
5117 mov_l_ri(target,0);
5118
5119 /* target is now regd */
5120 if (!ignorebase)
5121 add_l(target,base);
5122 add_l_ri(target,addbase);
5123 if (dp&0x03) readlong(target,target,tmp);
5124 } else { /* do the getlong first, then add regd */
5125 if (!ignorebase) {
5126 mov_l_rr(target,base);
5127 add_l_ri(target,addbase);
5128 }
5129 else
5130 mov_l_ri(target,addbase);
5131 if (dp&0x03) readlong(target,target,tmp);
5132
5133 if (!ignorereg) {
5134 if ((dp & 0x800) == 0)
5135 sign_extend_16_rr(tmp,reg);
5136 else
5137 mov_l_rr(tmp,reg);
5138 shll_l_ri(tmp,regd_shift);
5139 /* tmp is now regd */
5140 add_l(target,tmp);
5141 }
5142 }
5143 add_l_ri(target,outer);
5144 }
5145 else { /* 68000 version */
5146 if ((dp & 0x800) == 0) { /* Sign extend */
5147 sign_extend_16_rr(target,reg);
5148 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5149 }
5150 else {
5151 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5152 }
5153 }
5154 forget_about(tmp);
5155 }
5156
5157
5158
5159
5160
5161 void set_cache_state(int enabled)
5162 {
5163 if (enabled!=letit)
5164 flush_icache_hard(77);
5165 letit=enabled;
5166 }
5167
5168 int get_cache_state(void)
5169 {
5170 return letit;
5171 }
5172
5173 uae_u32 get_jitted_size(void)
5174 {
5175 if (compiled_code)
5176 return current_compile_p-compiled_code;
5177 return 0;
5178 }
5179
5180 void alloc_cache(void)
5181 {
5182 if (compiled_code) {
5183 flush_icache_hard(6);
5184 vm_release(compiled_code, cache_size * 1024);
5185 compiled_code = 0;
5186 }
5187
5188 if (cache_size == 0)
5189 return;
5190
5191 while (!compiled_code && cache_size) {
5192 if ((compiled_code = (uae_u8 *)vm_acquire(cache_size * 1024)) == VM_MAP_FAILED) {
5193 compiled_code = 0;
5194 cache_size /= 2;
5195 }
5196 }
5197 vm_protect(compiled_code, cache_size, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5198
5199 if (compiled_code) {
5200 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5201 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5202 current_compile_p = compiled_code;
5203 current_cache_size = 0;
5204 }
5205 }
5206
5207
5208
5209 extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5210
5211 static void calc_checksum(CSI_TYPE* csi, uae_u32* c1, uae_u32* c2)
5212 {
5213 uae_u32 k1=0;
5214 uae_u32 k2=0;
5215 uae_s32 len=CSI_LENGTH(csi);
5216 uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5217 uae_u32* pos;
5218
5219 len+=(tmp&3);
5220 tmp&=(~3);
5221 pos=(uae_u32*)tmp;
5222
5223 if (len<0 || len>MAX_CHECKSUM_LEN) {
5224 *c1=0;
5225 *c2=0;
5226 }
5227 else {
5228 while (len>0) {
5229 k1+=*pos;
5230 k2^=*pos;
5231 pos++;
5232 len-=4;
5233 }
5234 *c1=k1;
5235 *c2=k2;
5236 }
5237 }
5238
5239 static void show_checksum(CSI_TYPE* csi)
5240 {
5241 uae_u32 k1=0;
5242 uae_u32 k2=0;
5243 uae_s32 len=CSI_LENGTH(csi);
5244 uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5245 uae_u32* pos;
5246
5247 len+=(tmp&3);
5248 tmp&=(~3);
5249 pos=(uae_u32*)tmp;
5250
5251 if (len<0 || len>MAX_CHECKSUM_LEN) {
5252 return;
5253 }
5254 else {
5255 while (len>0) {
5256 write_log("%08x ",*pos);
5257 pos++;
5258 len-=4;
5259 }
5260 write_log(" bla\n");
5261 }
5262 }
5263
5264
5265 int check_for_cache_miss(void)
5266 {
5267 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5268
5269 if (bi) {
5270 int cl=cacheline(regs.pc_p);
5271 if (bi!=cache_tags[cl+1].bi) {
5272 raise_in_cl_list(bi);
5273 return 1;
5274 }
5275 }
5276 return 0;
5277 }
5278
5279
5280 static void recompile_block(void)
5281 {
5282 /* An existing block's countdown code has expired. We need to make
5283 sure that execute_normal doesn't refuse to recompile due to a
5284 perceived cache miss... */
5285 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5286
5287 Dif (!bi)
5288 abort();
5289 raise_in_cl_list(bi);
5290 execute_normal();
5291 return;
5292 }
5293 static void cache_miss(void)
5294 {
5295 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5296 uae_u32 cl=cacheline(regs.pc_p);
5297 blockinfo* bi2=get_blockinfo(cl);
5298
5299 if (!bi) {
5300 execute_normal(); /* Compile this block now */
5301 return;
5302 }
5303 Dif (!bi2 || bi==bi2) {
5304 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5305 abort();
5306 }
5307 raise_in_cl_list(bi);
5308 return;
5309 }
5310
5311 static int called_check_checksum(blockinfo* bi);
5312
5313 static inline int block_check_checksum(blockinfo* bi)
5314 {
5315 uae_u32 c1,c2;
5316 bool isgood;
5317
5318 if (bi->status!=BI_NEED_CHECK)
5319 return 1; /* This block is in a checked state */
5320
5321 checksum_count++;
5322
5323 #if USE_CHECKSUM_INFO
5324 checksum_info *csi = bi->csi;
5325 Dif(!csi) abort();
5326 isgood = true;
5327 while (csi && isgood) {
5328 if (csi->c1 || csi->c2)
5329 calc_checksum(csi,&c1,&c2);
5330 else
5331 c1 = c2 = 1; /* Make sure it doesn't match */
5332 isgood = isgood && (c1 == csi->c1 && c2 == csi->c2);
5333 csi = csi->next;
5334 }
5335 #else
5336 if (bi->c1 || bi->c2)
5337 calc_checksum(bi,&c1,&c2);
5338 else {
5339 c1=c2=1; /* Make sure it doesn't match */
5340 }
5341
5342 isgood=(c1==bi->c1 && c2==bi->c2);
5343 #endif
5344
5345 if (isgood) {
5346 /* This block is still OK. So we reactivate. Of course, that
5347 means we have to move it into the needs-to-be-flushed list */
5348 bi->handler_to_use=bi->handler;
5349 set_dhtu(bi,bi->direct_handler);
5350 bi->status=BI_CHECKING;
5351 isgood=called_check_checksum(bi);
5352 }
5353 if (isgood) {
5354 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5355 c1,c2,bi->c1,bi->c2);*/
5356 remove_from_list(bi);
5357 add_to_active(bi);
5358 raise_in_cl_list(bi);
5359 bi->status=BI_ACTIVE;
5360 }
5361 else {
5362 /* This block actually changed. We need to invalidate it,
5363 and set it up to be recompiled */
5364 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5365 c1,c2,bi->c1,bi->c2); */
5366 invalidate_block(bi);
5367 raise_in_cl_list(bi);
5368 }
5369 return isgood;
5370 }
5371
5372 static int called_check_checksum(blockinfo* bi)
5373 {
5374 dependency* x=bi->deplist;
5375 int isgood=1;
5376 int i;
5377
5378 for (i=0;i<2 && isgood;i++) {
5379 if (bi->dep[i].jmp_off) {
5380 isgood=block_check_checksum(bi->dep[i].target);
5381 }
5382 }
5383 return isgood;
5384 }
5385
5386 static void check_checksum(void)
5387 {
5388 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5389 uae_u32 cl=cacheline(regs.pc_p);
5390 blockinfo* bi2=get_blockinfo(cl);
5391
5392 /* These are not the droids you are looking for... */
5393 if (!bi) {
5394 /* Whoever is the primary target is in a dormant state, but
5395 calling it was accidental, and we should just compile this
5396 new block */
5397 execute_normal();
5398 return;
5399 }
5400 if (bi!=bi2) {
5401 /* The block was hit accidentally, but it does exist. Cache miss */
5402 cache_miss();
5403 return;
5404 }
5405
5406 if (!block_check_checksum(bi))
5407 execute_normal();
5408 }
5409
/* Bring the current register-allocator state in line with the small
   state a target block was compiled against, so we can jump into it
   directly.  Verifies the block first if it is pending a checksum. */
static __inline__ void match_states(blockinfo* bi)
{
    int i;
    smallstate* s=&(bi->env);

    if (bi->status==BI_NEED_CHECK) {
	block_check_checksum(bi);
    }
    if (bi->status==BI_ACTIVE ||
	bi->status==BI_FINALIZING) { /* Deal with the *promises* the
					block makes (about not using
					certain vregs) */
	for (i=0;i<16;i++) {
	    if (s->virt[i]==L_UNNEEDED) {
		// write_log("unneeded reg %d at %p\n",i,target);
		COMPCALL(forget_about)(i); // FIXME
	    }
	}
    }
    /* Spill everything; the loads below re-establish the layout the
       target block expects. */
    flush(1);

    /* And now deal with the *demands* the block makes */
    for (i=0;i<N_REGS;i++) {
	int v=s->nat[i];
	if (v>=0) {
	    /* Target expects vreg v in native register i. */
	    // printf("Loading reg %d into %d at %p\n",v,i,target);
	    readreg_specific(v,4,i);
	    // do_load_reg(i,v);
	    // setlock(i);
	}
    }
    /* Second pass: drop the locks taken by readreg_specific above. */
    for (i=0;i<N_REGS;i++) {
	int v=s->nat[i];
	if (v>=0) {
	    unlock2(i);
	}
    }
}
5448
5449 static uae_u8 popallspace[1024]; /* That should be enough space */
5450
/* Emit, into popallspace, one stub per get-out routine that restores
   the preserved native registers and jumps to the routine, plus the
   pushall_call_handler entry stub and (on x86) the inline dispatcher
   loop used by m68k_compile_execute. */
static __inline__ void create_popalls(void)
{
  int i,r;

  current_compile_p=popallspace;
  set_target(current_compile_p);
#if USE_PUSH_POP
  /* If we can't use gcc inline assembly, we need to pop some
     registers before jumping back to the various get-out routines.
     This generates the code for it.
  */
  align_target(align_jumps);
  popall_do_nothing=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)do_nothing);

  align_target(align_jumps);
  popall_execute_normal=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)execute_normal);

  align_target(align_jumps);
  popall_cache_miss=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)cache_miss);

  align_target(align_jumps);
  popall_recompile_block=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)recompile_block);

  align_target(align_jumps);
  popall_exec_nostats=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)exec_nostats);

  align_target(align_jumps);
  popall_check_checksum=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)check_checksum);

  align_target(align_jumps);
  current_compile_p=get_target();
#else
  /* Without push/pop the stubs degenerate to the routines themselves. */
  popall_exec_nostats=(void *)exec_nostats;
  popall_execute_normal=(void *)execute_normal;
  popall_cache_miss=(void *)cache_miss;
  popall_recompile_block=(void *)recompile_block;
  popall_do_nothing=(void *)do_nothing;
  popall_check_checksum=(void *)check_checksum;
#endif

  /* And now, the code to do the matching pushes and then jump
     into a handler routine */
  pushall_call_handler=get_target();
#if USE_PUSH_POP
  /* Push in reverse order so the pops above restore correctly. */
  for (i=N_REGS;i--;) {
      if (need_to_preserve[i])
	  raw_push_l_r(i);
  }
#endif
  /* Dispatch through cache_tags on the masked 68k PC. */
  r=REG_PC_TMP;
  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
  raw_and_l_ri(r,TAGMASK);
  raw_jmp_m_indexed((uae_u32)cache_tags,r,4);

#ifdef X86_ASSEMBLY
  /* Fully inlined execute loop: dispatch, then handle spcflags and
     quit_program between blocks; returns only when the loop ends. */
  align_target(align_jumps);
  m68k_compile_execute = (void (*)(void))get_target();
  for (i=N_REGS;i--;) {
      if (need_to_preserve[i])
	  raw_push_l_r(i);
  }
  align_target(align_loops);
  uae_u32 dispatch_loop = (uae_u32)get_target();
  r=REG_PC_TMP;
  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
  raw_and_l_ri(r,TAGMASK);
  raw_call_m_indexed((uae_u32)cache_tags,r,4);
  raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
  raw_call((uae_u32)m68k_do_specialties);
  raw_test_l_rr(REG_RESULT,REG_RESULT);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
  raw_cmp_b_mi((uae_u32)&quit_program,0);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_ret();
#endif
}
5565
5566 static __inline__ void reset_lists(void)
5567 {
5568 int i;
5569
5570 for (i=0;i<MAX_HOLD_BI;i++)
5571 hold_bi[i]=NULL;
5572 active=NULL;
5573 dormant=NULL;
5574 }
5575
/* Initialise a freshly allocated blockinfo: emit its two tiny trampolines
   (direct_pen -> execute_normal, direct_pcc -> check_checksum, each
   setting regs.pc_p to the block's address first) and reset its
   dependency slots and state. */
static void prepare_block(blockinfo* bi)
{
    int i;

    set_target(current_compile_p);
    align_target(align_jumps);
    bi->direct_pen=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
    raw_jmp((uae_u32)popall_execute_normal);

    align_target(align_jumps);
    bi->direct_pcc=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
    raw_jmp((uae_u32)popall_check_checksum);
    current_compile_p=get_target();

    /* No incoming jump dependencies yet. */
    bi->deplist=NULL;
    for (i=0;i<2;i++) {
	bi->dep[i].prev_p=NULL;
	bi->dep[i].next=NULL;
    }
    bi->env=default_ss;
    bi->status=BI_INVALID;
    bi->havestate=0;
    //bi->env=empty_ss;
}
5604
5605 void build_comp(void)
5606 {
5607 int i;
5608 int jumpcount=0;
5609 unsigned long opcode;
5610 struct comptbl* tbl=op_smalltbl_0_comp_ff;
5611 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
5612 int count;
5613 int cpu_level = 0; // 68000 (default)
5614 if (CPUType == 4)
5615 cpu_level = 4; // 68040 with FPU
5616 else {
5617 if (FPUType)
5618 cpu_level = 3; // 68020 with FPU
5619 else if (CPUType >= 2)
5620 cpu_level = 2; // 68020
5621 else if (CPUType == 1)
5622 cpu_level = 1;
5623 }
5624 struct cputbl *nfctbl = (
5625 cpu_level == 4 ? op_smalltbl_0_nf
5626 : cpu_level == 3 ? op_smalltbl_1_nf
5627 : cpu_level == 2 ? op_smalltbl_2_nf
5628 : cpu_level == 1 ? op_smalltbl_3_nf
5629 : op_smalltbl_4_nf);
5630
5631 write_log ("<JIT compiler> : building compiler function tables\n");
5632
5633 for (opcode = 0; opcode < 65536; opcode++) {
5634 nfcpufunctbl[opcode] = op_illg_1;
5635 compfunctbl[opcode] = NULL;
5636 nfcompfunctbl[opcode] = NULL;
5637 prop[opcode].use_flags = 0x1f;
5638 prop[opcode].set_flags = 0x1f;
5639 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
5640 }
5641
5642 for (i = 0; tbl[i].opcode < 65536; i++) {
5643 int cflow = table68k[tbl[i].opcode].cflow;
5644 prop[cft_map(tbl[i].opcode)].cflow = cflow;
5645
5646 int uses_fpu = tbl[i].specific & 32;
5647 if (uses_fpu && avoid_fpu)
5648 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
5649 else
5650 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
5651 }
5652
5653 for (i = 0; nftbl[i].opcode < 65536; i++) {
5654 int uses_fpu = tbl[i].specific & 32;
5655 if (uses_fpu && avoid_fpu)
5656 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
5657 else
5658 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
5659
5660 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
5661 }
5662
5663 for (i = 0; nfctbl[i].handler; i++) {
5664 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
5665 }
5666
5667 for (opcode = 0; opcode < 65536; opcode++) {
5668 compop_func *f;
5669 compop_func *nff;
5670 cpuop_func *nfcf;
5671 int isaddx,cflow;
5672
5673 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
5674 continue;
5675
5676 if (table68k[opcode].handler != -1) {
5677 f = compfunctbl[cft_map(table68k[opcode].handler)];
5678 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
5679 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
5680 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
5681 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
5682 prop[cft_map(opcode)].cflow = cflow;
5683 prop[cft_map(opcode)].is_addx = isaddx;
5684 compfunctbl[cft_map(opcode)] = f;
5685 nfcompfunctbl[cft_map(opcode)] = nff;
5686 Dif (nfcf == op_illg_1)
5687 abort();
5688 nfcpufunctbl[cft_map(opcode)] = nfcf;
5689 }
5690 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
5691 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
5692 }
5693 for (i = 0; nfctbl[i].handler != NULL; i++) {
5694 if (nfctbl[i].specific)
5695 nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
5696 }
5697
5698 count=0;
5699 for (opcode = 0; opcode < 65536; opcode++) {
5700 if (compfunctbl[cft_map(opcode)])
5701 count++;
5702 }
5703 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
5704
5705 /* Initialise state */
5706 create_popalls();
5707 alloc_cache();
5708 reset_lists();
5709
5710 for (i=0;i<TAGSIZE;i+=2) {
5711 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
5712 cache_tags[i+1].bi=NULL;
5713 }
5714
5715 #if 0
5716 for (i=0;i<N_REGS;i++) {
5717 empty_ss.nat[i].holds=-1;
5718 empty_ss.nat[i].validsize=0;
5719 empty_ss.nat[i].dirtysize=0;
5720 }
5721 #endif
5722 for (i=0;i<VREGS;i++) {
5723 empty_ss.virt[i]=L_NEEDED;
5724 }
5725 for (i=0;i<N_REGS;i++) {
5726 empty_ss.nat[i]=L_UNKNOWN;
5727 }
5728 default_ss=empty_ss;
5729 }
5730
5731
/* Cache-flush strategy "none": intentionally a no-op (the `n` cause
   code is ignored). */
static void flush_icache_none(int n)
{
	/* Nothing to do. */
}
5736
5737 static void flush_icache_hard(int n)
5738 {
5739 uae_u32 i;
5740 blockinfo* bi, *dbi;
5741
5742 hard_flush_count++;
5743 #if 0
5744 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
5745 n,regs.pc,regs.pc_p,current_cache_size/1024);
5746 current_cache_size = 0;
5747 #endif
5748 bi=active;
5749 while(bi) {
5750 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5751 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5752 dbi=bi; bi=bi->next;
5753 free_blockinfo(dbi);
5754 }
5755 bi=dormant;
5756 while(bi) {
5757 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
5758 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
5759 dbi=bi; bi=bi->next;
5760 free_blockinfo(dbi);
5761 }
5762
5763 reset_lists();
5764 if (!compiled_code)
5765 return;
5766 current_compile_p=compiled_code;
5767 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
5768 }
5769
5770
5771 /* "Soft flushing" --- instead of actually throwing everything away,
5772 we simply mark everything as "needs to be checked".
5773 */
5774
5775 static inline void flush_icache_lazy(int n)
5776 {
5777 uae_u32 i;
5778 blockinfo* bi;
5779 blockinfo* bi2;
5780
5781 soft_flush_count++;
5782 if (!active)
5783 return;
5784
5785 bi=active;
5786 while (bi) {
5787 uae_u32 cl=cacheline(bi->pc_p);
5788 if (bi->status==BI_INVALID ||
5789 bi->status==BI_NEED_RECOMP) {
5790 if (bi==cache_tags[cl+1].bi)
5791 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
5792 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
5793 set_dhtu(bi,bi->direct_pen);
5794 bi->status=BI_INVALID;
5795 }
5796 else {
5797 if (bi==cache_tags[cl+1].bi)
5798 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
5799 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
5800 set_dhtu(bi,bi->direct_pcc);
5801 bi->status=BI_NEED_CHECK;
5802 }
5803 bi2=bi;
5804 bi=bi->next;
5805 }
5806 /* bi2 is now the last entry in the active list */
5807 bi2->next=dormant;
5808 if (dormant)
5809 dormant->prev_p=&(bi2->next);
5810
5811 dormant=active;
5812 active->prev_p=&dormant;
5813 active=NULL;
5814 }
5815
/* Unrecoverable internal error: dump core immediately. */
static void catastrophe(void)
{
    abort();
}
5820
/* Set non-zero by compile_block (and cleared by compile handlers that
   succeed) when an opcode cannot be translated, triggering the
   call-the-interpreter fallback path. */
int failure;
5822
/* Disassembler target selectors for disasm_block(); TARGET_NATIVE is
   the host architecture (defined only on x86 and PowerPC hosts). */
#define TARGET_M68K 0
#define TARGET_POWERPC 1
#define TARGET_X86 2
#if defined(i386) || defined(__i386__)
#define TARGET_NATIVE TARGET_X86
#endif
#if defined(powerpc) || defined(__powerpc__)
#define TARGET_NATIVE TARGET_POWERPC
#endif
5832
5833 #ifdef ENABLE_MON
5834 static uae_u32 mon_read_byte_jit(uae_u32 addr)
5835 {
5836 uae_u8 *m = (uae_u8 *)addr;
5837 return (uae_u32)(*m);
5838 }
5839
5840 static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
5841 {
5842 uae_u8 *m = (uae_u8 *)addr;
5843 *m = b;
5844 }
5845 #endif
5846
5847 void disasm_block(int target, uint8 * start, size_t length)
5848 {
5849 if (!JITDebug)
5850 return;
5851
5852 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
5853 char disasm_str[200];
5854 sprintf(disasm_str, "%s $%x $%x",
5855 target == TARGET_M68K ? "d68" :
5856 target == TARGET_X86 ? "d86" :
5857 target == TARGET_POWERPC ? "d" : "x",
5858 start, start + length - 1);
5859
5860 uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
5861 void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
5862
5863 mon_read_byte = mon_read_byte_jit;
5864 mon_write_byte = mon_write_byte_jit;
5865
5866 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
5867 mon(4, arg);
5868
5869 mon_read_byte = old_mon_read_byte;
5870 mon_write_byte = old_mon_write_byte;
5871 #endif
5872 }
5873
/* Convenience wrapper: disassemble generated host code. */
static inline void disasm_native_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_NATIVE, start, length);
}
5878
/* Convenience wrapper: disassemble the source 68k instructions. */
static inline void disasm_m68k_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_M68K, start, length);
}
5883
/* Fetch a 68k opcode word from emulated memory, using the unswapped
   accessor on hosts where memory is kept byte-swapped. */
#ifdef HAVE_GET_WORD_UNSWAPPED
# define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
#else
# define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
#endif
5889
#if JIT_DEBUG
/* Written by generated code (see compile_block) so that
   compiler_dumpstate() can report the last block entered. */
static uae_u8 *last_regs_pc_p = 0;
static uae_u8 *last_compiled_block_addr = 0;
5893
/* Debug helper: dump host-side addresses, the 68k register state, and
   the location/size of the most recently entered compiled block. */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;
	
	write_log("### Host addresses\n");
	write_log("MEM_BASE    : %x\n", MEMBaseDiff);
	write_log("PC_P        : %p\n", &regs.pc_p);
	write_log("SPCFLAGS    : %p\n", &regs.spcflags);
	write_log("D0-D7       : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7       : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");
	
	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");
	
	write_log("### Block in Mac address space\n");
	write_log("M68K block   : %p\n",
			  (void *)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
			  (void *)get_virtual_address(last_compiled_block_addr),
			  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
5919 #endif
5920
/* Translate the just-interpreted trace pc_hist[0..blocklen-1] into a
   native block: compute flag liveness, emit the countdown prologue,
   translate (or fall back to interpretive calls for) each opcode,
   emit branch/dispatch epilogues, checksum the source range and link
   the block into the cache-tag and dependency structures. */
static void compile_block(cpu_history* pc_hist, int blocklen)
{
    if (letit && compiled_code) {
#if PROFILE_COMPILE_TIME
	compile_count++;
	clock_t start_time = clock();
#endif
#if JIT_DEBUG
	bool disasm_block = false;
#endif
	
	/* OK, here we need to 'compile' a block */
	int i;
	int r;
	int was_comp=0;
	uae_u8 liveflags[MAXRUN+1];
	uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
	uae_u32 min_pcp=max_pcp;
	uae_u32 cl=cacheline(pc_hist[0].location);
	void* specflags=(void*)&regs.spcflags;
	blockinfo* bi=NULL;
	blockinfo* bi2;
	int extra_len=0;

	redo_current_block=0;
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	alloc_blockinfos();

	bi=get_blockinfo_addr_new(pc_hist[0].location,0);
	bi2=get_blockinfo(cl);

	optlev=bi->optlevel;
	if (bi->status!=BI_INVALID) {
	    Dif (bi!=bi2) {
		/* I don't think it can happen anymore. Shouldn't, in
		   any case. So let's make sure... */
		write_log("WOOOWOO count=%d, ol=%d %p %p\n",
		       bi->count,bi->optlevel,bi->handler_to_use,
		       cache_tags[cl].handler);
		abort();
	    }

	    Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
		write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
		/* What the heck? We are not supposed to be here! */
		abort();
	    }
	}
	if (bi->count==-1) {
	    /* Block graduated to the next optimization level; reset its
	       execution countdown for that level. */
	    optlev++;
	    while (!optcount[optlev])
		optlev++;
	    bi->count=optcount[optlev]-1;
	}
	current_block_pc_p=(uae_u32)pc_hist[0].location;
	
	remove_deps(bi); /* We are about to create new code */
	bi->optlevel=optlev;
	bi->pc_p=(uae_u8*)pc_hist[0].location;
	
	/* Backward liveness pass: compute which 68k flags are needed
	   after each instruction so flag generation can be skipped. */
	liveflags[blocklen]=0x1f; /* All flags needed afterwards */
	i=blocklen;
	while (i--) {
	    uae_u16* currpcp=pc_hist[i].location;
	    uae_u32 op=DO_GET_OPCODE(currpcp);

	    if ((uae_u32)currpcp<min_pcp)
		min_pcp=(uae_u32)currpcp;
	    if ((uae_u32)currpcp>max_pcp)
		max_pcp=(uae_u32)currpcp;

	    liveflags[i]=((liveflags[i+1]&
			   (~prop[op].set_flags))|
			  prop[op].use_flags);
	    if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
		liveflags[i]&= ~FLAG_Z;
	}

	bi->needed_flags=liveflags[0];

	align_target(align_loops);
	was_comp=0;

	bi->direct_handler=(cpuop_func *)get_target();
	set_dhtu(bi,bi->direct_handler);
	bi->status=BI_COMPILING;
	current_block_start_target=(uae_u32)get_target();
	
	log_startblock();
	
	if (bi->count>=0) { /* Need to generate countdown code */
	    raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	    raw_sub_l_mi((uae_u32)&(bi->count),1);
	    raw_jl((uae_u32)popall_recompile_block);
	}
	if (optlev==0) { /* No need to actually translate */
	    /* Execute normally without keeping stats */
	    raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	    raw_jmp((uae_u32)popall_exec_nostats);
	}
	else {
	    reg_alloc_run=0;
	    next_pc_p=0;
	    taken_pc_p=0;
	    branch_cc=0;
		
	    comp_pc_p=(uae_u8*)pc_hist[0].location;
	    init_comp();
	    was_comp=1;

#if JIT_DEBUG
	    if (JITDebug) {
		raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
		raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
	    }
#endif
		
	    /* Translate instruction by instruction, using the no-flags
	       tables when liveness says the flags are dead. */
	    for (i=0;i<blocklen &&
		     get_target_noopt()<max_compile_start;i++) {
		cpuop_func **cputbl;
		compop_func **comptbl;
		uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
		needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
		if (!needed_flags) {
		    cputbl=nfcpufunctbl;
		    comptbl=nfcompfunctbl;
		}
		else {
		    cputbl=cpufunctbl;
		    comptbl=compfunctbl;
		}

		failure = 1; // gb-- defaults to failure state
		if (comptbl[opcode] && optlev>1) {
		    failure=0;
		    if (!was_comp) {
			comp_pc_p=(uae_u8*)pc_hist[i].location;
			init_comp();
		    }
		    was_comp++;

		    comptbl[opcode](opcode);
		    freescratch();
		    if (!(liveflags[i+1] & FLAG_CZNV)) { 
			/* We can forget about flags */
			dont_care_flags();
		    }
#if INDIVIDUAL_INST 
		    flush(1);
		    nop();
		    flush(1);
		    was_comp=0;
#endif
		}
		
		if (failure) {
		    /* Untranslatable opcode: emit a call to the
		       interpretive handler instead. */
		    if (was_comp) {
			flush(1);
			was_comp=0;
		    }
		    raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
#if USE_NORMAL_CALLING_CONVENTION
		    raw_push_l_r(REG_PAR1);
#endif
		    raw_mov_l_mi((uae_u32)&regs.pc_p,
				 (uae_u32)pc_hist[i].location);
		    raw_call((uae_u32)cputbl[opcode]);
		    //raw_add_l_mi((uae_u32)&oink,1); // FIXME
#if USE_NORMAL_CALLING_CONVENTION
		    raw_inc_sp(4);
#endif
		    if (needed_flags) {
			//raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode+65536);
		    }
		    else {
			//raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode);
		    }
		    
		    if (i < blocklen - 1) {
			/* Mid-block: bail out to do_nothing if the
			   interpretive handler raised any spcflags. */
			uae_s8* branchadd;
			
			raw_mov_l_rm(0,(uae_u32)specflags);
			raw_test_l_rr(0,0);
			raw_jz_b_oponly();
			branchadd=(uae_s8 *)get_target();
			emit_byte(0);
			raw_jmp((uae_u32)popall_do_nothing);
			*branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
		    }
		}
	    }
#if 1 /* This isn't completely kosher yet; It really needs to be
	 be integrated into a general inter-block-dependency scheme */
	    if (next_pc_p && taken_pc_p &&
		was_comp && taken_pc_p==current_block_pc_p) {
		blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
		blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
		uae_u8 x=bi1->needed_flags;
		
		if (x==0xff || 1) {  /* To be on the safe side */
		    uae_u16* next=(uae_u16*)next_pc_p;
		    uae_u32 op=DO_GET_OPCODE(next);
		    
		    x=0x1f;
		    x&=(~prop[op].set_flags);
		    x|=prop[op].use_flags;
		}
		
		x|=bi2->needed_flags;
		if (!(x & FLAG_CZNV)) { 
		    /* We can forget about flags */
		    dont_care_flags();
		    extra_len+=2; /* The next instruction now is part of this
				     block */
		}
		
	    }
#endif
	    log_flush();

	    if (next_pc_p) { /* A branch was registered */
		uae_u32 t1=next_pc_p;
		uae_u32 t2=taken_pc_p;
		int cc=branch_cc;
		
		uae_u32* branchadd;
		uae_u32* tba;
		bigstate tmp;
		blockinfo* tbi;

		if (taken_pc_p<next_pc_p) {
		    /* backward branch. Optimize for the "taken" case ---
		       which means the raw_jcc should fall through when
		       the 68k branch is taken. */
		    t1=taken_pc_p;
		    t2=next_pc_p;
		    cc=branch_cc^1;
		}
		
		tmp=live; /* ouch! This is big... */
		raw_jcc_l_oponly(cc);
		branchadd=(uae_u32*)get_target();
		emit_long(0);
		
		/* predicted outcome */
		tbi=get_blockinfo_addr_new((void*)t1,1);
		match_states(tbi);
		raw_cmp_l_mi((uae_u32)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t1)-((uae_u32)tba+4));
		raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
		raw_jmp((uae_u32)popall_do_nothing);
		create_jmpdep(bi,0,tba,t1);

		align_target(align_jumps);
		/* not-predicted outcome */
		*branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
		live=tmp; /* Ouch again */
		tbi=get_blockinfo_addr_new((void*)t2,1);
		match_states(tbi);

		//flush(1); /* Can only get here if was_comp==1 */
		raw_cmp_l_mi((uae_u32)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t2)-((uae_u32)tba+4));
		raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
		raw_jmp((uae_u32)popall_do_nothing);
		create_jmpdep(bi,1,tba,t2);
	    }		
	    else 
	    {
		if (was_comp) {
		    flush(1);
		}

		/* Let's find out where next_handler is... */
		if (was_comp && isinreg(PC_P)) { 
		    r=live.state[PC_P].realreg;
		    raw_and_l_ri(r,TAGMASK);
		    int r2 = (r==0) ? 1 : 0;
		    raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
		    raw_jmp_r(r2);
		}
		else if (was_comp && isconst(PC_P)) {
		    uae_u32 v=live.state[PC_P].val;
		    uae_u32* tba;
		    blockinfo* tbi;

		    tbi=get_blockinfo_addr_new((void*)v,1);
		    match_states(tbi);

		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_jcc_l_oponly(4);
		    tba=(uae_u32*)get_target();
		    emit_long(get_handler(v)-((uae_u32)tba+4));
		    raw_mov_l_mi((uae_u32)&regs.pc_p,v);
		    raw_jmp((uae_u32)popall_do_nothing);
		    create_jmpdep(bi,0,tba,v);
		}
		else {
		    r=REG_PC_TMP;
		    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
		    raw_and_l_ri(r,TAGMASK);
		    int r2 = (r==0) ? 1 : 0;
		    raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
		    raw_jmp_r(r2);
		}
	    }
	}

#if USE_MATCH	
	if (callers_need_recompile(&live,&(bi->env))) {
	    mark_callers_recompile(bi);
	}

	big_to_small_state(&live,&(bi->env));
#endif

	/* Extend the checksummed range to cover the last instruction. */
	if (next_pc_p+extra_len>=max_pcp && 
	    next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
	    max_pcp=next_pc_p+extra_len;  /* extra_len covers flags magic */
	else
	    max_pcp+=LONGEST_68K_INST;

#if USE_CHECKSUM_INFO
	checksum_info *csi = (bi->csi = ChecksumInfoAllocator.acquire());
	csi->next = NULL;
	csi->length = max_pcp - min_pcp;
	csi->start_p = (uae_u8 *)min_pcp;
#else
	bi->len=max_pcp-min_pcp;
	bi->min_pcp=min_pcp;
#endif
	
	remove_from_list(bi);
	if (isinrom(min_pcp) && isinrom(max_pcp)) {
	    add_to_dormant(bi); /* No need to checksum it on cache flush.
				   Please don't start changing ROMs in
				   flight! */
	}
	else {
#if USE_CHECKSUM_INFO
	    calc_checksum(csi,&csi->c1,&csi->c2);
#else
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
#endif
	    add_to_active(bi);
	}
	
	current_cache_size += get_target() - (uae_u8 *)current_compile_p;
	
#if JIT_DEBUG
	if (JITDebug)
	    bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
	
	if (JITDebug && disasm_block) {
	    uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
	    D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
	    uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
	    disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
	    D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
	    disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
	    getchar();
	}
#endif
	
	log_dump();
	align_target(align_jumps);

	/* This is the non-direct handler */
	bi->handler=
	    bi->handler_to_use=(cpuop_func *)get_target();
	raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	raw_jnz((uae_u32)popall_cache_miss);
	comp_pc_p=(uae_u8*)pc_hist[0].location;

	bi->status=BI_FINALIZING;
	init_comp();
	match_states(bi);
	flush(1);

	raw_jmp((uae_u32)bi->direct_handler);

	current_compile_p=get_target();
	raise_in_cl_list(bi);
	
	/* We will flush soon, anyway, so let's do it now */
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);
	
	bi->status=BI_ACTIVE;
	if (redo_current_block)
	    block_need_recompile(bi);
	
#if PROFILE_COMPILE_TIME
	compile_time += (clock() - start_time);
#endif
    }
}
6328
/* Get-out routine used by generated code when the block simply ends:
   intentionally empty, the caller handles spcflags. */
void do_nothing(void)
{
    /* What did you expect this to do? */
}
6333
6334 void exec_nostats(void)
6335 {
6336 for (;;) {
6337 uae_u32 opcode = GET_OPCODE;
6338 (*cpufunctbl[opcode])(opcode);
6339 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6340 return; /* We will deal with the spcflags in the caller */
6341 }
6342 }
6343 }
6344
6345 void execute_normal(void)
6346 {
6347 if (!check_for_cache_miss()) {
6348 cpu_history pc_hist[MAXRUN];
6349 int blocklen = 0;
6350 #if REAL_ADDRESSING || DIRECT_ADDRESSING
6351 start_pc_p = regs.pc_p;
6352 start_pc = get_virtual_address(regs.pc_p);
6353 #else
6354 start_pc_p = regs.pc_oldp;
6355 start_pc = regs.pc;
6356 #endif
6357 for (;;) { /* Take note: This is the do-it-normal loop */
6358 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6359 uae_u32 opcode = GET_OPCODE;
6360 #if FLIGHT_RECORDER
6361 m68k_record_step(m68k_getpc());
6362 #endif
6363 (*cpufunctbl[opcode])(opcode);
6364 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6365 compile_block(pc_hist, blocklen);
6366 return; /* We will deal with the spcflags in the caller */
6367 }
6368 /* No need to check regs.spcflags, because if they were set,
6369 we'd have ended up inside that "if" */
6370 }
6371 }
6372 }
6373
/* Signature of the generated entry stub (pushall_call_handler). */
typedef void (*compiled_handler)(void);
6375
6376 #ifdef X86_ASSEMBLY
6377 void (*m68k_compile_execute)(void) = NULL;
6378 #else
6379 void m68k_do_compile_execute(void)
6380 {
6381 for (;;) {
6382 ((compiled_handler)(pushall_call_handler))();
6383 /* Whenever we return from that, we should check spcflags */
6384 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6385 if (m68k_do_specialties ())
6386 return;
6387 }
6388 }
6389 }
6390 #endif