root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.38
Committed: 2006-05-08T16:56:07Z by gbeauche
Branch: MAIN
CVS Tags: nigel-build-19
Changes since 1.37: +29 -14 lines
Log Message:
Fix for LAZY_FLUSH_ICACHE_RANGE. Blocks are indexed by native addresses.

File Contents

1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2005 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 /* NOTE: support for AMD64 assumes translation cache and other code
35 * buffers are allocated into a 32-bit address space because (i) B2/JIT
36 * code is not 64-bit clean and (ii) it's faster to resolve branches
37 * that way.
38 */
39 #if !defined(__i386__) && !defined(__x86_64__)
40 #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41 #endif
42
43 #define USE_MATCH 0
44
45 /* kludge for Brian, so he can compile under MSVC++ */
46 #define USE_NORMAL_CALLING_CONVENTION 0
47
48 #ifndef WIN32
49 #include <unistd.h>
50 #include <sys/types.h>
51 #include <sys/mman.h>
52 #endif
53
54 #include <stdlib.h>
55 #include <fcntl.h>
56 #include <errno.h>
57
58 #include "sysdeps.h"
59 #include "cpu_emulation.h"
60 #include "main.h"
61 #include "prefs.h"
62 #include "user_strings.h"
63 #include "vm_alloc.h"
64
65 #include "m68k.h"
66 #include "memory.h"
67 #include "readcpu.h"
68 #include "newcpu.h"
69 #include "comptbl.h"
70 #include "compiler/compemu.h"
71 #include "fpu/fpu.h"
72 #include "fpu/flags.h"
73
74 #define DEBUG 1
75 #include "debug.h"
76
77 #ifdef ENABLE_MON
78 #include "mon.h"
79 #endif
80
81 #ifndef WIN32
82 #define PROFILE_COMPILE_TIME 1
83 #define PROFILE_UNTRANSLATED_INSNS 1
84 #endif
85
86 #if defined(__x86_64__) && 0
87 #define RECORD_REGISTER_USAGE 1
88 #endif
89
90 #ifdef WIN32
91 #undef write_log
92 #define write_log dummy_write_log
93 static void dummy_write_log(const char *, ...) { }
94 #endif
95
96 #if JIT_DEBUG
97 #undef abort
98 #define abort() do { \
99 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100 exit(EXIT_FAILURE); \
101 } while (0)
102 #endif
103
104 #if RECORD_REGISTER_USAGE
105 static uint64 reg_count[16];
106 static int reg_count_local[16];
107
108 static int reg_count_compare(const void *ap, const void *bp)
109 {
110 const int a = *((int *)ap);
111 const int b = *((int *)bp);
112 return (reg_count[a] > reg_count[b]) ? -1 : (reg_count[a] < reg_count[b]); /* descending, without uint64-to-int truncation */
113 }
114 #endif
115
116 #if PROFILE_COMPILE_TIME
117 #include <time.h>
118 static uae_u32 compile_count = 0;
119 static clock_t compile_time = 0;
120 static clock_t emul_start_time = 0;
121 static clock_t emul_end_time = 0;
122 #endif
123
124 #if PROFILE_UNTRANSLATED_INSNS
125 const int untranslated_top_ten = 20;
126 static uae_u32 raw_cputbl_count[65536] = { 0, };
127 static uae_u16 opcode_nums[65536];
128
129 static int untranslated_compfn(const void *e1, const void *e2)
130 {
131 return (raw_cputbl_count[*(const uae_u16 *)e1] > raw_cputbl_count[*(const uae_u16 *)e2]) ? -1 : (raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]); /* descending; the old "<" comparator never returned a negative value */
132 }
133 #endif
134
135 static compop_func *compfunctbl[65536];
136 static compop_func *nfcompfunctbl[65536];
137 static cpuop_func *nfcpufunctbl[65536];
138 uae_u8* comp_pc_p;
139
140 // From main_unix.cpp
141 extern bool ThirtyThreeBitAddressing;
142
143 // From newcpu.cpp
144 extern bool quit_program;
145
146 // gb-- Extra data for Basilisk II/JIT
147 #if JIT_DEBUG
148 static bool JITDebug = false; // Enable runtime disassemblers through mon?
149 #else
150 const bool JITDebug = false; // Don't use JIT debug mode at all
151 #endif
152 #if USE_INLINING
153 static bool follow_const_jumps = true; // Flag: translation through constant jumps
154 #else
155 const bool follow_const_jumps = false;
156 #endif
157
158 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
159 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
160 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
161 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
162 static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions?
163 static bool have_cmov = false; // target has CMOV instructions ?
164 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
165 static bool have_rat_stall = true; // target has partial register stalls ?
166 const bool tune_alignment = true; // Tune code alignments for running CPU ?
167 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
168 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
169 static int align_loops = 32; // Align the start of loops
170 static int align_jumps = 32; // Align the start of jumps
171 static int optcount[10] = {
172 10, // How often a block has to be executed before it is translated
173 0, // How often to use naive translation
174 0, 0, 0, 0,
175 -1, -1, -1, -1
176 };
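/* Editor's note (illustrative, based on invalidate_block() below, which
 * seeds bi->count = optcount[0]-1): with the defaults above, a block is
 * executed 10 times through the interpreter before it gets translated,
 * and entries of -1 disable the corresponding higher optimization levels. */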
177
178 struct op_properties {
179 uae_u8 use_flags;
180 uae_u8 set_flags;
181 uae_u8 is_addx;
182 uae_u8 cflow;
183 };
184 static op_properties prop[65536];
185
186 static inline int end_block(uae_u32 opcode)
187 {
188 return (prop[opcode].cflow & fl_end_block);
189 }
190
191 static inline bool is_const_jump(uae_u32 opcode)
192 {
193 return (prop[opcode].cflow == fl_const_jump);
194 }
195
196 static inline bool may_trap(uae_u32 opcode)
197 {
198 return (prop[opcode].cflow & fl_trap);
199 }
200
201 static inline unsigned int cft_map (unsigned int f)
202 {
203 #ifndef HAVE_GET_WORD_UNSWAPPED
204 return f;
205 #else
206 return ((f >> 8) & 255) | ((f & 255) << 8);
207 #endif
208 }
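/* Editor's worked example: on HAVE_GET_WORD_UNSWAPPED builds the 68k
 * opcode stream is stored byte-swapped, so cft_map() swaps the halves back
 * when indexing the 65536-entry tables, e.g. cft_map(0x754e) == 0x4e75
 * (RTS). On normal builds it is the identity. */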
209
210 uae_u8* start_pc_p;
211 uae_u32 start_pc;
212 uae_u32 current_block_pc_p;
213 static uintptr current_block_start_target;
214 uae_u32 needed_flags;
215 static uintptr next_pc_p;
216 static uintptr taken_pc_p;
217 static int branch_cc;
218 static int redo_current_block;
219
220 int segvcount=0;
221 int soft_flush_count=0;
222 int hard_flush_count=0;
223 int checksum_count=0;
224 static uae_u8* current_compile_p=NULL;
225 static uae_u8* max_compile_start;
226 static uae_u8* compiled_code=NULL;
227 static uae_s32 reg_alloc_run;
228 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
229 static uae_u8* popallspace=NULL;
230
231 void* pushall_call_handler=NULL;
232 static void* popall_do_nothing=NULL;
233 static void* popall_exec_nostats=NULL;
234 static void* popall_execute_normal=NULL;
235 static void* popall_cache_miss=NULL;
236 static void* popall_recompile_block=NULL;
237 static void* popall_check_checksum=NULL;
238
239 /* The 68k only ever executes from even addresses. So right now, we
240 * waste half the entries in this array.
241 * UPDATE: We now use those entries to store the start of the linked
242 * lists that we maintain for each hash result.
243 */
244 cacheline cache_tags[TAGSIZE];
245 int letit=0;
246 blockinfo* hold_bi[MAX_HOLD_BI];
247 blockinfo* active;
248 blockinfo* dormant;
249
250 /* 68040 */
251 extern struct cputbl op_smalltbl_0_nf[];
252 extern struct comptbl op_smalltbl_0_comp_nf[];
253 extern struct comptbl op_smalltbl_0_comp_ff[];
254
255 /* 68020 + 68881 */
256 extern struct cputbl op_smalltbl_1_nf[];
257
258 /* 68020 */
259 extern struct cputbl op_smalltbl_2_nf[];
260
261 /* 68010 */
262 extern struct cputbl op_smalltbl_3_nf[];
263
264 /* 68000 */
265 extern struct cputbl op_smalltbl_4_nf[];
266
267 /* 68000 slow but compatible. */
268 extern struct cputbl op_smalltbl_5_nf[];
269
270 static void flush_icache_hard(int n);
271 static void flush_icache_lazy(int n);
272 static void flush_icache_none(int n);
273 void (*flush_icache)(int n) = flush_icache_none;
274
275
276
277 bigstate live;
278 smallstate empty_ss;
279 smallstate default_ss;
280 static int optlev;
281
282 static int writereg(int r, int size);
283 static void unlock2(int r);
284 static void setlock(int r);
285 static int readreg_specific(int r, int size, int spec);
286 static int writereg_specific(int r, int size, int spec);
287 static void prepare_for_call_1(void);
288 static void prepare_for_call_2(void);
289 static void align_target(uae_u32 a);
290
291 static uae_s32 nextused[VREGS];
292
293 uae_u32 m68k_pc_offset;
294
295 /* Some arithmetic operations can be optimized away if the operands
296 * are known to be constant. But that's only a good idea when the
297 * side effects they would have on the flags are not important. This
298 * variable indicates whether we need the side effects or not.
299 */
300 uae_u32 needflags=0;
301
302 /* Flag handling is complicated.
303 *
304 * x86 instructions create flags, which quite often are exactly what we
305 * want. So at times, the "68k" flags are actually in the x86 flags.
306 *
307 * Then again, sometimes we do x86 instructions that clobber the x86
308 * flags, but don't represent a corresponding m68k instruction. In that
309 * case, we have to save them.
310 *
311 * We used to save them to the stack, but now store them back directly
312 * into the regflags.cznv of the traditional emulation. Thus some odd
313 * names.
314 *
315 * So flags can be in either of two places (used to be three; boy were
316 * things complicated back then!); And either place can contain either
317 * valid flags or invalid trash (and on the stack, there was also the
318 * option of "nothing at all", now gone). A couple of variables keep
319 * track of the respective states.
320 *
321 * To make things worse, we might or might not be interested in the flags.
322 * by default, we are, but a call to dont_care_flags can change that
323 * until the next call to live_flags. If we are not, pretty much whatever
324 * is in the register and/or the native flags is seen as valid.
325 */
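/* Editor's sketch of the resulting state machine (see live_flags(),
 * flags_to_stack() and clobber_flags() below; "stack" now really means
 * regflags.cznv):
 *
 *   live_flags():      flags_in_flags=VALID, flags_on_stack=TRASH
 *   flags_to_stack():  if flags are important, spill the x86 flags into
 *                      regflags.cznv, then flags_on_stack=VALID
 *   clobber_flags():   flags_to_stack() first if needed, then
 *                      flags_in_flags=TRASH
 *
 * make_flags_live_internal() is the reverse path: reload the x86 flags
 * from regflags.cznv when only the stored copy is VALID. */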
326
327 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
328 {
329 return cache_tags[cl+1].bi;
330 }
331
332 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
333 {
334 blockinfo* bi=get_blockinfo(cacheline(addr));
335
336 while (bi) {
337 if (bi->pc_p==addr)
338 return bi;
339 bi=bi->next_same_cl;
340 }
341 return NULL;
342 }
343
344
345 /*******************************************************************
346 * All sorts of list related functions for all of the lists *
347 *******************************************************************/
348
349 static __inline__ void remove_from_cl_list(blockinfo* bi)
350 {
351 uae_u32 cl=cacheline(bi->pc_p);
352
353 if (bi->prev_same_cl_p)
354 *(bi->prev_same_cl_p)=bi->next_same_cl;
355 if (bi->next_same_cl)
356 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
357 if (cache_tags[cl+1].bi)
358 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
359 else
360 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
361 }
362
363 static __inline__ void remove_from_list(blockinfo* bi)
364 {
365 if (bi->prev_p)
366 *(bi->prev_p)=bi->next;
367 if (bi->next)
368 bi->next->prev_p=bi->prev_p;
369 }
370
371 static __inline__ void remove_from_lists(blockinfo* bi)
372 {
373 remove_from_list(bi);
374 remove_from_cl_list(bi);
375 }
376
377 static __inline__ void add_to_cl_list(blockinfo* bi)
378 {
379 uae_u32 cl=cacheline(bi->pc_p);
380
381 if (cache_tags[cl+1].bi)
382 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
383 bi->next_same_cl=cache_tags[cl+1].bi;
384
385 cache_tags[cl+1].bi=bi;
386 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
387
388 cache_tags[cl].handler=bi->handler_to_use;
389 }
390
391 static __inline__ void raise_in_cl_list(blockinfo* bi)
392 {
393 remove_from_cl_list(bi);
394 add_to_cl_list(bi);
395 }
396
397 static __inline__ void add_to_active(blockinfo* bi)
398 {
399 if (active)
400 active->prev_p=&(bi->next);
401 bi->next=active;
402
403 active=bi;
404 bi->prev_p=&active;
405 }
406
407 static __inline__ void add_to_dormant(blockinfo* bi)
408 {
409 if (dormant)
410 dormant->prev_p=&(bi->next);
411 bi->next=dormant;
412
413 dormant=bi;
414 bi->prev_p=&dormant;
415 }
416
417 static __inline__ void remove_dep(dependency* d)
418 {
419 if (d->prev_p)
420 *(d->prev_p)=d->next;
421 if (d->next)
422 d->next->prev_p=d->prev_p;
423 d->prev_p=NULL;
424 d->next=NULL;
425 }
426
427 /* This block's code is about to be thrown away, so it no longer
428 depends on anything else */
429 static __inline__ void remove_deps(blockinfo* bi)
430 {
431 remove_dep(&(bi->dep[0]));
432 remove_dep(&(bi->dep[1]));
433 }
434
435 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
436 {
437 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
438 }
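/* Editor's worked example (hypothetical addresses): jmp_off points at the
 * 4-byte rel32 field of a direct jump, so the displacement is encoded
 * relative to the end of that field. If jmp_off == 0x10001000 and the new
 * handler a == 0x10002000, the patched value is
 * 0x10002000 - (0x10001000 + 4) = 0xffc. */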
439
440 /********************************************************************
441 * Soft flush handling support functions *
442 ********************************************************************/
443
444 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
445 {
446 //write_log("bi is %p\n",bi);
447 if (dh!=bi->direct_handler_to_use) {
448 dependency* x=bi->deplist;
449 //write_log("bi->deplist=%p\n",bi->deplist);
450 while (x) {
451 //write_log("x is %p\n",x);
452 //write_log("x->next is %p\n",x->next);
453 //write_log("x->prev_p is %p\n",x->prev_p);
454
455 if (x->jmp_off) {
456 adjust_jmpdep(x,dh);
457 }
458 x=x->next;
459 }
460 bi->direct_handler_to_use=dh;
461 }
462 }
463
464 static __inline__ void invalidate_block(blockinfo* bi)
465 {
466 int i;
467
468 bi->optlevel=0;
469 bi->count=optcount[0]-1;
470 bi->handler=NULL;
471 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
472 bi->direct_handler=NULL;
473 set_dhtu(bi,bi->direct_pen);
474 bi->needed_flags=0xff;
475 bi->status=BI_INVALID;
476 for (i=0;i<2;i++) {
477 bi->dep[i].jmp_off=NULL;
478 bi->dep[i].target=NULL;
479 }
480 remove_deps(bi);
481 }
482
483 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
484 {
485 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
486
487 Dif(!tbi) {
488 write_log("Could not create jmpdep!\n");
489 abort();
490 }
491 bi->dep[i].jmp_off=jmpaddr;
492 bi->dep[i].source=bi;
493 bi->dep[i].target=tbi;
494 bi->dep[i].next=tbi->deplist;
495 if (bi->dep[i].next)
496 bi->dep[i].next->prev_p=&(bi->dep[i].next);
497 bi->dep[i].prev_p=&(tbi->deplist);
498 tbi->deplist=&(bi->dep[i]);
499 }
500
501 static __inline__ void block_need_recompile(blockinfo * bi)
502 {
503 uae_u32 cl = cacheline(bi->pc_p);
504
505 set_dhtu(bi, bi->direct_pen);
506 bi->direct_handler = bi->direct_pen;
507
508 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
509 bi->handler = (cpuop_func *)popall_execute_normal;
510 if (bi == cache_tags[cl + 1].bi)
511 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
512 bi->status = BI_NEED_RECOMP;
513 }
514
515 static __inline__ void mark_callers_recompile(blockinfo * bi)
516 {
517 dependency *x = bi->deplist;
518
519 while (x) {
520 dependency *next = x->next; /* This disappears when we mark for
521 * recompilation and thus remove the
522 * blocks from the lists */
523 if (x->jmp_off) {
524 blockinfo *cbi = x->source;
525
526 Dif(cbi->status == BI_INVALID) {
527 // write_log("invalid block in dependency list\n"); // FIXME?
528 // abort();
529 }
530 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
531 block_need_recompile(cbi);
532 mark_callers_recompile(cbi);
533 }
534 else if (cbi->status == BI_COMPILING) {
535 redo_current_block = 1;
536 }
537 else if (cbi->status == BI_NEED_RECOMP) {
538 /* nothing */
539 }
540 else {
541 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
542 }
543 }
544 x = next;
545 }
546 }
547
548 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
549 {
550 blockinfo* bi=get_blockinfo_addr(addr);
551 int i;
552
553 if (!bi) {
554 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
555 if (hold_bi[i]) {
556 uae_u32 cl=cacheline(addr);
557
558 bi=hold_bi[i];
559 hold_bi[i]=NULL;
560 bi->pc_p=(uae_u8 *)addr;
561 invalidate_block(bi);
562 add_to_active(bi);
563 add_to_cl_list(bi);
564
565 }
566 }
567 }
568 if (!bi) {
569 write_log("Looking for blockinfo, can't find free one\n");
570 abort();
571 }
572 return bi;
573 }
574
575 static void prepare_block(blockinfo* bi);
576
577 /* Management of blockinfos.
578 
579 A blockinfo struct is allocated whenever a new block has to be
580 compiled. If the list of free blockinfos is empty, we allocate a new
581 pool of blockinfos and link the newly created blockinfos together
582 into the list of free blockinfos. Otherwise, we simply pop a structure
583 off the free list.
584 
585 Blockinfos are lazily deallocated, i.e. chained back into the
586 list of free blockinfos whenever a translation cache flush (hard or
587 soft) request occurs.
588 */
589
590 template< class T >
591 class LazyBlockAllocator
592 {
593 enum {
594 kPoolSize = 1 + 4096 / sizeof(T)
595 };
596 struct Pool {
597 T chunk[kPoolSize];
598 Pool * next;
599 };
600 Pool * mPools;
601 T * mChunks;
602 public:
603 LazyBlockAllocator() : mPools(0), mChunks(0) { }
604 ~LazyBlockAllocator();
605 T * acquire();
606 void release(T * const);
607 };
608
609 template< class T >
610 LazyBlockAllocator<T>::~LazyBlockAllocator()
611 {
612 Pool * currentPool = mPools;
613 while (currentPool) {
614 Pool * deadPool = currentPool;
615 currentPool = currentPool->next;
616 free(deadPool);
617 }
618 }
619
620 template< class T >
621 T * LazyBlockAllocator<T>::acquire()
622 {
623 if (!mChunks) {
624 // There is no chunk left, allocate a new pool and link the
625 // chunks into the free list
626 Pool * newPool = (Pool *)malloc(sizeof(Pool));
627 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
628 chunk->next = mChunks;
629 mChunks = chunk;
630 }
631 newPool->next = mPools;
632 mPools = newPool;
633 }
634 T * chunk = mChunks;
635 mChunks = chunk->next;
636 return chunk;
637 }
638
639 template< class T >
640 void LazyBlockAllocator<T>::release(T * const chunk)
641 {
642 chunk->next = mChunks;
643 mChunks = chunk;
644 }
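/* Editor's note: the free list is intrusive -- T must provide a "next"
 * pointer (blockinfo and checksum_info both do). A minimal usage sketch: */
#if 0
static LazyBlockAllocator<blockinfo> pool;
blockinfo *bi = pool.acquire();   /* pops a chunk, mallocs a new Pool if empty */
pool.release(bi);                 /* pushes the chunk back onto the free list */
#endif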
645
646 template< class T >
647 class HardBlockAllocator
648 {
649 public:
650 T * acquire() {
651 T * data = (T *)current_compile_p;
652 current_compile_p += sizeof(T);
653 return data;
654 }
655
656 void release(T * const chunk) {
657 // Deallocated on invalidation
658 }
659 };
660
661 #if USE_SEPARATE_BIA
662 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
663 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
664 #else
665 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
666 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
667 #endif
668
669 static __inline__ checksum_info *alloc_checksum_info(void)
670 {
671 checksum_info *csi = ChecksumInfoAllocator.acquire();
672 csi->next = NULL;
673 return csi;
674 }
675
676 static __inline__ void free_checksum_info(checksum_info *csi)
677 {
678 csi->next = NULL;
679 ChecksumInfoAllocator.release(csi);
680 }
681
682 static __inline__ void free_checksum_info_chain(checksum_info *csi)
683 {
684 while (csi != NULL) {
685 checksum_info *csi2 = csi->next;
686 free_checksum_info(csi);
687 csi = csi2;
688 }
689 }
690
691 static __inline__ blockinfo *alloc_blockinfo(void)
692 {
693 blockinfo *bi = BlockInfoAllocator.acquire();
694 #if USE_CHECKSUM_INFO
695 bi->csi = NULL;
696 #endif
697 return bi;
698 }
699
700 static __inline__ void free_blockinfo(blockinfo *bi)
701 {
702 #if USE_CHECKSUM_INFO
703 free_checksum_info_chain(bi->csi);
704 bi->csi = NULL;
705 #endif
706 BlockInfoAllocator.release(bi);
707 }
708
709 static __inline__ void alloc_blockinfos(void)
710 {
711 int i;
712 blockinfo* bi;
713
714 for (i=0;i<MAX_HOLD_BI;i++) {
715 if (hold_bi[i])
716 return;
717 bi=hold_bi[i]=alloc_blockinfo();
718 prepare_block(bi);
719 }
720 }
721
722 /********************************************************************
723 * Functions to emit data into memory, and other general support *
724 ********************************************************************/
725
726 static uae_u8* target;
727
728 static void emit_init(void)
729 {
730 }
731
732 static __inline__ void emit_byte(uae_u8 x)
733 {
734 *target++=x;
735 }
736
737 static __inline__ void emit_word(uae_u16 x)
738 {
739 *((uae_u16*)target)=x;
740 target+=2;
741 }
742
743 static __inline__ void emit_long(uae_u32 x)
744 {
745 *((uae_u32*)target)=x;
746 target+=4;
747 }
748
749 static __inline__ void emit_quad(uae_u64 x)
750 {
751 *((uae_u64*)target)=x;
752 target+=8;
753 }
754
755 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
756 {
757 memcpy((uae_u8 *)target,block,blocklen);
758 target+=blocklen;
759 }
760
761 static __inline__ uae_u32 reverse32(uae_u32 v)
762 {
763 #if 1
764 // gb-- We have specialized byteswapping functions, just use them
765 return do_byteswap_32(v);
766 #else
767 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
768 #endif
769 }
770
771 /********************************************************************
772 * Getting the information about the target CPU *
773 ********************************************************************/
774
775 #include "codegen_x86.cpp"
776
777 void set_target(uae_u8* t)
778 {
779 target=t;
780 }
781
782 static __inline__ uae_u8* get_target_noopt(void)
783 {
784 return target;
785 }
786
787 __inline__ uae_u8* get_target(void)
788 {
789 return get_target_noopt();
790 }
791
792
793 /********************************************************************
794 * Flags status handling. EMIT TIME! *
795 ********************************************************************/
796
797 static void bt_l_ri_noclobber(R4 r, IMM i);
798
799 static void make_flags_live_internal(void)
800 {
801 if (live.flags_in_flags==VALID)
802 return;
803 Dif (live.flags_on_stack==TRASH) {
804 write_log("Want flags, got something on stack, but it is TRASH\n");
805 abort();
806 }
807 if (live.flags_on_stack==VALID) {
808 int tmp;
809 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
810 raw_reg_to_flags(tmp);
811 unlock2(tmp);
812
813 live.flags_in_flags=VALID;
814 return;
815 }
816 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
817 live.flags_in_flags,live.flags_on_stack);
818 abort();
819 }
820
821 static void flags_to_stack(void)
822 {
823 if (live.flags_on_stack==VALID)
824 return;
825 if (!live.flags_are_important) {
826 live.flags_on_stack=VALID;
827 return;
828 }
829 Dif (live.flags_in_flags!=VALID)
830 abort();
831 else {
832 int tmp;
833 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
834 raw_flags_to_reg(tmp);
835 unlock2(tmp);
836 }
837 live.flags_on_stack=VALID;
838 }
839
840 static __inline__ void clobber_flags(void)
841 {
842 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
843 flags_to_stack();
844 live.flags_in_flags=TRASH;
845 }
846
847 /* Prepare for leaving the compiled stuff */
848 static __inline__ void flush_flags(void)
849 {
850 flags_to_stack();
851 return;
852 }
853
854 int touchcnt;
855
856 /********************************************************************
857 * Partial register flushing for optimized calls *
858 ********************************************************************/
859
860 struct regusage {
861 uae_u16 rmask;
862 uae_u16 wmask;
863 };
864
865 static inline void ru_set(uae_u16 *mask, int reg)
866 {
867 #if USE_OPTIMIZED_CALLS
868 *mask |= 1 << reg;
869 #endif
870 }
871
872 static inline bool ru_get(const uae_u16 *mask, int reg)
873 {
874 #if USE_OPTIMIZED_CALLS
875 return (*mask & (1 << reg));
876 #else
877 /* Default: instruction reads & writes the register */
878 return true;
879 #endif
880 }
881
882 static inline void ru_set_read(regusage *ru, int reg)
883 {
884 ru_set(&ru->rmask, reg);
885 }
886
887 static inline void ru_set_write(regusage *ru, int reg)
888 {
889 ru_set(&ru->wmask, reg);
890 }
891
892 static inline bool ru_read_p(const regusage *ru, int reg)
893 {
894 return ru_get(&ru->rmask, reg);
895 }
896
897 static inline bool ru_write_p(const regusage *ru, int reg)
898 {
899 return ru_get(&ru->wmask, reg);
900 }
901
902 static void ru_fill_ea(regusage *ru, int reg, amodes mode,
903 wordsizes size, int write_mode)
904 {
905 switch (mode) {
906 case Areg:
907 reg += 8;
908 /* fall through */
909 case Dreg:
910 ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
911 break;
912 case Ad16:
913 /* skip displacement */
914 m68k_pc_offset += 2; /* fall through */
915 case Aind:
916 case Aipi:
917 case Apdi:
918 ru_set_read(ru, reg+8);
919 break;
920 case Ad8r:
921 ru_set_read(ru, reg+8);
922 /* fall through */
923 case PC8r: {
924 uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
925 reg = (dp >> 12) & 15;
926 ru_set_read(ru, reg);
927 if (dp & 0x100)
928 m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
929 break;
930 }
931 case PC16:
932 case absw:
933 case imm0:
934 case imm1:
935 m68k_pc_offset += 2;
936 break;
937 case absl:
938 case imm2:
939 m68k_pc_offset += 4;
940 break;
941 case immi:
942 m68k_pc_offset += (size == sz_long) ? 4 : 2;
943 break;
944 }
945 }
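/* Editor's worked example for the indexed modes above (brief extension
 * format only, dp & 0x100 == 0): for an EA of the form (d8,A2,D5.W) the
 * extension word has reg = (dp >> 12) & 15 == 5, so D5 is marked as read
 * in addition to the base register A2, and no extra extension words are
 * consumed. Full-format words (dp & 0x100 set) additionally skip the base
 * and outer displacements via the size fields decoded above. */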
946
947 /* TODO: split into a static initialization part and a dynamic one
948 (instructions depending on extension words) */
949 static void ru_fill(regusage *ru, uae_u32 opcode)
950 {
951 m68k_pc_offset += 2;
952
953 /* Default: no register is used or written to */
954 ru->rmask = 0;
955 ru->wmask = 0;
956
957 uae_u32 real_opcode = cft_map(opcode);
958 struct instr *dp = &table68k[real_opcode];
959
960 bool rw_dest = true;
961 bool handled = false;
962
963 /* Handle some instructions specifically */
964 uae_u16 reg, ext;
965 switch (dp->mnemo) {
966 case i_BFCHG:
967 case i_BFCLR:
968 case i_BFEXTS:
969 case i_BFEXTU:
970 case i_BFFFO:
971 case i_BFINS:
972 case i_BFSET:
973 case i_BFTST:
974 ext = comp_get_iword((m68k_pc_offset+=2)-2);
975 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
976 if (ext & 0x020) ru_set_read(ru, ext & 7);
977 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
978 if (dp->dmode == Dreg)
979 ru_set_read(ru, dp->dreg);
980 switch (dp->mnemo) {
981 case i_BFEXTS:
982 case i_BFEXTU:
983 case i_BFFFO:
984 ru_set_write(ru, (ext >> 12) & 7);
985 break;
986 case i_BFINS:
987 ru_set_read(ru, (ext >> 12) & 7);
988 /* fall through */
989 case i_BFCHG:
990 case i_BFCLR:
991 case i_BFSET: /* was i_BSET, which is unreachable inside this bitfield-only switch */
992 if (dp->dmode == Dreg)
993 ru_set_write(ru, dp->dreg);
994 break;
995 }
996 handled = true;
997 rw_dest = false;
998 break;
999
1000 case i_BTST:
1001 rw_dest = false;
1002 break;
1003
1004 case i_CAS:
1005 {
1006 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1007 int Du = ext & 7;
1008 ru_set_read(ru, Du);
1009 int Dc = (ext >> 6) & 7;
1010 ru_set_read(ru, Dc);
1011 ru_set_write(ru, Dc);
1012 break;
1013 }
1014 case i_CAS2:
1015 {
1016 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1017 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1018 Rn1 = (ext >> 12) & 15;
1019 Du1 = (ext >> 6) & 7;
1020 Dc1 = ext & 7;
1021 ru_set_read(ru, Rn1);
1022 ru_set_read(ru, Du1);
1023 ru_set_read(ru, Dc1);
1024 ru_set_write(ru, Dc1);
1025 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1026 Rn2 = (ext >> 12) & 15;
1027 Du2 = (ext >> 6) & 7;
1028 Dc2 = ext & 7;
1029 ru_set_read(ru, Rn2);
1030 ru_set_read(ru, Du2);
1031 ru_set_write(ru, Dc2);
1032 break;
1033 }
1034 case i_DIVL: case i_MULL:
1035 m68k_pc_offset += 2;
1036 break;
1037 case i_LEA:
1038 case i_MOVE: case i_MOVEA: case i_MOVE16:
1039 rw_dest = false;
1040 break;
1041 case i_PACK: case i_UNPK:
1042 rw_dest = false;
1043 m68k_pc_offset += 2;
1044 break;
1045 case i_TRAPcc:
1046 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1047 break;
1048 case i_RTR:
1049 /* do nothing, just for coverage debugging */
1050 break;
1051 /* TODO: handle EXG instruction */
1052 }
1053
1054 /* Handle A-Traps better */
1055 if ((real_opcode & 0xf000) == 0xa000) {
1056 handled = true;
1057 }
1058
1059 /* Handle EmulOps better */
1060 if ((real_opcode & 0xff00) == 0x7100) {
1061 handled = true;
1062 ru->rmask = 0xffff;
1063 ru->wmask = 0;
1064 }
1065
1066 if (dp->suse && !handled)
1067 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1068
1069 if (dp->duse && !handled)
1070 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1071
1072 if (rw_dest)
1073 ru->rmask |= ru->wmask;
1074
1075 handled = handled || dp->suse || dp->duse;
1076
1077 /* Mark all registers as used/written if the instruction may trap */
1078 if (may_trap(opcode)) {
1079 handled = true;
1080 ru->rmask = 0xffff;
1081 ru->wmask = 0xffff;
1082 }
1083
1084 if (!handled) {
1085 write_log("ru_fill: %04x = { %04x, %04x }\n",
1086 real_opcode, ru->rmask, ru->wmask);
1087 abort();
1088 }
1089 }
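/* Editor's example of the resulting masks (derived from the code above;
 * register bit n = Dn, bit n+8 = An): for MOVE.L D1,(A2) the source EA
 * marks D1 as read and the destination EA marks A2 as read, while rw_dest
 * is false for i_MOVE, so { rmask, wmask } = { 0x0402, 0x0000 } -- unless
 * the instruction may_trap(), in which case both masks are forced to
 * 0xffff as a conservative fallback. */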
1090
1091 /********************************************************************
1092 * register allocation per block logging *
1093 ********************************************************************/
1094
1095 static uae_s8 vstate[VREGS];
1096 static uae_s8 vwritten[VREGS];
1097 static uae_s8 nstate[N_REGS];
1098
1099 #define L_UNKNOWN -127
1100 #define L_UNAVAIL -1
1101 #define L_NEEDED -2
1102 #define L_UNNEEDED -3
1103
1104 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1105 {
1106 int i;
1107
1108 for (i = 0; i < VREGS; i++)
1109 s->virt[i] = vstate[i];
1110 for (i = 0; i < N_REGS; i++)
1111 s->nat[i] = nstate[i];
1112 }
1113
1114 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1115 {
1116 int i;
1117 int reverse = 0;
1118
1119 for (i = 0; i < VREGS; i++) {
1120 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1121 return 1;
1122 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1123 reverse++;
1124 }
1125 for (i = 0; i < N_REGS; i++) {
1126 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1127 return 1;
1128 if (nstate[i] < 0 && s->nat[i] >= 0)
1129 reverse++;
1130 }
1131 if (reverse >= 2 && USE_MATCH)
1132 return 1; /* In this case, it might be worth recompiling the
1133 * callers */
1134 return 0;
1135 }
1136
1137 static __inline__ void log_startblock(void)
1138 {
1139 int i;
1140
1141 for (i = 0; i < VREGS; i++) {
1142 vstate[i] = L_UNKNOWN;
1143 vwritten[i] = 0;
1144 }
1145 for (i = 0; i < N_REGS; i++)
1146 nstate[i] = L_UNKNOWN;
1147 }
1148
1149 /* Using an n-reg for a temp variable */
1150 static __inline__ void log_isused(int n)
1151 {
1152 if (nstate[n] == L_UNKNOWN)
1153 nstate[n] = L_UNAVAIL;
1154 }
1155
1156 static __inline__ void log_visused(int r)
1157 {
1158 if (vstate[r] == L_UNKNOWN)
1159 vstate[r] = L_NEEDED;
1160 }
1161
1162 static __inline__ void do_load_reg(int n, int r)
1163 {
1164 if (r == FLAGTMP)
1165 raw_load_flagreg(n, r);
1166 else if (r == FLAGX)
1167 raw_load_flagx(n, r);
1168 else
1169 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1170 }
1171
1172 static __inline__ void check_load_reg(int n, int r)
1173 {
1174 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1175 }
1176
1177 static __inline__ void log_vwrite(int r)
1178 {
1179 vwritten[r] = 1;
1180 }
1181
1182 /* Using an n-reg to hold a v-reg */
1183 static __inline__ void log_isreg(int n, int r)
1184 {
1185 static int count = 0;
1186
1187 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1188 nstate[n] = r;
1189 else {
1190 do_load_reg(n, r);
1191 if (nstate[n] == L_UNKNOWN)
1192 nstate[n] = L_UNAVAIL;
1193 }
1194 if (vstate[r] == L_UNKNOWN)
1195 vstate[r] = L_NEEDED;
1196 }
1197
1198 static __inline__ void log_clobberreg(int r)
1199 {
1200 if (vstate[r] == L_UNKNOWN)
1201 vstate[r] = L_UNNEEDED;
1202 }
1203
1204 /* This ends all possibility of clever register allocation */
1205
1206 static __inline__ void log_flush(void)
1207 {
1208 int i;
1209
1210 for (i = 0; i < VREGS; i++)
1211 if (vstate[i] == L_UNKNOWN)
1212 vstate[i] = L_NEEDED;
1213 for (i = 0; i < N_REGS; i++)
1214 if (nstate[i] == L_UNKNOWN)
1215 nstate[i] = L_UNAVAIL;
1216 }
1217
1218 static __inline__ void log_dump(void)
1219 {
1220 int i;
1221
1222 return; /* dump disabled; remove this return to enable the debug output below */
1223
1224 write_log("----------------------\n");
1225 for (i = 0; i < N_REGS; i++) {
1226 switch (nstate[i]) {
1227 case L_UNKNOWN:
1228 write_log("Nat %d : UNKNOWN\n", i);
1229 break;
1230 case L_UNAVAIL:
1231 write_log("Nat %d : UNAVAIL\n", i);
1232 break;
1233 default:
1234 write_log("Nat %d : %d\n", i, nstate[i]);
1235 break;
1236 }
1237 }
1238 for (i = 0; i < VREGS; i++) {
1239 if (vstate[i] == L_UNNEEDED)
1240 write_log("Virt %d: UNNEEDED\n", i);
1241 }
1242 }
1243
1244 /********************************************************************
1245 * register status handling. EMIT TIME! *
1246 ********************************************************************/
1247
1248 static __inline__ void set_status(int r, int status)
1249 {
1250 if (status == ISCONST)
1251 log_clobberreg(r);
1252 live.state[r].status=status;
1253 }
1254
1255 static __inline__ int isinreg(int r)
1256 {
1257 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1258 }
1259
1260 static __inline__ void adjust_nreg(int r, uae_u32 val)
1261 {
1262 if (!val)
1263 return;
1264 raw_lea_l_brr(r,r,val);
1265 }
1266
1267 static void tomem(int r)
1268 {
1269 int rr=live.state[r].realreg;
1270
1271 if (isinreg(r)) {
1272 if (live.state[r].val && live.nat[rr].nholds==1
1273 && !live.nat[rr].locked) {
1274 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1275 // live.state[r].val,r,rr,target);
1276 adjust_nreg(rr,live.state[r].val);
1277 live.state[r].val=0;
1278 live.state[r].dirtysize=4;
1279 set_status(r,DIRTY);
1280 }
1281 }
1282
1283 if (live.state[r].status==DIRTY) {
1284 switch (live.state[r].dirtysize) {
1285 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1286 case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1287 case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1288 default: abort();
1289 }
1290 log_vwrite(r);
1291 set_status(r,CLEAN);
1292 live.state[r].dirtysize=0;
1293 }
1294 }
1295
1296 static __inline__ int isconst(int r)
1297 {
1298 return live.state[r].status==ISCONST;
1299 }
1300
1301 int is_const(int r)
1302 {
1303 return isconst(r);
1304 }
1305
1306 static __inline__ void writeback_const(int r)
1307 {
1308 if (!isconst(r))
1309 return;
1310 Dif (live.state[r].needflush==NF_HANDLER) {
1311 write_log("Trying to write back constant NF_HANDLER!\n");
1312 abort();
1313 }
1314
1315 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1316 log_vwrite(r);
1317 live.state[r].val=0;
1318 set_status(r,INMEM);
1319 }
1320
1321 static __inline__ void tomem_c(int r)
1322 {
1323 if (isconst(r)) {
1324 writeback_const(r);
1325 }
1326 else
1327 tomem(r);
1328 }
1329
1330 static void evict(int r)
1331 {
1332 int rr;
1333
1334 if (!isinreg(r))
1335 return;
1336 tomem(r);
1337 rr=live.state[r].realreg;
1338
1339 Dif (live.nat[rr].locked &&
1340 live.nat[rr].nholds==1) {
1341 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1342 abort();
1343 }
1344
1345 live.nat[rr].nholds--;
1346 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1347 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1348 int thisind=live.state[r].realind;
1349
1350 live.nat[rr].holds[thisind]=topreg;
1351 live.state[topreg].realind=thisind;
1352 }
1353 live.state[r].realreg=-1;
1354 set_status(r,INMEM);
1355 }
1356
1357 static __inline__ void free_nreg(int r)
1358 {
1359 int i=live.nat[r].nholds;
1360
1361 while (i) {
1362 int vr;
1363
1364 --i;
1365 vr=live.nat[r].holds[i];
1366 evict(vr);
1367 }
1368 Dif (live.nat[r].nholds!=0) {
1369 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1370 abort();
1371 }
1372 }
1373
1374 /* Use with care! */
1375 static __inline__ void isclean(int r)
1376 {
1377 if (!isinreg(r))
1378 return;
1379 live.state[r].validsize=4;
1380 live.state[r].dirtysize=0;
1381 live.state[r].val=0;
1382 set_status(r,CLEAN);
1383 }
1384
1385 static __inline__ void disassociate(int r)
1386 {
1387 isclean(r);
1388 evict(r);
1389 }
1390
1391 static __inline__ void set_const(int r, uae_u32 val)
1392 {
1393 disassociate(r);
1394 live.state[r].val=val;
1395 set_status(r,ISCONST);
1396 }
1397
1398 static __inline__ uae_u32 get_offset(int r)
1399 {
1400 return live.state[r].val;
1401 }
1402
1403 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1404 {
1405 int bestreg;
1406 uae_s32 when;
1407 int i;
1408 uae_s32 badness=0; /* to shut up gcc */
1409 bestreg=-1;
1410 when=2000000000;
1411
1412 /* XXX use a regalloc_order table? */
1413 for (i=0;i<N_REGS;i++) {
1414 badness=live.nat[i].touched;
1415 if (live.nat[i].nholds==0)
1416 badness=0;
1417 if (i==hint)
1418 badness-=200000000;
1419 if (!live.nat[i].locked && badness<when) {
1420 if ((size==1 && live.nat[i].canbyte) ||
1421 (size==2 && live.nat[i].canword) ||
1422 (size==4)) {
1423 bestreg=i;
1424 when=badness;
1425 if (live.nat[i].nholds==0 && hint<0)
1426 break;
1427 if (i==hint)
1428 break;
1429 }
1430 }
1431 }
1432 Dif (bestreg==-1)
1433 abort();
1434
1435 if (live.nat[bestreg].nholds>0) {
1436 free_nreg(bestreg);
1437 }
1438 if (isinreg(r)) {
1439 int rr=live.state[r].realreg;
1440 /* This will happen if we read a partially dirty register at a
1441 bigger size */
1442 Dif (willclobber || live.state[r].validsize>=size)
1443 abort();
1444 Dif (live.nat[rr].nholds!=1)
1445 abort();
1446 if (size==4 && live.state[r].validsize==2) {
1447 log_isused(bestreg);
1448 log_visused(r);
1449 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
1450 raw_bswap_32(bestreg);
1451 raw_zero_extend_16_rr(rr,rr);
1452 raw_zero_extend_16_rr(bestreg,bestreg);
1453 raw_bswap_32(bestreg);
1454 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1455 live.state[r].validsize=4;
1456 live.nat[rr].touched=touchcnt++;
1457 return rr;
1458 }
1459 if (live.state[r].validsize==1) {
1460 /* Nothing yet */
1461 }
1462 evict(r);
1463 }
1464
1465 if (!willclobber) {
1466 if (live.state[r].status!=UNDEF) {
1467 if (isconst(r)) {
1468 raw_mov_l_ri(bestreg,live.state[r].val);
1469 live.state[r].val=0;
1470 live.state[r].dirtysize=4;
1471 set_status(r,DIRTY);
1472 log_isused(bestreg);
1473 }
1474 else {
1475 log_isreg(bestreg, r); /* This will also load it! */
1476 live.state[r].dirtysize=0;
1477 set_status(r,CLEAN);
1478 }
1479 }
1480 else {
1481 live.state[r].val=0;
1482 live.state[r].dirtysize=0;
1483 set_status(r,CLEAN);
1484 log_isused(bestreg);
1485 }
1486 live.state[r].validsize=4;
1487 }
1488 else { /* this is the easiest way, but not optimal. FIXME! */
1489 /* Now it's trickier, but hopefully still OK */
1490 if (!isconst(r) || size==4) {
1491 live.state[r].validsize=size;
1492 live.state[r].dirtysize=size;
1493 live.state[r].val=0;
1494 set_status(r,DIRTY);
1495 if (size == 4) {
1496 log_clobberreg(r);
1497 log_isused(bestreg);
1498 }
1499 else {
1500 log_visused(r);
1501 log_isused(bestreg);
1502 }
1503 }
1504 else {
1505 if (live.state[r].status!=UNDEF)
1506 raw_mov_l_ri(bestreg,live.state[r].val);
1507 live.state[r].val=0;
1508 live.state[r].validsize=4;
1509 live.state[r].dirtysize=4;
1510 set_status(r,DIRTY);
1511 log_isused(bestreg);
1512 }
1513 }
1514 live.state[r].realreg=bestreg;
1515 live.state[r].realind=live.nat[bestreg].nholds;
1516 live.nat[bestreg].touched=touchcnt++;
1517 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1518 live.nat[bestreg].nholds++;
1519
1520 return bestreg;
1521 }
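/* Editor's note on the cost function above: "badness" is the LRU
 * timestamp (live.nat[i].touched), zeroed for registers that hold
 * nothing and biased by -200000000 for the hinted register, so an
 * unlocked hint practically always wins; otherwise the emptiest /
 * least recently touched register that can hold the requested size
 * is evicted and reused. */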
1522
1523 static int alloc_reg(int r, int size, int willclobber)
1524 {
1525 return alloc_reg_hinted(r,size,willclobber,-1);
1526 }
1527
1528 static void unlock2(int r)
1529 {
1530 Dif (!live.nat[r].locked)
1531 abort();
1532 live.nat[r].locked--;
1533 }
1534
1535 static void setlock(int r)
1536 {
1537 live.nat[r].locked++;
1538 }
1539
1540
1541 static void mov_nregs(int d, int s)
1542 {
1543 int ns=live.nat[s].nholds;
1544 int nd=live.nat[d].nholds;
1545 int i;
1546
1547 if (s==d)
1548 return;
1549
1550 if (nd>0)
1551 free_nreg(d);
1552
1553 log_isused(d);
1554 raw_mov_l_rr(d,s);
1555
1556 for (i=0;i<live.nat[s].nholds;i++) {
1557 int vs=live.nat[s].holds[i];
1558
1559 live.state[vs].realreg=d;
1560 live.state[vs].realind=i;
1561 live.nat[d].holds[i]=vs;
1562 }
1563 live.nat[d].nholds=live.nat[s].nholds;
1564
1565 live.nat[s].nholds=0;
1566 }
1567
1568
1569 static __inline__ void make_exclusive(int r, int size, int spec)
1570 {
1571 int clobber;
1572 reg_status oldstate;
1573 int rr=live.state[r].realreg;
1574 int nr;
1575 int nind;
1576 int ndirt=0;
1577 int i;
1578
1579 if (!isinreg(r))
1580 return;
1581 if (live.nat[rr].nholds==1)
1582 return;
1583 for (i=0;i<live.nat[rr].nholds;i++) {
1584 int vr=live.nat[rr].holds[i];
1585 if (vr!=r &&
1586 (live.state[vr].status==DIRTY || live.state[vr].val))
1587 ndirt++;
1588 }
1589 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1590 /* Everything else is clean, so let's keep this register */
1591 for (i=0;i<live.nat[rr].nholds;i++) {
1592 int vr=live.nat[rr].holds[i];
1593 if (vr!=r) {
1594 evict(vr);
1595 i--; /* Try that index again! */
1596 }
1597 }
1598 Dif (live.nat[rr].nholds!=1) {
1599 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1600 rr,live.nat[rr].nholds,r);
1601 abort();
1602 }
1603 return;
1604 }
1605
1606 /* We have to split the register */
1607 oldstate=live.state[r];
1608
1609 setlock(rr); /* Make sure this doesn't go away */
1610 /* Forget about r being in the register rr */
1611 disassociate(r);
1612 /* Get a new register, that we will clobber completely */
1613 if (oldstate.status==DIRTY) {
1614 /* If dirtysize is <4, we need a register that can handle the
1615 eventual smaller memory store! Thanks to Quake68k for exposing
1616 this detail ;-) */
1617 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1618 }
1619 else {
1620 nr=alloc_reg_hinted(r,4,1,spec);
1621 }
1622 nind=live.state[r].realind;
1623 live.state[r]=oldstate; /* Keep all the old state info */
1624 live.state[r].realreg=nr;
1625 live.state[r].realind=nind;
1626
1627 if (size<live.state[r].validsize) {
1628 if (live.state[r].val) {
1629 /* Might as well compensate for the offset now */
1630 raw_lea_l_brr(nr,rr,oldstate.val);
1631 live.state[r].val=0;
1632 live.state[r].dirtysize=4;
1633 set_status(r,DIRTY);
1634 }
1635 else
1636 raw_mov_l_rr(nr,rr); /* Make another copy */
1637 }
1638 unlock2(rr);
1639 }
1640
1641 static __inline__ void add_offset(int r, uae_u32 off)
1642 {
1643 live.state[r].val+=off;
1644 }
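/* Editor's example of the offset trick: address arithmetic such as three
 * successive (An)+ long-word accesses can be folded into
 * live.state[r].val (here val == 12) without emitting any code;
 * tomem()/remove_offset() later materialize the pending offset with a
 * single raw_lea_l_brr(rr,rr,12) via adjust_nreg(). */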
1645
1646 static __inline__ void remove_offset(int r, int spec)
1647 {
1648 reg_status oldstate;
1649 int rr;
1650
1651 if (isconst(r))
1652 return;
1653 if (live.state[r].val==0)
1654 return;
1655 if (isinreg(r) && live.state[r].validsize<4)
1656 evict(r);
1657
1658 if (!isinreg(r))
1659 alloc_reg_hinted(r,4,0,spec);
1660
1661 Dif (live.state[r].validsize!=4) {
1662 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1663 abort();
1664 }
1665 make_exclusive(r,0,-1);
1666 /* make_exclusive might have done the job already */
1667 if (live.state[r].val==0)
1668 return;
1669
1670 rr=live.state[r].realreg;
1671
1672 if (live.nat[rr].nholds==1) {
1673 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1674 // live.state[r].val,r,rr,target);
1675 adjust_nreg(rr,live.state[r].val);
1676 live.state[r].dirtysize=4;
1677 live.state[r].val=0;
1678 set_status(r,DIRTY);
1679 return;
1680 }
1681 write_log("Failed in remove_offset\n");
1682 abort();
1683 }
1684
1685 static __inline__ void remove_all_offsets(void)
1686 {
1687 int i;
1688
1689 for (i=0;i<VREGS;i++)
1690 remove_offset(i,-1);
1691 }
1692
1693 static inline void flush_reg_count(void)
1694 {
1695 #if RECORD_REGISTER_USAGE
1696 for (int r = 0; r < 16; r++)
1697 if (reg_count_local[r])
1698 ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1699 #endif
1700 }
1701
1702 static inline void record_register(int r)
1703 {
1704 #if RECORD_REGISTER_USAGE
1705 if (r < 16)
1706 reg_count_local[r]++;
1707 #endif
1708 }
1709
1710 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1711 {
1712 int n;
1713 int answer=-1;
1714
1715 record_register(r);
1716 if (live.state[r].status==UNDEF) {
1717 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1718 }
1719 if (!can_offset)
1720 remove_offset(r,spec);
1721
1722 if (isinreg(r) && live.state[r].validsize>=size) {
1723 n=live.state[r].realreg;
1724 switch(size) {
1725 case 1:
1726 if (live.nat[n].canbyte || spec>=0) {
1727 answer=n;
1728 }
1729 break;
1730 case 2:
1731 if (live.nat[n].canword || spec>=0) {
1732 answer=n;
1733 }
1734 break;
1735 case 4:
1736 answer=n;
1737 break;
1738 default: abort();
1739 }
1740 if (answer<0)
1741 evict(r);
1742 }
1743 /* either the value was in memory to start with, or it was evicted and
1744 is in memory now */
1745 if (answer<0) {
1746 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1747 }
1748
1749 if (spec>=0 && spec!=answer) {
1750 /* Too bad */
1751 mov_nregs(spec,answer);
1752 answer=spec;
1753 }
1754 live.nat[answer].locked++;
1755 live.nat[answer].touched=touchcnt++;
1756 return answer;
1757 }
1758
1759
1760
1761 static int readreg(int r, int size)
1762 {
1763 return readreg_general(r,size,-1,0);
1764 }
1765
1766 static int readreg_specific(int r, int size, int spec)
1767 {
1768 return readreg_general(r,size,spec,0);
1769 }
1770
1771 static int readreg_offset(int r, int size)
1772 {
1773 return readreg_general(r,size,-1,1);
1774 }
1775
1776 /* writereg_general(r, size, spec)
1777 *
1778 * INPUT
1779 * - r : mid-layer register
1780 * - size : requested size (1/2/4)
1781 * - spec : -1 if find or make a register free, otherwise specifies
1782 * the physical register to use in any case
1783 *
1784 * OUTPUT
1785 * - hard (physical, x86 here) register allocated to virtual register r
1786 */
1787 static __inline__ int writereg_general(int r, int size, int spec)
1788 {
1789 int n;
1790 int answer=-1;
1791
1792 record_register(r);
1793 if (size<4) {
1794 remove_offset(r,spec);
1795 }
1796
1797 make_exclusive(r,size,spec);
1798 if (isinreg(r)) {
1799 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1800 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1801 n=live.state[r].realreg;
1802
1803 Dif (live.nat[n].nholds!=1)
1804 abort();
1805 switch(size) {
1806 case 1:
1807 if (live.nat[n].canbyte || spec>=0) {
1808 live.state[r].dirtysize=ndsize;
1809 live.state[r].validsize=nvsize;
1810 answer=n;
1811 }
1812 break;
1813 case 2:
1814 if (live.nat[n].canword || spec>=0) {
1815 live.state[r].dirtysize=ndsize;
1816 live.state[r].validsize=nvsize;
1817 answer=n;
1818 }
1819 break;
1820 case 4:
1821 live.state[r].dirtysize=ndsize;
1822 live.state[r].validsize=nvsize;
1823 answer=n;
1824 break;
1825 default: abort();
1826 }
1827 if (answer<0)
1828 evict(r);
1829 }
1830 /* either the value was in memory to start with, or it was evicted and
1831 is in memory now */
1832 if (answer<0) {
1833 answer=alloc_reg_hinted(r,size,1,spec);
1834 }
1835 if (spec>=0 && spec!=answer) {
1836 mov_nregs(spec,answer);
1837 answer=spec;
1838 }
1839 if (live.state[r].status==UNDEF)
1840 live.state[r].validsize=4;
1841 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1842 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1843
1844 live.nat[answer].locked++;
1845 live.nat[answer].touched=touchcnt++;
1846 if (size==4) {
1847 live.state[r].val=0;
1848 }
1849 else {
1850 Dif (live.state[r].val) {
1851 write_log("Problem with val\n");
1852 abort();
1853 }
1854 }
1855 set_status(r,DIRTY);
1856 return answer;
1857 }
1858
1859 static int writereg(int r, int size)
1860 {
1861 return writereg_general(r,size,-1);
1862 }
1863
1864 static int writereg_specific(int r, int size, int spec)
1865 {
1866 return writereg_general(r,size,spec);
1867 }
1868
1869 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1870 {
1871 int n;
1872 int answer=-1;
1873
1874 record_register(r);
1875 if (live.state[r].status==UNDEF) {
1876 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1877 }
1878 remove_offset(r,spec);
1879 make_exclusive(r,0,spec);
1880
1881 Dif (wsize<rsize) {
1882 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1883 abort();
1884 }
1885 if (isinreg(r) && live.state[r].validsize>=rsize) {
1886 n=live.state[r].realreg;
1887 Dif (live.nat[n].nholds!=1)
1888 abort();
1889
1890 switch(rsize) {
1891 case 1:
1892 if (live.nat[n].canbyte || spec>=0) {
1893 answer=n;
1894 }
1895 break;
1896 case 2:
1897 if (live.nat[n].canword || spec>=0) {
1898 answer=n;
1899 }
1900 break;
1901 case 4:
1902 answer=n;
1903 break;
1904 default: abort();
1905 }
1906 if (answer<0)
1907 evict(r);
1908 }
1909 /* either the value was in memory to start with, or it was evicted and
1910 is in memory now */
1911 if (answer<0) {
1912 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1913 }
1914
1915 if (spec>=0 && spec!=answer) {
1916 /* Too bad */
1917 mov_nregs(spec,answer);
1918 answer=spec;
1919 }
1920 if (wsize>live.state[r].dirtysize)
1921 live.state[r].dirtysize=wsize;
1922 if (wsize>live.state[r].validsize)
1923 live.state[r].validsize=wsize;
1924 set_status(r,DIRTY);
1925
1926 live.nat[answer].locked++;
1927 live.nat[answer].touched=touchcnt++;
1928
1929 Dif (live.state[r].val) {
1930 write_log("Problem with val(rmw)\n");
1931 abort();
1932 }
1933 return answer;
1934 }
1935
1936 static int rmw(int r, int wsize, int rsize)
1937 {
1938 return rmw_general(r,wsize,rsize,-1);
1939 }
1940
1941 static int rmw_specific(int r, int wsize, int rsize, int spec)
1942 {
1943 return rmw_general(r,wsize,rsize,spec);
1944 }
1945
1946
1947 /* needed for restoring the carry flag on non-P6 cores */
1948 static void bt_l_ri_noclobber(R4 r, IMM i)
1949 {
1950 int size=4;
1951 if (i<16)
1952 size=2;
1953 r=readreg(r,size);
1954 raw_bt_l_ri(r,i);
1955 unlock2(r);
1956 }
1957
1958 /********************************************************************
1959 * FPU register status handling. EMIT TIME! *
1960 ********************************************************************/
1961
1962 static void f_tomem(int r)
1963 {
1964 if (live.fate[r].status==DIRTY) {
1965 #if USE_LONG_DOUBLE
1966 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1967 #else
1968 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1969 #endif
1970 live.fate[r].status=CLEAN;
1971 }
1972 }
1973
1974 static void f_tomem_drop(int r)
1975 {
1976 if (live.fate[r].status==DIRTY) {
1977 #if USE_LONG_DOUBLE
1978 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1979 #else
1980 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1981 #endif
1982 live.fate[r].status=INMEM;
1983 }
1984 }
1985
1986
1987 static __inline__ int f_isinreg(int r)
1988 {
1989 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1990 }
1991
1992 static void f_evict(int r)
1993 {
1994 int rr;
1995
1996 if (!f_isinreg(r))
1997 return;
1998 rr=live.fate[r].realreg;
1999 if (live.fat[rr].nholds==1)
2000 f_tomem_drop(r);
2001 else
2002 f_tomem(r);
2003
2004 Dif (live.fat[rr].locked &&
2005 live.fat[rr].nholds==1) {
2006 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2007 abort();
2008 }
2009
2010 live.fat[rr].nholds--;
2011 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2012 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2013 int thisind=live.fate[r].realind;
2014 live.fat[rr].holds[thisind]=topreg;
2015 live.fate[topreg].realind=thisind;
2016 }
2017 live.fate[r].status=INMEM;
2018 live.fate[r].realreg=-1;
2019 }
2020
2021 static __inline__ void f_free_nreg(int r)
2022 {
2023 int i=live.fat[r].nholds;
2024
2025 while (i) {
2026 int vr;
2027
2028 --i;
2029 vr=live.fat[r].holds[i];
2030 f_evict(vr);
2031 }
2032 Dif (live.fat[r].nholds!=0) {
2033 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2034 abort();
2035 }
2036 }
2037
2038
2039 /* Use with care! */
2040 static __inline__ void f_isclean(int r)
2041 {
2042 if (!f_isinreg(r))
2043 return;
2044 live.fate[r].status=CLEAN;
2045 }
2046
2047 static __inline__ void f_disassociate(int r)
2048 {
2049 f_isclean(r);
2050 f_evict(r);
2051 }
2052
2053
2054
2055 static int f_alloc_reg(int r, int willclobber)
2056 {
2057 int bestreg;
2058 uae_s32 when;
2059 int i;
2060 uae_s32 badness;
2061 bestreg=-1;
2062 when=2000000000;
2063 for (i=N_FREGS;i--;) {
2064 badness=live.fat[i].touched;
2065 if (live.fat[i].nholds==0)
2066 badness=0;
2067
2068 if (!live.fat[i].locked && badness<when) {
2069 bestreg=i;
2070 when=badness;
2071 if (live.fat[i].nholds==0)
2072 break;
2073 }
2074 }
2075 Dif (bestreg==-1)
2076 abort();
2077
2078 if (live.fat[bestreg].nholds>0) {
2079 f_free_nreg(bestreg);
2080 }
2081 if (f_isinreg(r)) {
2082 f_evict(r);
2083 }
2084
2085 if (!willclobber) {
2086 if (live.fate[r].status!=UNDEF) {
2087 #if USE_LONG_DOUBLE
2088 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2089 #else
2090 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2091 #endif
2092 }
2093 live.fate[r].status=CLEAN;
2094 }
2095 else {
2096 live.fate[r].status=DIRTY;
2097 }
2098 live.fate[r].realreg=bestreg;
2099 live.fate[r].realind=live.fat[bestreg].nholds;
2100 live.fat[bestreg].touched=touchcnt++;
2101 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2102 live.fat[bestreg].nholds++;
2103
2104 return bestreg;
2105 }
2106
2107 static void f_unlock(int r)
2108 {
2109 Dif (!live.fat[r].locked)
2110 abort();
2111 live.fat[r].locked--;
2112 }
2113
2114 static void f_setlock(int r)
2115 {
2116 live.fat[r].locked++;
2117 }
2118
2119 static __inline__ int f_readreg(int r)
2120 {
2121 int n;
2122 int answer=-1;
2123
2124 if (f_isinreg(r)) {
2125 n=live.fate[r].realreg;
2126 answer=n;
2127 }
2128 /* either the value was in memory to start with, or it was evicted and
2129 is in memory now */
2130 if (answer<0)
2131 answer=f_alloc_reg(r,0);
2132
2133 live.fat[answer].locked++;
2134 live.fat[answer].touched=touchcnt++;
2135 return answer;
2136 }
2137
2138 static __inline__ void f_make_exclusive(int r, int clobber)
2139 {
2140 freg_status oldstate;
2141 int rr=live.fate[r].realreg;
2142 int nr;
2143 int nind;
2144 int ndirt=0;
2145 int i;
2146
2147 if (!f_isinreg(r))
2148 return;
2149 if (live.fat[rr].nholds==1)
2150 return;
2151 for (i=0;i<live.fat[rr].nholds;i++) {
2152 int vr=live.fat[rr].holds[i];
2153 if (vr!=r && live.fate[vr].status==DIRTY)
2154 ndirt++;
2155 }
2156 if (!ndirt && !live.fat[rr].locked) {
2157 /* Everything else is clean, so let's keep this register */
2158 for (i=0;i<live.fat[rr].nholds;i++) {
2159 int vr=live.fat[rr].holds[i];
2160 if (vr!=r) {
2161 f_evict(vr);
2162 i--; /* Try that index again! */
2163 }
2164 }
2165 Dif (live.fat[rr].nholds!=1) {
2166 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2167 for (i=0;i<live.fat[rr].nholds;i++) {
2168 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2169 live.fate[live.fat[rr].holds[i]].realreg,
2170 live.fate[live.fat[rr].holds[i]].realind);
2171 }
2172 write_log("\n");
2173 abort();
2174 }
2175 return;
2176 }
2177
2178 /* We have to split the register */
2179 oldstate=live.fate[r];
2180
2181 f_setlock(rr); /* Make sure this doesn't go away */
2182 /* Forget about r being in the register rr */
2183 f_disassociate(r);
2184 /* Get a new register, that we will clobber completely */
2185 nr=f_alloc_reg(r,1);
2186 nind=live.fate[r].realind;
2187 if (!clobber)
2188 raw_fmov_rr(nr,rr); /* Make another copy */
2189 live.fate[r]=oldstate; /* Keep all the old state info */
2190 live.fate[r].realreg=nr;
2191 live.fate[r].realind=nind;
2192 f_unlock(rr);
2193 }
2194
2195
2196 static __inline__ int f_writereg(int r)
2197 {
2198 int n;
2199 int answer=-1;
2200
2201 f_make_exclusive(r,1);
2202 if (f_isinreg(r)) {
2203 n=live.fate[r].realreg;
2204 answer=n;
2205 }
2206 if (answer<0) {
2207 answer=f_alloc_reg(r,1);
2208 }
2209 live.fate[r].status=DIRTY;
2210 live.fat[answer].locked++;
2211 live.fat[answer].touched=touchcnt++;
2212 return answer;
2213 }
2214
2215 static int f_rmw(int r)
2216 {
2217 int n;
2218
2219 f_make_exclusive(r,0);
2220 if (f_isinreg(r)) {
2221 n=live.fate[r].realreg;
2222 }
2223 else
2224 n=f_alloc_reg(r,0);
2225 live.fate[r].status=DIRTY;
2226 live.fat[n].locked++;
2227 live.fat[n].touched=touchcnt++;
2228 return n;
2229 }
2230
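/* Transfer the FPU condition codes held in FP_RESULT into the native
   integer flags. On targets where this clobbers a fixed native register
   (FFLAG_NREG_CLOBBER_CONDITION), tmp is pinned to that register first. */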
2231 static void fflags_into_flags_internal(uae_u32 tmp)
2232 {
2233 int r;
2234
2235 clobber_flags();
2236 r=f_readreg(FP_RESULT);
2237 if (FFLAG_NREG_CLOBBER_CONDITION) {
2238 int tmp2=tmp;
2239 tmp=writereg_specific(tmp,4,FFLAG_NREG);
2240 raw_fflags_into_flags(r);
2241 unlock2(tmp);
2242 forget_about(tmp2);
2243 }
2244 else
2245 raw_fflags_into_flags(r);
2246 f_unlock(r);
2247 live_flags();
2248 }
2249
2250
2251
2252
2253 /********************************************************************
2254 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2255 ********************************************************************/
2256
2257 /*
2258 * RULES FOR HANDLING REGISTERS:
2259 *
2260 * * In the function headers, order the parameters
2261 * - 1st registers written to
2262 * - 2nd read/modify/write registers
2263 * - 3rd registers read from
2264 * * Before calling raw_*, you must call readreg, writereg or rmw for
2265 * each register
2266 * * The order for this is
2267 * - 1st call remove_offset for all registers written to with size<4
2268 * - 2nd call readreg for all registers read without offset
2269 * - 3rd call rmw for all rmw registers
2270 * - 4th call readreg_offset for all registers that can handle offsets
2271 * - 5th call get_offset for all the registers from the previous step
2272 * - 6th call writereg for all written-to registers
2273 * - 7th call raw_*
2274 * - 8th unlock2 all registers that were locked
2275 */
2276
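/* Illustrative sketch (added annotation, not part of the original source
 * and not compiled in): a hypothetical read/modify/write MIDFUNC that
 * follows the ordering rules above. The names example_op and
 * raw_example_op are invented for illustration; CLOBBER_MOV stands in
 * for whatever clobber declaration the real operation would need. */
#if 0
MIDFUNC(2,example_op,(RW4 d, R4 s))
{
    CLOBBER_MOV;           /* declare what the operation clobbers */
    s=readreg(s,4);        /* 2nd: lock registers that are only read */
    d=rmw(d,4,4);          /* 3rd: lock read/modify/write registers */
    raw_example_op(d,s);   /* 7th: emit the actual opcode */
    unlock2(d);            /* 8th: unlock everything locked above */
    unlock2(s);
}
MENDFUNC(2,example_op,(RW4 d, R4 s))
#endif
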
2277 MIDFUNC(0,live_flags,(void))
2278 {
2279 live.flags_on_stack=TRASH;
2280 live.flags_in_flags=VALID;
2281 live.flags_are_important=1;
2282 }
2283 MENDFUNC(0,live_flags,(void))
2284
2285 MIDFUNC(0,dont_care_flags,(void))
2286 {
2287 live.flags_are_important=0;
2288 }
2289 MENDFUNC(0,dont_care_flags,(void))
2290
2291
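/* Copy the host carry flag into the emulated X flag (FLAGX) by spilling
   FLAGX and setcc'ing the carry condition directly into its memory slot. */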
2292 MIDFUNC(0,duplicate_carry,(void))
2293 {
2294 evict(FLAGX);
2295 make_flags_live_internal();
2296 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2297 log_vwrite(FLAGX);
2298 }
2299 MENDFUNC(0,duplicate_carry,(void))
2300
2301 MIDFUNC(0,restore_carry,(void))
2302 {
2303 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2304 bt_l_ri_noclobber(FLAGX,0);
2305 }
2306 else { /* Avoid the stall the above creates.
2307 This is slow on non-P6, though.
2308 */
2309 COMPCALL(rol_b_ri)(FLAGX,8);
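/* rotating the byte through all 8 positions leaves its value
   unchanged but ends with bit 0 in the carry flag */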
2310 isclean(FLAGX);
2311 }
2312 }
2313 MENDFUNC(0,restore_carry,(void))
2314
2315 MIDFUNC(0,start_needflags,(void))
2316 {
2317 needflags=1;
2318 }
2319 MENDFUNC(0,start_needflags,(void))
2320
2321 MIDFUNC(0,end_needflags,(void))
2322 {
2323 needflags=0;
2324 }
2325 MENDFUNC(0,end_needflags,(void))
2326
2327 MIDFUNC(0,make_flags_live,(void))
2328 {
2329 make_flags_live_internal();
2330 }
2331 MENDFUNC(0,make_flags_live,(void))
2332
2333 MIDFUNC(1,fflags_into_flags,(W2 tmp))
2334 {
2335 clobber_flags();
2336 fflags_into_flags_internal(tmp);
2337 }
2338 MENDFUNC(1,fflags_into_flags,(W2 tmp))
2339
2340
2341 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2342 {
2343 int size=4;
2344 if (i<16)
2345 size=2;
2346 CLOBBER_BT;
2347 r=readreg(r,size);
2348 raw_bt_l_ri(r,i);
2349 unlock2(r);
2350 }
2351 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2352
2353 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2354 {
2355 CLOBBER_BT;
2356 r=readreg(r,4);
2357 b=readreg(b,4);
2358 raw_bt_l_rr(r,b);
2359 unlock2(r);
2360 unlock2(b);
2361 }
2362 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2363
2364 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2365 {
2366 int size=4;
2367 if (i<16)
2368 size=2;
2369 CLOBBER_BT;
2370 r=rmw(r,size,size);
2371 raw_btc_l_ri(r,i);
2372 unlock2(r);
2373 }
2374 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2375
2376 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2377 {
2378 CLOBBER_BT;
2379 b=readreg(b,4);
2380 r=rmw(r,4,4);
2381 raw_btc_l_rr(r,b);
2382 unlock2(r);
2383 unlock2(b);
2384 }
2385 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2386
2387
2388 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2389 {
2390 int size=4;
2391 if (i<16)
2392 size=2;
2393 CLOBBER_BT;
2394 r=rmw(r,size,size);
2395 raw_btr_l_ri(r,i);
2396 unlock2(r);
2397 }
2398 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2399
2400 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2401 {
2402 CLOBBER_BT;
2403 b=readreg(b,4);
2404 r=rmw(r,4,4);
2405 raw_btr_l_rr(r,b);
2406 unlock2(r);
2407 unlock2(b);
2408 }
2409 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2410
2411
2412 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2413 {
2414 int size=4;
2415 if (i<16)
2416 size=2;
2417 CLOBBER_BT;
2418 r=rmw(r,size,size);
2419 raw_bts_l_ri(r,i);
2420 unlock2(r);
2421 }
2422 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2423
2424 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2425 {
2426 CLOBBER_BT;
2427 b=readreg(b,4);
2428 r=rmw(r,4,4);
2429 raw_bts_l_rr(r,b);
2430 unlock2(r);
2431 unlock2(b);
2432 }
2433 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2434
2435 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2436 {
2437 CLOBBER_MOV;
2438 d=writereg(d,4);
2439 raw_mov_l_rm(d,s);
2440 unlock2(d);
2441 }
2442 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2443
2444
2445 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2446 {
2447 r=readreg(r,4);
2448 raw_call_r(r);
2449 unlock2(r);
2450 }
2451 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2452
2453 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2454 {
2455 CLOBBER_SUB;
2456 raw_sub_l_mi(d,s) ;
2457 }
2458 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2459
2460 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2461 {
2462 CLOBBER_MOV;
2463 raw_mov_l_mi(d,s) ;
2464 }
2465 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2466
2467 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2468 {
2469 CLOBBER_MOV;
2470 raw_mov_w_mi(d,s) ;
2471 }
2472 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2473
2474 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2475 {
2476 CLOBBER_MOV;
2477 raw_mov_b_mi(d,s) ;
2478 }
2479 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2480
2481 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2482 {
2483 if (!i && !needflags)
2484 return;
2485 CLOBBER_ROL;
2486 r=rmw(r,1,1);
2487 raw_rol_b_ri(r,i);
2488 unlock2(r);
2489 }
2490 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2491
2492 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2493 {
2494 if (!i && !needflags)
2495 return;
2496 CLOBBER_ROL;
2497 r=rmw(r,2,2);
2498 raw_rol_w_ri(r,i);
2499 unlock2(r);
2500 }
2501 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2502
2503 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2504 {
2505 if (!i && !needflags)
2506 return;
2507 CLOBBER_ROL;
2508 r=rmw(r,4,4);
2509 raw_rol_l_ri(r,i);
2510 unlock2(r);
2511 }
2512 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2513
2514 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2515 {
2516 if (isconst(r)) {
2517 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2518 return;
2519 }
2520 CLOBBER_ROL;
2521 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2522 d=rmw(d,4,4);
2523 Dif (r!=1) {
2524 write_log("Illegal register %d in raw_rol_l\n",r);
2525 abort();
2526 }
2527 raw_rol_l_rr(d,r) ;
2528 unlock2(r);
2529 unlock2(d);
2530 }
2531 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2532
2533 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2534 { /* Can only do this with r==1, i.e. cl */
2535
2536 if (isconst(r)) {
2537 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2538 return;
2539 }
2540 CLOBBER_ROL;
2541 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2542 d=rmw(d,2,2);
2543 Dif (r!=1) {
2544 write_log("Illegal register %d in raw_rol_w\n",r);
2545 abort();
2546 }
2547 raw_rol_w_rr(d,r) ;
2548 unlock2(r);
2549 unlock2(d);
2550 }
2551 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2552
2553 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2554 { /* Can only do this with r==1, i.e. cl */
2555
2556 if (isconst(r)) {
2557 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2558 return;
2559 }
2560
2561 CLOBBER_ROL;
2562 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2563 d=rmw(d,1,1);
2564 Dif (r!=1) {
2565 write_log("Illegal register %d in raw_rol_b\n",r);
2566 abort();
2567 }
2568 raw_rol_b_rr(d,r) ;
2569 unlock2(r);
2570 unlock2(d);
2571 }
2572 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2573
2574
2575 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2576 {
2577 if (isconst(r)) {
2578 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2579 return;
2580 }
2581 CLOBBER_SHLL;
2582 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2583 d=rmw(d,4,4);
2584 Dif (r!=1) {
2585 write_log("Illegal register %d in raw_shll_l\n",r);
2586 abort();
2587 }
2588 raw_shll_l_rr(d,r) ;
2589 unlock2(r);
2590 unlock2(d);
2591 }
2592 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2593
2594 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2595 { /* Can only do this with r==1, i.e. cl */
2596
2597 if (isconst(r)) {
2598 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2599 return;
2600 }
2601 CLOBBER_SHLL;
2602 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2603 d=rmw(d,2,2);
2604 Dif (r!=1) {
2605 write_log("Illegal register %d in raw_shll_w\n",r);
2606 abort();
2607 }
2608 raw_shll_w_rr(d,r) ;
2609 unlock2(r);
2610 unlock2(d);
2611 }
2612 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2613
2614 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2615 { /* Can only do this with r==1, i.e. cl */
2616
2617 if (isconst(r)) {
2618 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2619 return;
2620 }
2621
2622 CLOBBER_SHLL;
2623 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2624 d=rmw(d,1,1);
2625 Dif (r!=1) {
2626 write_log("Illegal register %d in raw_shll_b\n",r);
2627 abort();
2628 }
2629 raw_shll_b_rr(d,r) ;
2630 unlock2(r);
2631 unlock2(d);
2632 }
2633 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2634
2635
2636 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2637 {
2638 if (!i && !needflags)
2639 return;
2640 CLOBBER_ROR;
2641 r=rmw(r,1,1);
2642 raw_ror_b_ri(r,i);
2643 unlock2(r);
2644 }
2645 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2646
2647 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2648 {
2649 if (!i && !needflags)
2650 return;
2651 CLOBBER_ROR;
2652 r=rmw(r,2,2);
2653 raw_ror_w_ri(r,i);
2654 unlock2(r);
2655 }
2656 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2657
2658 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2659 {
2660 if (!i && !needflags)
2661 return;
2662 CLOBBER_ROR;
2663 r=rmw(r,4,4);
2664 raw_ror_l_ri(r,i);
2665 unlock2(r);
2666 }
2667 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2668
2669 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2670 {
2671 if (isconst(r)) {
2672 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2673 return;
2674 }
2675 CLOBBER_ROR;
2676 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2677 d=rmw(d,4,4);
2678 raw_ror_l_rr(d,r) ;
2679 unlock2(r);
2680 unlock2(d);
2681 }
2682 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2683
2684 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2685 {
2686 if (isconst(r)) {
2687 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2688 return;
2689 }
2690 CLOBBER_ROR;
2691 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2692 d=rmw(d,2,2);
2693 raw_ror_w_rr(d,r) ;
2694 unlock2(r);
2695 unlock2(d);
2696 }
2697 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2698
2699 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2700 {
2701 if (isconst(r)) {
2702 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2703 return;
2704 }
2705
2706 CLOBBER_ROR;
2707 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2708 d=rmw(d,1,1);
2709 raw_ror_b_rr(d,r) ;
2710 unlock2(r);
2711 unlock2(d);
2712 }
2713 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2714
2715 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2716 {
2717 if (isconst(r)) {
2718 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2719 return;
2720 }
2721 CLOBBER_SHRL;
2722 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2723 d=rmw(d,4,4);
2724 Dif (r!=1) {
2725 write_log("Illegal register %d in raw_shrl_l\n",r);
2726 abort();
2727 }
2728 raw_shrl_l_rr(d,r) ;
2729 unlock2(r);
2730 unlock2(d);
2731 }
2732 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2733
2734 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2735 { /* Can only do this with r==1, i.e. cl */
2736
2737 if (isconst(r)) {
2738 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2739 return;
2740 }
2741 CLOBBER_SHRL;
2742 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2743 d=rmw(d,2,2);
2744 Dif (r!=1) {
2745 write_log("Illegal register %d in raw_shrl_w\n",r);
2746 abort();
2747 }
2748 raw_shrl_w_rr(d,r) ;
2749 unlock2(r);
2750 unlock2(d);
2751 }
2752 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2753
2754 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2755 { /* Can only do this with r==1, i.e. cl */
2756
2757 if (isconst(r)) {
2758 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2759 return;
2760 }
2761
2762 CLOBBER_SHRL;
2763 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2764 d=rmw(d,1,1);
2765 Dif (r!=1) {
2766 write_log("Illegal register %d in raw_shrl_b\n",r);
2767 abort();
2768 }
2769 raw_shrl_b_rr(d,r) ;
2770 unlock2(r);
2771 unlock2(d);
2772 }
2773 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2774
2775
2776
2777 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2778 {
2779 if (!i && !needflags)
2780 return;
2781 if (isconst(r) && !needflags) {
2782 live.state[r].val<<=i;
2783 return;
2784 }
2785 CLOBBER_SHLL;
2786 r=rmw(r,4,4);
2787 raw_shll_l_ri(r,i);
2788 unlock2(r);
2789 }
2790 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2791
2792 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2793 {
2794 if (!i && !needflags)
2795 return;
2796 CLOBBER_SHLL;
2797 r=rmw(r,2,2);
2798 raw_shll_w_ri(r,i);
2799 unlock2(r);
2800 }
2801 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2802
2803 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2804 {
2805 if (!i && !needflags)
2806 return;
2807 CLOBBER_SHLL;
2808 r=rmw(r,1,1);
2809 raw_shll_b_ri(r,i);
2810 unlock2(r);
2811 }
2812 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2813
2814 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2815 {
2816 if (!i && !needflags)
2817 return;
2818 if (isconst(r) && !needflags) {
2819 live.state[r].val>>=i;
2820 return;
2821 }
2822 CLOBBER_SHRL;
2823 r=rmw(r,4,4);
2824 raw_shrl_l_ri(r,i);
2825 unlock2(r);
2826 }
2827 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2828
2829 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2830 {
2831 if (!i && !needflags)
2832 return;
2833 CLOBBER_SHRL;
2834 r=rmw(r,2,2);
2835 raw_shrl_w_ri(r,i);
2836 unlock2(r);
2837 }
2838 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2839
2840 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2841 {
2842 if (!i && !needflags)
2843 return;
2844 CLOBBER_SHRL;
2845 r=rmw(r,1,1);
2846 raw_shrl_b_ri(r,i);
2847 unlock2(r);
2848 }
2849 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2850
2851 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2852 {
2853 if (!i && !needflags)
2854 return;
2855 CLOBBER_SHRA;
2856 r=rmw(r,4,4);
2857 raw_shra_l_ri(r,i);
2858 unlock2(r);
2859 }
2860 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2861
2862 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2863 {
2864 if (!i && !needflags)
2865 return;
2866 CLOBBER_SHRA;
2867 r=rmw(r,2,2);
2868 raw_shra_w_ri(r,i);
2869 unlock2(r);
2870 }
2871 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2872
2873 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2874 {
2875 if (!i && !needflags)
2876 return;
2877 CLOBBER_SHRA;
2878 r=rmw(r,1,1);
2879 raw_shra_b_ri(r,i);
2880 unlock2(r);
2881 }
2882 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2883
2884 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2885 {
2886 if (isconst(r)) {
2887 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2888 return;
2889 }
2890 CLOBBER_SHRA;
2891 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2892 d=rmw(d,4,4);
2893 Dif (r!=1) {
2894 write_log("Illegal register %d in raw_shra_l\n",r);
2895 abort();
2896 }
2897 raw_shra_l_rr(d,r) ;
2898 unlock2(r);
2899 unlock2(d);
2900 }
2901 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2902
2903 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2904 { /* Can only do this with r==1, i.e. cl */
2905
2906 if (isconst(r)) {
2907 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2908 return;
2909 }
2910 CLOBBER_SHRA;
2911 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2912 d=rmw(d,2,2);
2913 Dif (r!=1) {
2914 write_log("Illegal register %d in raw_shra_w\n",r);
2915 abort();
2916 }
2917 raw_shra_w_rr(d,r) ;
2918 unlock2(r);
2919 unlock2(d);
2920 }
2921 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2922
2923 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2924 { /* Can only do this with r==1, i.e. cl */
2925
2926 if (isconst(r)) {
2927 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2928 return;
2929 }
2930
2931 CLOBBER_SHRA;
2932 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2933 d=rmw(d,1,1);
2934 Dif (r!=1) {
2935 write_log("Illegal register %d in raw_shra_b\n",r);
2936 abort();
2937 }
2938 raw_shra_b_rr(d,r) ;
2939 unlock2(r);
2940 unlock2(d);
2941 }
2942 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2943
2944
2945 MIDFUNC(2,setcc,(W1 d, IMM cc))
2946 {
2947 CLOBBER_SETCC;
2948 d=writereg(d,1);
2949 raw_setcc(d,cc);
2950 unlock2(d);
2951 }
2952 MENDFUNC(2,setcc,(W1 d, IMM cc))
2953
2954 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2955 {
2956 CLOBBER_SETCC;
2957 raw_setcc_m(d,cc);
2958 }
2959 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2960
2961 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2962 {
2963 if (d==s)
2964 return;
2965 CLOBBER_CMOV;
2966 s=readreg(s,4);
2967 d=rmw(d,4,4);
2968 raw_cmov_l_rr(d,s,cc);
2969 unlock2(s);
2970 unlock2(d);
2971 }
2972 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2973
2974 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2975 {
2976 CLOBBER_CMOV;
2977 d=rmw(d,4,4);
2978 raw_cmov_l_rm(d,s,cc);
2979 unlock2(d);
2980 }
2981 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2982
2983 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2984 {
2985 CLOBBER_BSF;
2986 s = readreg(s, 4);
2987 d = writereg(d, 4);
2988 raw_bsf_l_rr(d, s);
2989 unlock2(s);
2990 unlock2(d);
2991 }
2992 MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2993
2994 /* Set the Z flag depending on the value in s. Note that the
2995 value has to be 0 or -1 (or, more precisely, for non-zero
2996 values, bit 14 must be set)! */
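/* (Presumably bit 14 because, with the [LS]AHF scheme, the host flags
   live in the AH byte of the flag register: ZF is bit 6 of AH, i.e.
   bit 14 of the full 32-bit register.) */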
2997 MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2998 {
2999 CLOBBER_BSF;
3000 s=rmw_specific(s,4,4,FLAG_NREG3);
3001 tmp=writereg(tmp,4);
3002 raw_flags_set_zero(s, tmp);
3003 unlock2(tmp);
3004 unlock2(s);
3005 }
3006 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3007
3008 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3009 {
3010 CLOBBER_MUL;
3011 s=readreg(s,4);
3012 d=rmw(d,4,4);
3013 raw_imul_32_32(d,s);
3014 unlock2(s);
3015 unlock2(d);
3016 }
3017 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3018
3019 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3020 {
3021 CLOBBER_MUL;
3022 s=rmw_specific(s,4,4,MUL_NREG2);
3023 d=rmw_specific(d,4,4,MUL_NREG1);
3024 raw_imul_64_32(d,s);
3025 unlock2(s);
3026 unlock2(d);
3027 }
3028 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3029
3030 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3031 {
3032 CLOBBER_MUL;
3033 s=rmw_specific(s,4,4,MUL_NREG2);
3034 d=rmw_specific(d,4,4,MUL_NREG1);
3035 raw_mul_64_32(d,s);
3036 unlock2(s);
3037 unlock2(d);
3038 }
3039 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3040
3041 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3042 {
3043 CLOBBER_MUL;
3044 s=readreg(s,4);
3045 d=rmw(d,4,4);
3046 raw_mul_32_32(d,s);
3047 unlock2(s);
3048 unlock2(d);
3049 }
3050 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3051
3052 #if SIZEOF_VOID_P == 8
3053 MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3054 {
3055 int isrmw;
3056
3057 if (isconst(s)) {
3058 set_const(d,(uae_s32)live.state[s].val);
3059 return;
3060 }
3061
3062 CLOBBER_SE32;
3063 isrmw=(s==d);
3064 if (!isrmw) {
3065 s=readreg(s,4);
3066 d=writereg(d,4);
3067 }
3068 else { /* If we try to lock this twice, with different sizes, we
3069 are in trouble! */
3070 s=d=rmw(s,4,4);
3071 }
3072 raw_sign_extend_32_rr(d,s);
3073 if (!isrmw) {
3074 unlock2(d);
3075 unlock2(s);
3076 }
3077 else {
3078 unlock2(s);
3079 }
3080 }
3081 MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3082 #endif
3083
3084 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3085 {
3086 int isrmw;
3087
3088 if (isconst(s)) {
3089 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3090 return;
3091 }
3092
3093 CLOBBER_SE16;
3094 isrmw=(s==d);
3095 if (!isrmw) {
3096 s=readreg(s,2);
3097 d=writereg(d,4);
3098 }
3099 else { /* If we try to lock this twice, with different sizes, we
3100 are in trouble! */
3101 s=d=rmw(s,4,2);
3102 }
3103 raw_sign_extend_16_rr(d,s);
3104 if (!isrmw) {
3105 unlock2(d);
3106 unlock2(s);
3107 }
3108 else {
3109 unlock2(s);
3110 }
3111 }
3112 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3113
3114 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3115 {
3116 int isrmw;
3117
3118 if (isconst(s)) {
3119 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3120 return;
3121 }
3122
3123 isrmw=(s==d);
3124 CLOBBER_SE8;
3125 if (!isrmw) {
3126 s=readreg(s,1);
3127 d=writereg(d,4);
3128 }
3129 else { /* If we try to lock this twice, with different sizes, we
3130 are in trouble! */
3131 s=d=rmw(s,4,1);
3132 }
3133
3134 raw_sign_extend_8_rr(d,s);
3135
3136 if (!isrmw) {
3137 unlock2(d);
3138 unlock2(s);
3139 }
3140 else {
3141 unlock2(s);
3142 }
3143 }
3144 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3145
3146
3147 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3148 {
3149 int isrmw;
3150
3151 if (isconst(s)) {
3152 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3153 return;
3154 }
3155
3156 isrmw=(s==d);
3157 CLOBBER_ZE16;
3158 if (!isrmw) {
3159 s=readreg(s,2);
3160 d=writereg(d,4);
3161 }
3162 else { /* If we try to lock this twice, with different sizes, we
3163 are in trouble! */
3164 s=d=rmw(s,4,2);
3165 }
3166 raw_zero_extend_16_rr(d,s);
3167 if (!isrmw) {
3168 unlock2(d);
3169 unlock2(s);
3170 }
3171 else {
3172 unlock2(s);
3173 }
3174 }
3175 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3176
3177 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3178 {
3179 int isrmw;
3180 if (isconst(s)) {
3181 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3182 return;
3183 }
3184
3185 isrmw=(s==d);
3186 CLOBBER_ZE8;
3187 if (!isrmw) {
3188 s=readreg(s,1);
3189 d=writereg(d,4);
3190 }
3191 else { /* If we try to lock this twice, with different sizes, we
3192 are in trouble! */
3193 s=d=rmw(s,4,1);
3194 }
3195
3196 raw_zero_extend_8_rr(d,s);
3197
3198 if (!isrmw) {
3199 unlock2(d);
3200 unlock2(s);
3201 }
3202 else {
3203 unlock2(s);
3204 }
3205 }
3206 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3207
3208 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3209 {
3210 if (d==s)
3211 return;
3212 if (isconst(s)) {
3213 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3214 return;
3215 }
3216
3217 CLOBBER_MOV;
3218 s=readreg(s,1);
3219 d=writereg(d,1);
3220 raw_mov_b_rr(d,s);
3221 unlock2(d);
3222 unlock2(s);
3223 }
3224 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3225
3226 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3227 {
3228 if (d==s)
3229 return;
3230 if (isconst(s)) {
3231 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3232 return;
3233 }
3234
3235 CLOBBER_MOV;
3236 s=readreg(s,2);
3237 d=writereg(d,2);
3238 raw_mov_w_rr(d,s);
3239 unlock2(d);
3240 unlock2(s);
3241 }
3242 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3243
3244
3245 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3246 {
3247 CLOBBER_MOV;
3248 baser=readreg(baser,4);
3249 index=readreg(index,4);
3250 d=writereg(d,4);
3251
3252 raw_mov_l_rrm_indexed(d,baser,index,factor);
3253 unlock2(d);
3254 unlock2(baser);
3255 unlock2(index);
3256 }
3257 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3258
3259 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3260 {
3261 CLOBBER_MOV;
3262 baser=readreg(baser,4);
3263 index=readreg(index,4);
3264 d=writereg(d,2);
3265
3266 raw_mov_w_rrm_indexed(d,baser,index,factor);
3267 unlock2(d);
3268 unlock2(baser);
3269 unlock2(index);
3270 }
3271 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3272
3273 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3274 {
3275 CLOBBER_MOV;
3276 baser=readreg(baser,4);
3277 index=readreg(index,4);
3278 d=writereg(d,1);
3279
3280 raw_mov_b_rrm_indexed(d,baser,index,factor);
3281
3282 unlock2(d);
3283 unlock2(baser);
3284 unlock2(index);
3285 }
3286 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3287
3288
3289 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3290 {
3291 CLOBBER_MOV;
3292 baser=readreg(baser,4);
3293 index=readreg(index,4);
3294 s=readreg(s,4);
3295
3296 Dif (baser==s || index==s)
3297 abort();
3298
3299
3300 raw_mov_l_mrr_indexed(baser,index,factor,s);
3301 unlock2(s);
3302 unlock2(baser);
3303 unlock2(index);
3304 }
3305 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3306
3307 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3308 {
3309 CLOBBER_MOV;
3310 baser=readreg(baser,4);
3311 index=readreg(index,4);
3312 s=readreg(s,2);
3313
3314 raw_mov_w_mrr_indexed(baser,index,factor,s);
3315 unlock2(s);
3316 unlock2(baser);
3317 unlock2(index);
3318 }
3319 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3320
3321 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3322 {
3323 CLOBBER_MOV;
3324 s=readreg(s,1);
3325 baser=readreg(baser,4);
3326 index=readreg(index,4);
3327
3328 raw_mov_b_mrr_indexed(baser,index,factor,s);
3329 unlock2(s);
3330 unlock2(baser);
3331 unlock2(index);
3332 }
3333 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3334
3335
3336 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3337 {
3338 int basereg=baser;
3339 int indexreg=index;
3340
3341 CLOBBER_MOV;
3342 s=readreg(s,4);
3343 baser=readreg_offset(baser,4);
3344 index=readreg_offset(index,4);
3345
3346 base+=get_offset(basereg);
3347 base+=factor*get_offset(indexreg);
3348
3349 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3350 unlock2(s);
3351 unlock2(baser);
3352 unlock2(index);
3353 }
3354 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3355
3356 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3357 {
3358 int basereg=baser;
3359 int indexreg=index;
3360
3361 CLOBBER_MOV;
3362 s=readreg(s,2);
3363 baser=readreg_offset(baser,4);
3364 index=readreg_offset(index,4);
3365
3366 base+=get_offset(basereg);
3367 base+=factor*get_offset(indexreg);
3368
3369 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3370 unlock2(s);
3371 unlock2(baser);
3372 unlock2(index);
3373 }
3374 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3375
3376 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3377 {
3378 int basereg=baser;
3379 int indexreg=index;
3380
3381 CLOBBER_MOV;
3382 s=readreg(s,1);
3383 baser=readreg_offset(baser,4);
3384 index=readreg_offset(index,4);
3385
3386 base+=get_offset(basereg);
3387 base+=factor*get_offset(indexreg);
3388
3389 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3390 unlock2(s);
3391 unlock2(baser);
3392 unlock2(index);
3393 }
3394 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3395
3396
3397
3398 /* Read a long from base+baser+factor*index */
3399 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3400 {
3401 int basereg=baser;
3402 int indexreg=index;
3403
3404 CLOBBER_MOV;
3405 baser=readreg_offset(baser,4);
3406 index=readreg_offset(index,4);
3407 base+=get_offset(basereg);
3408 base+=factor*get_offset(indexreg);
3409 d=writereg(d,4);
3410 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3411 unlock2(d);
3412 unlock2(baser);
3413 unlock2(index);
3414 }
3415 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3416
3417
3418 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3419 {
3420 int basereg=baser;
3421 int indexreg=index;
3422
3423 CLOBBER_MOV;
3424 remove_offset(d,-1);
3425 baser=readreg_offset(baser,4);
3426 index=readreg_offset(index,4);
3427 base+=get_offset(basereg);
3428 base+=factor*get_offset(indexreg);
3429 d=writereg(d,2);
3430 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3431 unlock2(d);
3432 unlock2(baser);
3433 unlock2(index);
3434 }
3435 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3436
3437
3438 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3439 {
3440 int basereg=baser;
3441 int indexreg=index;
3442
3443 CLOBBER_MOV;
3444 remove_offset(d,-1);
3445 baser=readreg_offset(baser,4);
3446 index=readreg_offset(index,4);
3447 base+=get_offset(basereg);
3448 base+=factor*get_offset(indexreg);
3449 d=writereg(d,1);
3450 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3451 unlock2(d);
3452 unlock2(baser);
3453 unlock2(index);
3454 }
3455 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3456
3457 /* Read a long from base+factor*index */
3458 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3459 {
3460 int indexreg=index;
3461
3462 if (isconst(index)) {
3463 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3464 return;
3465 }
3466
3467 CLOBBER_MOV;
3468 index=readreg_offset(index,4);
3469 base+=get_offset(indexreg)*factor;
3470 d=writereg(d,4);
3471
3472 raw_mov_l_rm_indexed(d,base,index,factor);
3473 unlock2(index);
3474 unlock2(d);
3475 }
3476 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3477
3478
3479 /* read the long at the address contained in s+offset and store in d */
3480 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3481 {
3482 if (isconst(s)) {
3483 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3484 return;
3485 }
3486 CLOBBER_MOV;
3487 s=readreg(s,4);
3488 d=writereg(d,4);
3489
3490 raw_mov_l_rR(d,s,offset);
3491 unlock2(d);
3492 unlock2(s);
3493 }
3494 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3495
3496 /* read the word at the address contained in s+offset and store in d */
3497 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3498 {
3499 if (isconst(s)) {
3500 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3501 return;
3502 }
3503 CLOBBER_MOV;
3504 s=readreg(s,4);
3505 d=writereg(d,2);
3506
3507 raw_mov_w_rR(d,s,offset);
3508 unlock2(d);
3509 unlock2(s);
3510 }
3511 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3512
3513 /* read the byte at the address contained in s+offset and store in d */
3514 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3515 {
3516 if (isconst(s)) {
3517 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3518 return;
3519 }
3520 CLOBBER_MOV;
3521 s=readreg(s,4);
3522 d=writereg(d,1);
3523
3524 raw_mov_b_rR(d,s,offset);
3525 unlock2(d);
3526 unlock2(s);
3527 }
3528 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3529
3530 /* read the long at the address contained in s+offset and store in d */
3531 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3532 {
3533 int sreg=s;
3534 if (isconst(s)) {
3535 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3536 return;
3537 }
3538 CLOBBER_MOV;
3539 s=readreg_offset(s,4);
3540 offset+=get_offset(sreg);
3541 d=writereg(d,4);
3542
3543 raw_mov_l_brR(d,s,offset);
3544 unlock2(d);
3545 unlock2(s);
3546 }
3547 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3548
3549 /* read the word at the address contained in s+offset and store in d */
3550 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3551 {
3552 int sreg=s;
3553 if (isconst(s)) {
3554 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3555 return;
3556 }
3557 CLOBBER_MOV;
3558 remove_offset(d,-1);
3559 s=readreg_offset(s,4);
3560 offset+=get_offset(sreg);
3561 d=writereg(d,2);
3562
3563 raw_mov_w_brR(d,s,offset);
3564 unlock2(d);
3565 unlock2(s);
3566 }
3567 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3568
3569 /* read the byte at the address contained in s+offset and store in d */
3570 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3571 {
3572 int sreg=s;
3573 if (isconst(s)) {
3574 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3575 return;
3576 }
3577 CLOBBER_MOV;
3578 remove_offset(d,-1);
3579 s=readreg_offset(s,4);
3580 offset+=get_offset(sreg);
3581 d=writereg(d,1);
3582
3583 raw_mov_b_brR(d,s,offset);
3584 unlock2(d);
3585 unlock2(s);
3586 }
3587 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3588
3589 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3590 {
3591 int dreg=d;
3592 if (isconst(d)) {
3593 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3594 return;
3595 }
3596
3597 CLOBBER_MOV;
3598 d=readreg_offset(d,4);
3599 offset+=get_offset(dreg);
3600 raw_mov_l_Ri(d,i,offset);
3601 unlock2(d);
3602 }
3603 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3604
3605 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3606 {
3607 int dreg=d;
3608 if (isconst(d)) {
3609 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3610 return;
3611 }
3612
3613 CLOBBER_MOV;
3614 d=readreg_offset(d,4);
3615 offset+=get_offset(dreg);
3616 raw_mov_w_Ri(d,i,offset);
3617 unlock2(d);
3618 }
3619 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3620
3621 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3622 {
3623 int dreg=d;
3624 if (isconst(d)) {
3625 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3626 return;
3627 }
3628
3629 CLOBBER_MOV;
3630 d=readreg_offset(d,4);
3631 offset+=get_offset(dreg);
3632 raw_mov_b_Ri(d,i,offset);
3633 unlock2(d);
3634 }
3635 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3636
3637 /* Warning! OFFSET is byte-sized only! */
3638 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3639 {
3640 if (isconst(d)) {
3641 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3642 return;
3643 }
3644 if (isconst(s)) {
3645 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3646 return;
3647 }
3648
3649 CLOBBER_MOV;
3650 s=readreg(s,4);
3651 d=readreg(d,4);
3652
3653 raw_mov_l_Rr(d,s,offset);
3654 unlock2(d);
3655 unlock2(s);
3656 }
3657 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3658
3659 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3660 {
3661 if (isconst(d)) {
3662 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3663 return;
3664 }
3665 if (isconst(s)) {
3666 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3667 return;
3668 }
3669
3670 CLOBBER_MOV;
3671 s=readreg(s,2);
3672 d=readreg(d,4);
3673 raw_mov_w_Rr(d,s,offset);
3674 unlock2(d);
3675 unlock2(s);
3676 }
3677 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3678
3679 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3680 {
3681 if (isconst(d)) {
3682 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3683 return;
3684 }
3685 if (isconst(s)) {
3686 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3687 return;
3688 }
3689
3690 CLOBBER_MOV;
3691 s=readreg(s,1);
3692 d=readreg(d,4);
3693 raw_mov_b_Rr(d,s,offset);
3694 unlock2(d);
3695 unlock2(s);
3696 }
3697 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3698
3699 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3700 {
3701 if (isconst(s)) {
3702 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3703 return;
3704 }
3705 #if USE_OFFSET
3706 if (d==s) {
3707 add_offset(d,offset);
3708 return;
3709 }
3710 #endif
3711 CLOBBER_LEA;
3712 s=readreg(s,4);
3713 d=writereg(d,4);
3714 raw_lea_l_brr(d,s,offset);
3715 unlock2(d);
3716 unlock2(s);
3717 }
3718 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3719
3720 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3721 {
3722 if (!offset) {
3723 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3724 return;
3725 }
3726 CLOBBER_LEA;
3727 s=readreg(s,4);
3728 index=readreg(index,4);
3729 d=writereg(d,4);
3730
3731 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3732 unlock2(d);
3733 unlock2(index);
3734 unlock2(s);
3735 }
3736 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3737
3738 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3739 {
3740 CLOBBER_LEA;
3741 s=readreg(s,4);
3742 index=readreg(index,4);
3743 d=writereg(d,4);
3744
3745 raw_lea_l_rr_indexed(d,s,index,factor);
3746 unlock2(d);
3747 unlock2(index);
3748 unlock2(s);
3749 }
3750 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3751
3752 /* write s to the long at the address contained in d+offset */
3753 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3754 {
3755 int dreg=d;
3756 if (isconst(d)) {
3757 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3758 return;
3759 }
3760
3761 CLOBBER_MOV;
3762 s=readreg(s,4);
3763 d=readreg_offset(d,4);
3764 offset+=get_offset(dreg);
3765
3766 raw_mov_l_bRr(d,s,offset);
3767 unlock2(d);
3768 unlock2(s);
3769 }
3770 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3771
3772 /* write the word s to the address contained in d+offset */
3773 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3774 {
3775 int dreg=d;
3776
3777 if (isconst(d)) {
3778 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3779 return;
3780 }
3781
3782 CLOBBER_MOV;
3783 s=readreg(s,2);
3784 d=readreg_offset(d,4);
3785 offset+=get_offset(dreg);
3786 raw_mov_w_bRr(d,s,offset);
3787 unlock2(d);
3788 unlock2(s);
3789 }
3790 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3791
3792 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3793 {
3794 int dreg=d;
3795 if (isconst(d)) {
3796 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3797 return;
3798 }
3799
3800 CLOBBER_MOV;
3801 s=readreg(s,1);
3802 d=readreg_offset(d,4);
3803 offset+=get_offset(dreg);
3804 raw_mov_b_bRr(d,s,offset);
3805 unlock2(d);
3806 unlock2(s);
3807 }
3808 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3809
3810 MIDFUNC(1,bswap_32,(RW4 r))
3811 {
3812 int reg=r;
3813
3814 if (isconst(r)) {
3815 uae_u32 oldv=live.state[r].val;
3816 live.state[r].val=reverse32(oldv);
3817 return;
3818 }
3819
3820 CLOBBER_SW32;
3821 r=rmw(r,4,4);
3822 raw_bswap_32(r);
3823 unlock2(r);
3824 }
3825 MENDFUNC(1,bswap_32,(RW4 r))
3826
3827 MIDFUNC(1,bswap_16,(RW2 r))
3828 {
3829 if (isconst(r)) {
3830 uae_u32 oldv=live.state[r].val;
3831 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3832 (oldv&0xffff0000);
3833 return;
3834 }
3835
3836 CLOBBER_SW16;
3837 r=rmw(r,2,2);
3838
3839 raw_bswap_16(r);
3840 unlock2(r);
3841 }
3842 MENDFUNC(1,bswap_16,(RW2 r))
3843
3844
3845
3846 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3847 {
3848 int olds;
3849
3850 if (d==s) { /* How pointless! */
3851 return;
3852 }
3853 if (isconst(s)) {
3854 COMPCALL(mov_l_ri)(d,live.state[s].val);
3855 return;
3856 }
3857 olds=s;
3858 disassociate(d);
3859 s=readreg_offset(s,4);
3860 live.state[d].realreg=s;
3861 live.state[d].realind=live.nat[s].nholds;
3862 live.state[d].val=live.state[olds].val;
3863 live.state[d].validsize=4;
3864 live.state[d].dirtysize=4;
3865 set_status(d,DIRTY);
3866
3867 live.nat[s].holds[live.nat[s].nholds]=d;
3868 live.nat[s].nholds++;
3869 log_clobberreg(d);
3870 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3871 d,s,live.state[d].realind,live.nat[s].nholds); */
3872 unlock2(s);
3873 }
3874 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3875
3876 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3877 {
3878 if (isconst(s)) {
3879 COMPCALL(mov_l_mi)(d,live.state[s].val);
3880 return;
3881 }
3882 CLOBBER_MOV;
3883 s=readreg(s,4);
3884
3885 raw_mov_l_mr(d,s);
3886 unlock2(s);
3887 }
3888 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3889
3890
3891 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3892 {
3893 if (isconst(s)) {
3894 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3895 return;
3896 }
3897 CLOBBER_MOV;
3898 s=readreg(s,2);
3899
3900 raw_mov_w_mr(d,s);
3901 unlock2(s);
3902 }
3903 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3904
3905 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3906 {
3907 CLOBBER_MOV;
3908 d=writereg(d,2);
3909
3910 raw_mov_w_rm(d,s);
3911 unlock2(d);
3912 }
3913 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3914
3915 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3916 {
3917 if (isconst(s)) {
3918 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3919 return;
3920 }
3921
3922 CLOBBER_MOV;
3923 s=readreg(s,1);
3924
3925 raw_mov_b_mr(d,s);
3926 unlock2(s);
3927 }
3928 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3929
3930 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3931 {
3932 CLOBBER_MOV;
3933 d=writereg(d,1);
3934
3935 raw_mov_b_rm(d,s);
3936 unlock2(d);
3937 }
3938 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3939
3940 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3941 {
3942 set_const(d,s);
3943 return;
3944 }
3945 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3946
3947 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3948 {
3949 CLOBBER_MOV;
3950 d=writereg(d,2);
3951
3952 raw_mov_w_ri(d,s);
3953 unlock2(d);
3954 }
3955 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3956
3957 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3958 {
3959 CLOBBER_MOV;
3960 d=writereg(d,1);
3961
3962 raw_mov_b_ri(d,s);
3963 unlock2(d);
3964 }
3965 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3966
3967
3968 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3969 {
3970 CLOBBER_ADD;
3971 raw_add_l_mi(d,s) ;
3972 }
3973 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3974
3975 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3976 {
3977 CLOBBER_ADD;
3978 raw_add_w_mi(d,s) ;
3979 }
3980 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3981
3982 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3983 {
3984 CLOBBER_ADD;
3985 raw_add_b_mi(d,s) ;
3986 }
3987 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3988
3989
3990 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3991 {
3992 CLOBBER_TEST;
3993 d=readreg(d,4);
3994
3995 raw_test_l_ri(d,i);
3996 unlock2(d);
3997 }
3998 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3999
4000 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
4001 {
4002 CLOBBER_TEST;
4003 d=readreg(d,4);
4004 s=readreg(s,4);
4005
4006 raw_test_l_rr(d,s);
4007 unlock2(d);
4008 unlock2(s);
4009 }
4010 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4011
4012 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4013 {
4014 CLOBBER_TEST;
4015 d=readreg(d,2);
4016 s=readreg(s,2);
4017
4018 raw_test_w_rr(d,s);
4019 unlock2(d);
4020 unlock2(s);
4021 }
4022 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4023
4024 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4025 {
4026 CLOBBER_TEST;
4027 d=readreg(d,1);
4028 s=readreg(s,1);
4029
4030 raw_test_b_rr(d,s);
4031 unlock2(d);
4032 unlock2(s);
4033 }
4034 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4035
4036
4037 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4038 {
4039 if (isconst(d) && !needflags) {
4040 live.state[d].val &= i;
4041 return;
4042 }
4043
4044 CLOBBER_AND;
4045 d=rmw(d,4,4);
4046
4047 raw_and_l_ri(d,i);
4048 unlock2(d);
4049 }
4050 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4051
4052 MIDFUNC(2,and_l,(RW4 d, R4 s))
4053 {
4054 CLOBBER_AND;
4055 s=readreg(s,4);
4056 d=rmw(d,4,4);
4057
4058 raw_and_l(d,s);
4059 unlock2(d);
4060 unlock2(s);
4061 }
4062 MENDFUNC(2,and_l,(RW4 d, R4 s))
4063
4064 MIDFUNC(2,and_w,(RW2 d, R2 s))
4065 {
4066 CLOBBER_AND;
4067 s=readreg(s,2);
4068 d=rmw(d,2,2);
4069
4070 raw_and_w(d,s);
4071 unlock2(d);
4072 unlock2(s);
4073 }
4074 MENDFUNC(2,and_w,(RW2 d, R2 s))
4075
4076 MIDFUNC(2,and_b,(RW1 d, R1 s))
4077 {
4078 CLOBBER_AND;
4079 s=readreg(s,1);
4080 d=rmw(d,1,1);
4081
4082 raw_and_b(d,s);
4083 unlock2(d);
4084 unlock2(s);
4085 }
4086 MENDFUNC(2,and_b,(RW1 d, R1 s))
4087
4088 // gb-- used for making an fpcr value in compemu_fpp.cpp
4089 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4090 {
4091 CLOBBER_OR;
4092 d=rmw(d,4,4);
4093
4094 raw_or_l_rm(d,s);
4095 unlock2(d);
4096 }
4097 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4098
4099 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4100 {
4101 if (isconst(d) && !needflags) {
4102 live.state[d].val|=i;
4103 return;
4104 }
4105 CLOBBER_OR;
4106 d=rmw(d,4,4);
4107
4108 raw_or_l_ri(d,i);
4109 unlock2(d);
4110 }
4111 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4112
4113 MIDFUNC(2,or_l,(RW4 d, R4 s))
4114 {
4115 if (isconst(d) && isconst(s) && !needflags) {
4116 live.state[d].val|=live.state[s].val;
4117 return;
4118 }
4119 CLOBBER_OR;
4120 s=readreg(s,4);
4121 d=rmw(d,4,4);
4122
4123 raw_or_l(d,s);
4124 unlock2(d);
4125 unlock2(s);
4126 }
4127 MENDFUNC(2,or_l,(RW4 d, R4 s))
4128
4129 MIDFUNC(2,or_w,(RW2 d, R2 s))
4130 {
4131 CLOBBER_OR;
4132 s=readreg(s,2);
4133 d=rmw(d,2,2);
4134
4135 raw_or_w(d,s);
4136 unlock2(d);
4137 unlock2(s);
4138 }
4139 MENDFUNC(2,or_w,(RW2 d, R2 s))
4140
4141 MIDFUNC(2,or_b,(RW1 d, R1 s))
4142 {
4143 CLOBBER_OR;
4144 s=readreg(s,1);
4145 d=rmw(d,1,1);
4146
4147 raw_or_b(d,s);
4148 unlock2(d);
4149 unlock2(s);
4150 }
4151 MENDFUNC(2,or_b,(RW1 d, R1 s))
4152
4153 MIDFUNC(2,adc_l,(RW4 d, R4 s))
4154 {
4155 CLOBBER_ADC;
4156 s=readreg(s,4);
4157 d=rmw(d,4,4);
4158
4159 raw_adc_l(d,s);
4160
4161 unlock2(d);
4162 unlock2(s);
4163 }
4164 MENDFUNC(2,adc_l,(RW4 d, R4 s))
4165
4166 MIDFUNC(2,adc_w,(RW2 d, R2 s))
4167 {
4168 CLOBBER_ADC;
4169 s=readreg(s,2);
4170 d=rmw(d,2,2);
4171
4172 raw_adc_w(d,s);
4173 unlock2(d);
4174 unlock2(s);
4175 }
4176 MENDFUNC(2,adc_w,(RW2 d, R2 s))
4177
4178 MIDFUNC(2,adc_b,(RW1 d, R1 s))
4179 {
4180 CLOBBER_ADC;
4181 s=readreg(s,1);
4182 d=rmw(d,1,1);
4183
4184 raw_adc_b(d,s);
4185 unlock2(d);
4186 unlock2(s);
4187 }
4188 MENDFUNC(2,adc_b,(RW1 d, R1 s))
4189
4190 MIDFUNC(2,add_l,(RW4 d, R4 s))
4191 {
4192 if (isconst(s)) {
4193 COMPCALL(add_l_ri)(d,live.state[s].val);
4194 return;
4195 }
4196
4197 CLOBBER_ADD;
4198 s=readreg(s,4);
4199 d=rmw(d,4,4);
4200
4201 raw_add_l(d,s);
4202
4203 unlock2(d);
4204 unlock2(s);
4205 }
4206 MENDFUNC(2,add_l,(RW4 d, R4 s))
4207
4208 MIDFUNC(2,add_w,(RW2 d, R2 s))
4209 {
4210 if (isconst(s)) {
4211 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4212 return;
4213 }
4214
4215 CLOBBER_ADD;
4216 s=readreg(s,2);
4217 d=rmw(d,2,2);
4218
4219 raw_add_w(d,s);
4220 unlock2(d);
4221 unlock2(s);
4222 }
4223 MENDFUNC(2,add_w,(RW2 d, R2 s))
4224
4225 MIDFUNC(2,add_b,(RW1 d, R1 s))
4226 {
4227 if (isconst(s)) {
4228 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4229 return;
4230 }
4231
4232 CLOBBER_ADD;
4233 s=readreg(s,1);
4234 d=rmw(d,1,1);
4235
4236 raw_add_b(d,s);
4237 unlock2(d);
4238 unlock2(s);
4239 }
4240 MENDFUNC(2,add_b,(RW1 d, R1 s))
4241
4242 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4243 {
4244 if (!i && !needflags)
4245 return;
4246 if (isconst(d) && !needflags) {
4247 live.state[d].val-=i;
4248 return;
4249 }
4250 #if USE_OFFSET
4251 if (!needflags) {
4252 add_offset(d,-i);
4253 return;
4254 }
4255 #endif
4256
4257 CLOBBER_SUB;
4258 d=rmw(d,4,4);
4259
4260 raw_sub_l_ri(d,i);
4261 unlock2(d);
4262 }
4263 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4264
4265 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4266 {
4267 if (!i && !needflags)
4268 return;
4269
4270 CLOBBER_SUB;
4271 d=rmw(d,2,2);
4272
4273 raw_sub_w_ri(d,i);
4274 unlock2(d);
4275 }
4276 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4277
4278 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4279 {
4280 if (!i && !needflags)
4281 return;
4282
4283 CLOBBER_SUB;
4284 d=rmw(d,1,1);
4285
4286 raw_sub_b_ri(d,i);
4287
4288 unlock2(d);
4289 }
4290 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4291
4292 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4293 {
4294 if (!i && !needflags)
4295 return;
4296 if (isconst(d) && !needflags) {
4297 live.state[d].val+=i;
4298 return;
4299 }
4300 #if USE_OFFSET
4301 if (!needflags) {
4302 add_offset(d,i);
4303 return;
4304 }
4305 #endif
4306 CLOBBER_ADD;
4307 d=rmw(d,4,4);
4308 raw_add_l_ri(d,i);
4309 unlock2(d);
4310 }
4311 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4312
4313 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4314 {
4315 if (!i && !needflags)
4316 return;
4317
4318 CLOBBER_ADD;
4319 d=rmw(d,2,2);
4320
4321 raw_add_w_ri(d,i);
4322 unlock2(d);
4323 }
4324 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4325
4326 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4327 {
4328 if (!i && !needflags)
4329 return;
4330
4331 CLOBBER_ADD;
4332 d=rmw(d,1,1);
4333
4334 raw_add_b_ri(d,i);
4335
4336 unlock2(d);
4337 }
4338 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4339
4340 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4341 {
4342 CLOBBER_SBB;
4343 s=readreg(s,4);
4344 d=rmw(d,4,4);
4345
4346 raw_sbb_l(d,s);
4347 unlock2(d);
4348 unlock2(s);
4349 }
4350 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4351
4352 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4353 {
4354 CLOBBER_SBB;
4355 s=readreg(s,2);
4356 d=rmw(d,2,2);
4357
4358 raw_sbb_w(d,s);
4359 unlock2(d);
4360 unlock2(s);
4361 }
4362 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4363
4364 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4365 {
4366 CLOBBER_SBB;
4367 s=readreg(s,1);
4368 d=rmw(d,1,1);
4369
4370 raw_sbb_b(d,s);
4371 unlock2(d);
4372 unlock2(s);
4373 }
4374 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4375
4376 MIDFUNC(2,sub_l,(RW4 d, R4 s))
4377 {
4378 if (isconst(s)) {
4379 COMPCALL(sub_l_ri)(d,live.state[s].val);
4380 return;
4381 }
4382
4383 CLOBBER_SUB;
4384 s=readreg(s,4);
4385 d=rmw(d,4,4);
4386
4387 raw_sub_l(d,s);
4388 unlock2(d);
4389 unlock2(s);
4390 }
4391 MENDFUNC(2,sub_l,(RW4 d, R4 s))
4392
4393 MIDFUNC(2,sub_w,(RW2 d, R2 s))
4394 {
4395 if (isconst(s)) {
4396 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4397 return;
4398 }
4399
4400 CLOBBER_SUB;
4401 s=readreg(s,2);
4402 d=rmw(d,2,2);
4403
4404 raw_sub_w(d,s);
4405 unlock2(d);
4406 unlock2(s);
4407 }
4408 MENDFUNC(2,sub_w,(RW2 d, R2 s))
4409
4410 MIDFUNC(2,sub_b,(RW1 d, R1 s))
4411 {
4412 if (isconst(s)) {
4413 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4414 return;
4415 }
4416
4417 CLOBBER_SUB;
4418 s=readreg(s,1);
4419 d=rmw(d,1,1);
4420
4421 raw_sub_b(d,s);
4422 unlock2(d);
4423 unlock2(s);
4424 }
4425 MENDFUNC(2,sub_b,(RW1 d, R1 s))
4426
4427 MIDFUNC(2,cmp_l,(R4 d, R4 s))
4428 {
4429 CLOBBER_CMP;
4430 s=readreg(s,4);
4431 d=readreg(d,4);
4432
4433 raw_cmp_l(d,s);
4434 unlock2(d);
4435 unlock2(s);
4436 }
4437 MENDFUNC(2,cmp_l,(R4 d, R4 s))
4438
4439 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4440 {
4441 CLOBBER_CMP;
4442 r=readreg(r,4);
4443
4444 raw_cmp_l_ri(r,i);
4445 unlock2(r);
4446 }
4447 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4448
4449 MIDFUNC(2,cmp_w,(R2 d, R2 s))
4450 {
4451 CLOBBER_CMP;
4452 s=readreg(s,2);
4453 d=readreg(d,2);
4454
4455 raw_cmp_w(d,s);
4456 unlock2(d);
4457 unlock2(s);
4458 }
4459 MENDFUNC(2,cmp_w,(R2 d, R2 s))
4460
4461 MIDFUNC(2,cmp_b,(R1 d, R1 s))
4462 {
4463 CLOBBER_CMP;
4464 s=readreg(s,1);
4465 d=readreg(d,1);
4466
4467 raw_cmp_b(d,s);
4468 unlock2(d);
4469 unlock2(s);
4470 }
4471 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4472
4473
4474 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4475 {
4476 CLOBBER_XOR;
4477 s=readreg(s,4);
4478 d=rmw(d,4,4);
4479
4480 raw_xor_l(d,s);
4481 unlock2(d);
4482 unlock2(s);
4483 }
4484 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4485
4486 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4487 {
4488 CLOBBER_XOR;
4489 s=readreg(s,2);
4490 d=rmw(d,2,2);
4491
4492 raw_xor_w(d,s);
4493 unlock2(d);
4494 unlock2(s);
4495 }
4496 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4497
4498 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4499 {
4500 CLOBBER_XOR;
4501 s=readreg(s,1);
4502 d=rmw(d,1,1);
4503
4504 raw_xor_b(d,s);
4505 unlock2(d);
4506 unlock2(s);
4507 }
4508 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4509
4510 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4511 {
4512 clobber_flags();
4513 remove_all_offsets();
4514 if (osize==4) {
4515 if (out1!=in1 && out1!=r) {
4516 COMPCALL(forget_about)(out1);
4517 }
4518 }
4519 else {
4520 tomem_c(out1);
4521 }
4522
4523 in1=readreg_specific(in1,isize,REG_PAR1);
4524 r=readreg(r,4);
4525 prepare_for_call_1(); /* This should ensure that there won't be
4526 any need for swapping nregs in prepare_for_call_2
4527 */
4528 #if USE_NORMAL_CALLING_CONVENTION
4529 raw_push_l_r(in1);
4530 #endif
4531 unlock2(in1);
4532 unlock2(r);
4533
4534 prepare_for_call_2();
4535 raw_call_r(r);
4536
4537 #if USE_NORMAL_CALLING_CONVENTION
4538 raw_inc_sp(4);
4539 #endif
4540
4541
4542 live.nat[REG_RESULT].holds[0]=out1;
4543 live.nat[REG_RESULT].nholds=1;
4544 live.nat[REG_RESULT].touched=touchcnt++;
4545
4546 live.state[out1].realreg=REG_RESULT;
4547 live.state[out1].realind=0;
4548 live.state[out1].val=0;
4549 live.state[out1].validsize=osize;
4550 live.state[out1].dirtysize=osize;
4551 set_status(out1,DIRTY);
4552 }
4553 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4554
4555 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4556 {
4557 clobber_flags();
4558 remove_all_offsets();
4559 in1=readreg_specific(in1,isize1,REG_PAR1);
4560 in2=readreg_specific(in2,isize2,REG_PAR2);
4561 r=readreg(r,4);
4562 prepare_for_call_1(); /* This should ensure that there won't be
4563 any need for swapping nregs in prepare_for_call_2
4564 */
4565 #if USE_NORMAL_CALLING_CONVENTION
4566 raw_push_l_r(in2);
4567 raw_push_l_r(in1);
4568 #endif
4569 unlock2(r);
4570 unlock2(in1);
4571 unlock2(in2);
4572 prepare_for_call_2();
4573 raw_call_r(r);
4574 #if USE_NORMAL_CALLING_CONVENTION
4575 raw_inc_sp(8);
4576 #endif
4577 }
4578 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4579
4580 /* forget_about() takes a mid-layer register */
4581 MIDFUNC(1,forget_about,(W4 r))
4582 {
4583 if (isinreg(r))
4584 disassociate(r);
4585 live.state[r].val=0;
4586 set_status(r,UNDEF);
4587 }
4588 MENDFUNC(1,forget_about,(W4 r))
4589
4590 MIDFUNC(0,nop,(void))
4591 {
4592 raw_nop();
4593 }
4594 MENDFUNC(0,nop,(void))
4595
4596
4597 MIDFUNC(1,f_forget_about,(FW r))
4598 {
4599 if (f_isinreg(r))
4600 f_disassociate(r);
4601 live.fate[r].status=UNDEF;
4602 }
4603 MENDFUNC(1,f_forget_about,(FW r))
4604
4605 MIDFUNC(1,fmov_pi,(FW r))
4606 {
4607 r=f_writereg(r);
4608 raw_fmov_pi(r);
4609 f_unlock(r);
4610 }
4611 MENDFUNC(1,fmov_pi,(FW r))
4612
4613 MIDFUNC(1,fmov_log10_2,(FW r))
4614 {
4615 r=f_writereg(r);
4616 raw_fmov_log10_2(r);
4617 f_unlock(r);
4618 }
4619 MENDFUNC(1,fmov_log10_2,(FW r))
4620
4621 MIDFUNC(1,fmov_log2_e,(FW r))
4622 {
4623 r=f_writereg(r);
4624 raw_fmov_log2_e(r);
4625 f_unlock(r);
4626 }
4627 MENDFUNC(1,fmov_log2_e,(FW r))
4628
4629 MIDFUNC(1,fmov_loge_2,(FW r))
4630 {
4631 r=f_writereg(r);
4632 raw_fmov_loge_2(r);
4633 f_unlock(r);
4634 }
4635 MENDFUNC(1,fmov_loge_2,(FW r))
4636
4637 MIDFUNC(1,fmov_1,(FW r))
4638 {
4639 r=f_writereg(r);
4640 raw_fmov_1(r);
4641 f_unlock(r);
4642 }
4643 MENDFUNC(1,fmov_1,(FW r))
4644
4645 MIDFUNC(1,fmov_0,(FW r))
4646 {
4647 r=f_writereg(r);
4648 raw_fmov_0(r);
4649 f_unlock(r);
4650 }
4651 MENDFUNC(1,fmov_0,(FW r))
4652
4653 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4654 {
4655 r=f_writereg(r);
4656 raw_fmov_rm(r,m);
4657 f_unlock(r);
4658 }
4659 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4660
4661 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4662 {
4663 r=f_writereg(r);
4664 raw_fmovi_rm(r,m);
4665 f_unlock(r);
4666 }
4667 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4668
4669 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4670 {
4671 r=f_readreg(r);
4672 raw_fmovi_mr(m,r);
4673 f_unlock(r);
4674 }
4675 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4676
4677 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4678 {
4679 r=f_writereg(r);
4680 raw_fmovs_rm(r,m);
4681 f_unlock(r);
4682 }
4683 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4684
4685 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4686 {
4687 r=f_readreg(r);
4688 raw_fmovs_mr(m,r);
4689 f_unlock(r);
4690 }
4691 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4692
4693 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4694 {
4695 r=f_readreg(r);
4696 raw_fmov_ext_mr(m,r);
4697 f_unlock(r);
4698 }
4699 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4700
4701 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4702 {
4703 r=f_readreg(r);
4704 raw_fmov_mr(m,r);
4705 f_unlock(r);
4706 }
4707 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4708
4709 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4710 {
4711 r=f_writereg(r);
4712 raw_fmov_ext_rm(r,m);
4713 f_unlock(r);
4714 }
4715 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4716
4717 MIDFUNC(2,fmov_rr,(FW d, FR s))
4718 {
4719 if (d==s) { /* How pointless! */
4720 return;
4721 }
4722 #if USE_F_ALIAS
4723 f_disassociate(d);
4724 s=f_readreg(s);
4725 live.fate[d].realreg=s;
4726 live.fate[d].realind=live.fat[s].nholds;
4727 live.fate[d].status=DIRTY;
4728 live.fat[s].holds[live.fat[s].nholds]=d;
4729 live.fat[s].nholds++;
4730 f_unlock(s);
4731 #else
4732 s=f_readreg(s);
4733 d=f_writereg(d);
4734 raw_fmov_rr(d,s);
4735 f_unlock(s);
4736 f_unlock(d);
4737 #endif
4738 }
4739 MENDFUNC(2,fmov_rr,(FW d, FR s))
4740
4741 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4742 {
4743 index=readreg(index,4);
4744
4745 raw_fldcw_m_indexed(index,base);
4746 unlock2(index);
4747 }
4748 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4749
4750 MIDFUNC(1,ftst_r,(FR r))
4751 {
4752 r=f_readreg(r);
4753 raw_ftst_r(r);
4754 f_unlock(r);
4755 }
4756 MENDFUNC(1,ftst_r,(FR r))
4757
4758 MIDFUNC(0,dont_care_fflags,(void))
4759 {
4760 f_disassociate(FP_RESULT);
4761 }
4762 MENDFUNC(0,dont_care_fflags,(void))
4763
4764 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4765 {
4766 s=f_readreg(s);
4767 d=f_writereg(d);
4768 raw_fsqrt_rr(d,s);
4769 f_unlock(s);
4770 f_unlock(d);
4771 }
4772 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4773
4774 MIDFUNC(2,fabs_rr,(FW d, FR s))
4775 {
4776 s=f_readreg(s);
4777 d=f_writereg(d);
4778 raw_fabs_rr(d,s);
4779 f_unlock(s);
4780 f_unlock(d);
4781 }
4782 MENDFUNC(2,fabs_rr,(FW d, FR s))
4783
4784 MIDFUNC(2,fsin_rr,(FW d, FR s))
4785 {
4786 s=f_readreg(s);
4787 d=f_writereg(d);
4788 raw_fsin_rr(d,s);
4789 f_unlock(s);
4790 f_unlock(d);
4791 }
4792 MENDFUNC(2,fsin_rr,(FW d, FR s))
4793
4794 MIDFUNC(2,fcos_rr,(FW d, FR s))
4795 {
4796 s=f_readreg(s);
4797 d=f_writereg(d);
4798 raw_fcos_rr(d,s);
4799 f_unlock(s);
4800 f_unlock(d);
4801 }
4802 MENDFUNC(2,fcos_rr,(FW d, FR s))
4803
4804 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4805 {
4806 s=f_readreg(s);
4807 d=f_writereg(d);
4808 raw_ftwotox_rr(d,s);
4809 f_unlock(s);
4810 f_unlock(d);
4811 }
4812 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4813
4814 MIDFUNC(2,fetox_rr,(FW d, FR s))
4815 {
4816 s=f_readreg(s);
4817 d=f_writereg(d);
4818 raw_fetox_rr(d,s);
4819 f_unlock(s);
4820 f_unlock(d);
4821 }
4822 MENDFUNC(2,fetox_rr,(FW d, FR s))
4823
4824 MIDFUNC(2,frndint_rr,(FW d, FR s))
4825 {
4826 s=f_readreg(s);
4827 d=f_writereg(d);
4828 raw_frndint_rr(d,s);
4829 f_unlock(s);
4830 f_unlock(d);
4831 }
4832 MENDFUNC(2,frndint_rr,(FW d, FR s))
4833
4834 MIDFUNC(2,flog2_rr,(FW d, FR s))
4835 {
4836 s=f_readreg(s);
4837 d=f_writereg(d);
4838 raw_flog2_rr(d,s);
4839 f_unlock(s);
4840 f_unlock(d);
4841 }
4842 MENDFUNC(2,flog2_rr,(FW d, FR s))
4843
4844 MIDFUNC(2,fneg_rr,(FW d, FR s))
4845 {
4846 s=f_readreg(s);
4847 d=f_writereg(d);
4848 raw_fneg_rr(d,s);
4849 f_unlock(s);
4850 f_unlock(d);
4851 }
4852 MENDFUNC(2,fneg_rr,(FW d, FR s))
4853
4854 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4855 {
4856 s=f_readreg(s);
4857 d=f_rmw(d);
4858 raw_fadd_rr(d,s);
4859 f_unlock(s);
4860 f_unlock(d);
4861 }
4862 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4863
4864 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4865 {
4866 s=f_readreg(s);
4867 d=f_rmw(d);
4868 raw_fsub_rr(d,s);
4869 f_unlock(s);
4870 f_unlock(d);
4871 }
4872 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4873
4874 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4875 {
4876 d=f_readreg(d);
4877 s=f_readreg(s);
4878 raw_fcmp_rr(d,s);
4879 f_unlock(s);
4880 f_unlock(d);
4881 }
4882 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4883
4884 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4885 {
4886 s=f_readreg(s);
4887 d=f_rmw(d);
4888 raw_fdiv_rr(d,s);
4889 f_unlock(s);
4890 f_unlock(d);
4891 }
4892 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4893
4894 MIDFUNC(2,frem_rr,(FRW d, FR s))
4895 {
4896 s=f_readreg(s);
4897 d=f_rmw(d);
4898 raw_frem_rr(d,s);
4899 f_unlock(s);
4900 f_unlock(d);
4901 }
4902 MENDFUNC(2,frem_rr,(FRW d, FR s))
4903
4904 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4905 {
4906 s=f_readreg(s);
4907 d=f_rmw(d);
4908 raw_frem1_rr(d,s);
4909 f_unlock(s);
4910 f_unlock(d);
4911 }
4912 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4913
4914 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4915 {
4916 s=f_readreg(s);
4917 d=f_rmw(d);
4918 raw_fmul_rr(d,s);
4919 f_unlock(s);
4920 f_unlock(d);
4921 }
4922 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4923
4924 /********************************************************************
4925 * Support functions exposed to gencomp. CREATE time *
4926 ********************************************************************/
4927
4928 void set_zero(int r, int tmp)
4929 {
4930 if (setzflg_uses_bsf)
4931 bsf_l_rr(r,r);
4932 else
4933 simulate_bsf(tmp,r);
4934 }
4935
4936 int kill_rodent(int r)
4937 {
4938 return KILLTHERAT &&
4939 have_rat_stall &&
4940 (live.state[r].status==INMEM ||
4941 live.state[r].status==CLEAN ||
4942 live.state[r].status==ISCONST ||
4943 live.state[r].dirtysize==4);
4944 }
4945
4946 uae_u32 get_const(int r)
4947 {
4948 Dif (!isconst(r)) {
4949 write_log("Register %d should be constant, but isn't\n",r);
4950 abort();
4951 }
4952 return live.state[r].val;
4953 }
4954
4955 void sync_m68k_pc(void)
4956 {
4957 if (m68k_pc_offset) {
4958 add_l_ri(PC_P,m68k_pc_offset);
4959 comp_pc_p+=m68k_pc_offset;
4960 m68k_pc_offset=0;
4961 }
4962 }
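
/* A sketch of the lazy PC update this function completes (illustrative,
 * not emitted code): while translating straight-line code the compiler
 * only bumps m68k_pc_offset, e.g.
 *
 *   translate insn of 2 bytes  ->  m68k_pc_offset += 2;
 *   translate insn of 4 bytes  ->  m68k_pc_offset += 4;
 *   block exit / branch        ->  sync_m68k_pc();  // one add_l_ri(PC_P,6)
 *
 * so N instructions cost a single add instead of N separate PC updates.
 * Anything that inspects the emulated PC must call sync_m68k_pc() first.
 */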
4963
4964 /********************************************************************
4965 * Scratch registers management *
4966 ********************************************************************/
4967
4968 struct scratch_t {
4969 uae_u32 regs[VREGS];
4970 fpu_register fregs[VFREGS];
4971 };
4972
4973 static scratch_t scratch;
4974
4975 /********************************************************************
4976 * Support functions exposed to newcpu *
4977 ********************************************************************/
4978
4979 static inline const char *str_on_off(bool b)
4980 {
4981 return b ? "on" : "off";
4982 }
4983
4984 void compiler_init(void)
4985 {
4986 static bool initialized = false;
4987 if (initialized)
4988 return;
4989
4990 #if JIT_DEBUG
4991 // JIT debug mode ?
4992 JITDebug = PrefsFindBool("jitdebug");
4993 #endif
4994 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4995
4996 #ifdef USE_JIT_FPU
4997 // Use JIT compiler for FPU instructions ?
4998 avoid_fpu = !PrefsFindBool("jitfpu");
4999 #else
5000 // JIT FPU is always disabled
5001 avoid_fpu = true;
5002 #endif
5003 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
5004
5005 // Get size of the translation cache (in KB)
5006 cache_size = PrefsFindInt32("jitcachesize");
5007 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5008
5009 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5010 raw_init_cpu();
5011 setzflg_uses_bsf = target_check_bsf();
5012 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5013 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5014 write_log("<JIT compiler> : alignments for loops and jumps are %d and %d\n", align_loops, align_jumps);
5015
5016 // Translation cache flush mechanism
5017 lazy_flush = PrefsFindBool("jitlazyflush");
5018 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5019 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5020
5021 // Compiler features
5022 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5023 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5024 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5025 #if USE_INLINING
5026 follow_const_jumps = PrefsFindBool("jitinline");
5027 #endif
5028 write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
5029 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5030
5031 // Build compiler tables
5032 build_comp();
5033
5034 initialized = true;
5035
5036 #if PROFILE_UNTRANSLATED_INSNS
5037 write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
5038 #endif
5039
5040 #if PROFILE_COMPILE_TIME
5041 write_log("<JIT compiler> : gather statistics on translation time\n");
5042 emul_start_time = clock();
5043 #endif
5044 }
5045
5046 void compiler_exit(void)
5047 {
5048 #if PROFILE_COMPILE_TIME
5049 emul_end_time = clock();
5050 #endif
5051
5052 // Deallocate translation cache
5053 if (compiled_code) {
5054 vm_release(compiled_code, cache_size * 1024);
5055 compiled_code = 0;
5056 }
5057
5058 // Deallocate popallspace
5059 if (popallspace) {
5060 vm_release(popallspace, POPALLSPACE_SIZE);
5061 popallspace = 0;
5062 }
5063
5064 #if PROFILE_COMPILE_TIME
5065 write_log("### Compile Block statistics\n");
5066 write_log("Number of calls to compile_block : %d\n", compile_count);
5067 uae_u32 emul_time = emul_end_time - emul_start_time;
5068 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5069 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5070 100.0*double(compile_time)/double(emul_time));
5071 write_log("\n");
5072 #endif
5073
5074 #if PROFILE_UNTRANSLATED_INSNS
5075 uae_u64 untranslated_count = 0;
5076 for (int i = 0; i < 65536; i++) {
5077 opcode_nums[i] = i;
5078 untranslated_count += raw_cputbl_count[i];
5079 }
5080 write_log("Sorting untranslated instruction counts...\n");
5081 qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5082 write_log("\nRank Opc Count Name\n");
5083 for (int i = 0; i < untranslated_top_ten; i++) {
5084 uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5085 struct instr *dp;
5086 struct mnemolookup *lookup;
5087 if (!count)
5088 break;
5089 dp = table68k + opcode_nums[i];
5090 for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5091 ;
5092 write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
5093 }
5094 #endif
5095
5096 #if RECORD_REGISTER_USAGE
5097 int reg_count_ids[16];
5098 uint64 tot_reg_count = 0;
5099 for (int i = 0; i < 16; i++) {
5100 reg_count_ids[i] = i;
5101 tot_reg_count += reg_count[i];
5102 }
5103 qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5104 uint64 cum_reg_count = 0;
5105 for (int i = 0; i < 16; i++) {
5106 int r = reg_count_ids[i];
5107 cum_reg_count += reg_count[r];
5108 printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5109 reg_count[r],
5110 100.0*double(reg_count[r])/double(tot_reg_count),
5111 100.0*double(cum_reg_count)/double(tot_reg_count));
5112 }
5113 #endif
5114 }
5115
5116 bool compiler_use_jit(void)
5117 {
5118 // Check for the "jit" prefs item
5119 if (!PrefsFindBool("jit"))
5120 return false;
5121
5122 // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5123 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5124 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5125 return false;
5126 }
5127
5128 // FIXME: there are currently problems with JIT compilation and anything below a 68040
5129 if (CPUType < 4) {
5130 write_log("<JIT compiler> : 68040 emulation is required, not 680%d0. Disabling JIT.\n", CPUType);
5131 return false;
5132 }
5133
5134 return true;
5135 }
5136
5137 void init_comp(void)
5138 {
5139 int i;
5140 uae_s8* cb=can_byte;
5141 uae_s8* cw=can_word;
5142 uae_s8* au=always_used;
5143
5144 #if RECORD_REGISTER_USAGE
5145 for (i=0;i<16;i++)
5146 reg_count_local[i] = 0;
5147 #endif
5148
5149 for (i=0;i<VREGS;i++) {
5150 live.state[i].realreg=-1;
5151 live.state[i].needflush=NF_SCRATCH;
5152 live.state[i].val=0;
5153 set_status(i,UNDEF);
5154 }
5155
5156 for (i=0;i<VFREGS;i++) {
5157 live.fate[i].status=UNDEF;
5158 live.fate[i].realreg=-1;
5159 live.fate[i].needflush=NF_SCRATCH;
5160 }
5161
5162 for (i=0;i<VREGS;i++) {
5163 if (i<16) { /* First 16 registers map to 68k registers */
5164 live.state[i].mem=((uae_u32*)&regs)+i;
5165 live.state[i].needflush=NF_TOMEM;
5166 set_status(i,INMEM);
5167 }
5168 else
5169 live.state[i].mem=scratch.regs+i;
5170 }
5171 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5172 live.state[PC_P].needflush=NF_TOMEM;
5173 set_const(PC_P,(uintptr)comp_pc_p);
5174
5175 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5176 live.state[FLAGX].needflush=NF_TOMEM;
5177 set_status(FLAGX,INMEM);
5178
5179 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5180 live.state[FLAGTMP].needflush=NF_TOMEM;
5181 set_status(FLAGTMP,INMEM);
5182
5183 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5184 set_status(NEXT_HANDLER,UNDEF);
5185
5186 for (i=0;i<VFREGS;i++) {
5187 if (i<8) { /* First 8 registers map to 68k FPU registers */
5188 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5189 live.fate[i].needflush=NF_TOMEM;
5190 live.fate[i].status=INMEM;
5191 }
5192 else if (i==FP_RESULT) {
5193 live.fate[i].mem=(uae_u32*)(&fpu.result);
5194 live.fate[i].needflush=NF_TOMEM;
5195 live.fate[i].status=INMEM;
5196 }
5197 else
5198 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5199 }
5200
5201
5202 for (i=0;i<N_REGS;i++) {
5203 live.nat[i].touched=0;
5204 live.nat[i].nholds=0;
5205 live.nat[i].locked=0;
5206 if (*cb==i) {
5207 live.nat[i].canbyte=1; cb++;
5208 } else live.nat[i].canbyte=0;
5209 if (*cw==i) {
5210 live.nat[i].canword=1; cw++;
5211 } else live.nat[i].canword=0;
5212 if (*au==i) {
5213 live.nat[i].locked=1; au++;
5214 }
5215 }
5216
5217 for (i=0;i<N_FREGS;i++) {
5218 live.fat[i].touched=0;
5219 live.fat[i].nholds=0;
5220 live.fat[i].locked=0;
5221 }
5222
5223 touchcnt=1;
5224 m68k_pc_offset=0;
5225 live.flags_in_flags=TRASH;
5226 live.flags_on_stack=VALID;
5227 live.flags_are_important=1;
5228
5229 raw_fp_init();
5230 }
5231
5232 /* Only do this if you really mean it! The next call should be to init! */
5233 void flush(int save_regs)
5234 {
5235 int i;
5236
5237 log_flush();
5238 flush_flags(); /* low level */
5239 sync_m68k_pc(); /* mid level */
5240
5241 if (save_regs) {
5242 for (i=0;i<VFREGS;i++) {
5243 if (live.fate[i].needflush==NF_SCRATCH ||
5244 live.fate[i].status==CLEAN) {
5245 f_disassociate(i);
5246 }
5247 }
5248 for (i=0;i<VREGS;i++) {
5249 if (live.state[i].needflush==NF_TOMEM) {
5250 switch(live.state[i].status) {
5251 case INMEM:
5252 if (live.state[i].val) {
5253 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5254 log_vwrite(i);
5255 live.state[i].val=0;
5256 }
5257 break;
5258 case CLEAN:
5259 case DIRTY:
5260 remove_offset(i,-1); tomem(i); break;
5261 case ISCONST:
5262 if (i!=PC_P)
5263 writeback_const(i);
5264 break;
5265 default: break;
5266 }
5267 Dif (live.state[i].val && i!=PC_P) {
5268 write_log("Register %d still has val %x\n",
5269 i,live.state[i].val);
5270 }
5271 }
5272 }
5273 for (i=0;i<VFREGS;i++) {
5274 if (live.fate[i].needflush==NF_TOMEM &&
5275 live.fate[i].status==DIRTY) {
5276 f_evict(i);
5277 }
5278 }
5279 raw_fp_cleanup_drop();
5280 }
5281 if (needflags) {
5282 write_log("Warning! flush with needflags=1!\n");
5283 }
5284 }
5285
5286 static void flush_keepflags(void)
5287 {
5288 int i;
5289
5290 for (i=0;i<VFREGS;i++) {
5291 if (live.fate[i].needflush==NF_SCRATCH ||
5292 live.fate[i].status==CLEAN) {
5293 f_disassociate(i);
5294 }
5295 }
5296 for (i=0;i<VREGS;i++) {
5297 if (live.state[i].needflush==NF_TOMEM) {
5298 switch(live.state[i].status) {
5299 case INMEM:
5300 /* Can't adjust the offset here --- that needs "add" */
5301 break;
5302 case CLEAN:
5303 case DIRTY:
5304 remove_offset(i,-1); tomem(i); break;
5305 case ISCONST:
5306 if (i!=PC_P)
5307 writeback_const(i);
5308 break;
5309 default: break;
5310 }
5311 }
5312 }
5313 for (i=0;i<VFREGS;i++) {
5314 if (live.fate[i].needflush==NF_TOMEM &&
5315 live.fate[i].status==DIRTY) {
5316 f_evict(i);
5317 }
5318 }
5319 raw_fp_cleanup_drop();
5320 }
5321
5322 void freescratch(void)
5323 {
5324 int i;
5325 for (i=0;i<N_REGS;i++)
5326 if (live.nat[i].locked && i!=4)
5327 write_log("Warning! %d is locked\n",i);
5328
5329 for (i=0;i<VREGS;i++)
5330 if (live.state[i].needflush==NF_SCRATCH) {
5331 forget_about(i);
5332 }
5333
5334 for (i=0;i<VFREGS;i++)
5335 if (live.fate[i].needflush==NF_SCRATCH) {
5336 f_forget_about(i);
5337 }
5338 }
5339
5340 /********************************************************************
5341 * Support functions, internal *
5342 ********************************************************************/
5343
5344
5345 static void align_target(uae_u32 a)
5346 {
5347 if (!a)
5348 return;
5349
5350 if (tune_nop_fillers)
5351 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5352 else {
5353 /* Fill with NOPs --- makes debugging with gdb easier */
5354 while ((uintptr)target&(a-1))
5355 *target++=0x90;
5356 }
5357 }
5358
5359 static __inline__ int isinrom(uintptr addr)
5360 {
5361 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5362 }
5363
5364 static void flush_all(void)
5365 {
5366 int i;
5367
5368 log_flush();
5369 for (i=0;i<VREGS;i++)
5370 if (live.state[i].status==DIRTY) {
5371 if (!call_saved[live.state[i].realreg]) {
5372 tomem(i);
5373 }
5374 }
5375 for (i=0;i<VFREGS;i++)
5376 if (f_isinreg(i))
5377 f_evict(i);
5378 raw_fp_cleanup_drop();
5379 }
5380
5381 /* Make sure all registers that will get clobbered by a call are
5382 safe and sound in memory */
5383 static void prepare_for_call_1(void)
5384 {
5385 flush_all(); /* If there are registers that don't get clobbered,
5386 * we should be a bit more selective here */
5387 }
5388
5389 /* We will call a C routine in a moment. That will clobber all registers,
5390 so we need to disassociate everything */
5391 static void prepare_for_call_2(void)
5392 {
5393 int i;
5394 for (i=0;i<N_REGS;i++)
5395 if (!call_saved[i] && live.nat[i].nholds>0)
5396 free_nreg(i);
5397
5398 for (i=0;i<N_FREGS;i++)
5399 if (live.fat[i].nholds>0)
5400 f_free_nreg(i);
5401
5402 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5403 flags at the very start of the call_r
5404 functions! */
5405 }
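
/* Typical call sequence built from the two helpers above -- a sketch of
 * the pattern also used by the FLIGHT_RECORDER code in compile_block
 * (some_c_function is hypothetical):
 *
 *   int arg = readreg_specific(vreg, 4, REG_PAR1); // arg into its par reg
 *   prepare_for_call_1();   // write dirty registers back to memory
 *   unlock2(arg);
 *   prepare_for_call_2();   // free every caller-clobbered native reg
 *   raw_call((uintptr)some_c_function);
 */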
5406
5407 /********************************************************************
5408 * Memory access and related functions, CREATE time *
5409 ********************************************************************/
5410
5411 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5412 {
5413 next_pc_p=not_taken;
5414 taken_pc_p=taken;
5415 branch_cc=cond;
5416 }
5417
5418
5419 static uae_u32 get_handler_address(uae_u32 addr)
5420 {
5422 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5423 return (uintptr)&(bi->direct_handler_to_use);
5424 }
5425
5426 static uae_u32 get_handler(uae_u32 addr)
5427 {
5429 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5430 return (uintptr)bi->direct_handler_to_use;
5431 }
5432
5433 static void load_handler(int reg, uae_u32 addr)
5434 {
5435 mov_l_rm(reg,get_handler_address(addr));
5436 }
5437
5438 /* This version assumes that it is writing *real* memory, and *will* fail
5439 * if that assumption is wrong! No branches, no second chances, just
5440 * straight go-for-it attitude */
5441
5442 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5443 {
5444 int f=tmp;
5445
5446 if (clobber)
5447 f=source;
5448
5449 #if SIZEOF_VOID_P == 8
5450 if (!ThirtyThreeBitAddressing)
5451 sign_extend_32_rr(address, address);
5452 #endif
5453
5454 switch(size) {
5455 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5456 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5457 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5458 }
5459 forget_about(tmp);
5460 forget_about(f);
5461 }
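
/* For reference, with DIRECT_ADDRESSING the long case above boils down to
 * the following host-side store (a sketch; MEMBaseDiff is the constant
 * guest-to-host offset, and the byte swap bridges the big-endian 68k and
 * the little-endian host):
 *
 *   *(uae_u32 *)(MEMBaseDiff + (uintptr)guest_addr) = bswap_32(value);
 *
 * readmem_real below mirrors this with a load followed by a swap.
 */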
5462
5463 void writebyte(int address, int source, int tmp)
5464 {
5465 writemem_real(address,source,1,tmp,0);
5466 }
5467
5468 static __inline__ void writeword_general(int address, int source, int tmp,
5469 int clobber)
5470 {
5471 writemem_real(address,source,2,tmp,clobber);
5472 }
5473
5474 void writeword_clobber(int address, int source, int tmp)
5475 {
5476 writeword_general(address,source,tmp,1);
5477 }
5478
5479 void writeword(int address, int source, int tmp)
5480 {
5481 writeword_general(address,source,tmp,0);
5482 }
5483
5484 static __inline__ void writelong_general(int address, int source, int tmp,
5485 int clobber)
5486 {
5487 writemem_real(address,source,4,tmp,clobber);
5488 }
5489
5490 void writelong_clobber(int address, int source, int tmp)
5491 {
5492 writelong_general(address,source,tmp,1);
5493 }
5494
5495 void writelong(int address, int source, int tmp)
5496 {
5497 writelong_general(address,source,tmp,0);
5498 }
5499
5500
5501
5502 /* This version assumes that it is reading *real* memory, and *will* fail
5503 * if that assumption is wrong! No branches, no second chances, just
5504 * straight go-for-it attitude */
5505
5506 static void readmem_real(int address, int dest, int size, int tmp)
5507 {
5508 int f=tmp;
5509
5510 if (size==4 && address!=dest)
5511 f=dest;
5512
5513 #if SIZEOF_VOID_P == 8
5514 if (!ThirtyThreeBitAddressing)
5515 sign_extend_32_rr(address, address);
5516 #endif
5517
5518 switch(size) {
5519 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5520 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5521 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5522 }
5523 forget_about(tmp);
5524 }
5525
5526 void readbyte(int address, int dest, int tmp)
5527 {
5528 readmem_real(address,dest,1,tmp);
5529 }
5530
5531 void readword(int address, int dest, int tmp)
5532 {
5533 readmem_real(address,dest,2,tmp);
5534 }
5535
5536 void readlong(int address, int dest, int tmp)
5537 {
5538 readmem_real(address,dest,4,tmp);
5539 }
5540
5541 void get_n_addr(int address, int dest, int tmp)
5542 {
5543 // a is the register containing the virtual address
5544 // after the offset has been fetched
5545 int a=tmp;
5546
5547 // f is the register that will contain the offset
5548 int f=tmp;
5549
5550 // a == f == tmp if (address == dest)
5551 if (address!=dest) {
5552 a=address;
5553 f=dest;
5554 }
5555
5556 #if REAL_ADDRESSING
5557 mov_l_rr(dest, address);
5558 #elif DIRECT_ADDRESSING
5559 lea_l_brr(dest,address,MEMBaseDiff);
5560 #endif
5561 forget_about(tmp);
5562 }
5563
5564 void get_n_addr_jmp(int address, int dest, int tmp)
5565 {
5566 /* For this, we need to get the same address as the rest of UAE
5567 would --- otherwise we end up translating everything twice */
5568 get_n_addr(address,dest,tmp);
5569 }
5570
5571
5572 /* base is a register, but dp is an actual value.
5573 target is a register, as is tmp */
5574 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5575 {
5576 int reg = (dp >> 12) & 15;
5577 int regd_shift=(dp >> 9) & 3;
5578
5579 if (dp & 0x100) {
5580 int ignorebase=(dp&0x80);
5581 int ignorereg=(dp&0x40);
5582 int addbase=0;
5583 int outer=0;
5584
5585 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5586 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5587
5588 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5589 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5590
5591 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5592 if (!ignorereg) {
5593 if ((dp & 0x800) == 0)
5594 sign_extend_16_rr(target,reg);
5595 else
5596 mov_l_rr(target,reg);
5597 shll_l_ri(target,regd_shift);
5598 }
5599 else
5600 mov_l_ri(target,0);
5601
5602 /* target is now regd */
5603 if (!ignorebase)
5604 add_l(target,base);
5605 add_l_ri(target,addbase);
5606 if (dp&0x03) readlong(target,target,tmp);
5607 } else { /* do the getlong first, then add regd */
5608 if (!ignorebase) {
5609 mov_l_rr(target,base);
5610 add_l_ri(target,addbase);
5611 }
5612 else
5613 mov_l_ri(target,addbase);
5614 if (dp&0x03) readlong(target,target,tmp);
5615
5616 if (!ignorereg) {
5617 if ((dp & 0x800) == 0)
5618 sign_extend_16_rr(tmp,reg);
5619 else
5620 mov_l_rr(tmp,reg);
5621 shll_l_ri(tmp,regd_shift);
5622 /* tmp is now regd */
5623 add_l(target,tmp);
5624 }
5625 }
5626 add_l_ri(target,outer);
5627 }
5628 else { /* 68000 version */
5629 if ((dp & 0x800) == 0) { /* Sign extend */
5630 sign_extend_16_rr(target,reg);
5631 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5632 }
5633 else {
5634 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5635 }
5636 }
5637 forget_about(tmp);
5638 }
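
/* Worked example for the brief extension word path (bit 8 clear): a
 * hypothetical dp = 0x3c04 decodes to index register (dp>>12)&15 = 3,
 * a long-sized index (bit 11 set), scale 1<<((dp>>9)&3) = 4 and an 8-bit
 * displacement (uae_s8)dp = +4, so the 68000-style branch above emits a
 * single lea_l_brr_indexed(target, base, 3, 4, 4), i.e.
 * target = base + reg3*4 + 4. */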
5639
5640
5641
5642
5643
5644 void set_cache_state(int enabled)
5645 {
5646 if (enabled!=letit)
5647 flush_icache_hard(77);
5648 letit=enabled;
5649 }
5650
5651 int get_cache_state(void)
5652 {
5653 return letit;
5654 }
5655
5656 uae_u32 get_jitted_size(void)
5657 {
5658 if (compiled_code)
5659 return current_compile_p-compiled_code;
5660 return 0;
5661 }
5662
5663 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5664 const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5665
5666 static uint8 *do_alloc_code(uint32 size, int depth)
5667 {
5668 #if defined(__linux__) && 0
5669 /*
5670 This is a really awful hack that is known to work on Linux at
5671 least.
5672
5673 The trick here is to make sure the allocated cache is nearby
5674 code segment, and more precisely in the positive half of a
5675 32-bit address space. i.e. addr < 0x80000000. Actually, it
5676 turned out that a 32-bit binary run on AMD64 yields a cache
5677 allocated around 0xa0000000, thus causing some troubles when
5678 translating addresses from m68k to x86.
5679 */
5680 static uint8 * code_base = NULL;
5681 if (code_base == NULL) {
5682 uintptr page_size = getpagesize();
5683 uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5684 if (boundaries < page_size)
5685 boundaries = page_size;
5686 code_base = (uint8 *)sbrk(0);
5687 for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5688 if (vm_acquire_fixed(code_base, size) == 0) {
5689 uint8 *code = code_base;
5690 code_base += size;
5691 return code;
5692 }
5693 code_base += boundaries;
5694 }
5695 return NULL;
5696 }
5697
5698 if (vm_acquire_fixed(code_base, size) == 0) {
5699 uint8 *code = code_base;
5700 code_base += size;
5701 return code;
5702 }
5703
5704 if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5705 return NULL;
5706
5707 return do_alloc_code(size, depth + 1);
5708 #else
5709 uint8 *code = (uint8 *)vm_acquire(size);
5710 return code == VM_MAP_FAILED ? NULL : code;
5711 #endif
5712 }
5713
5714 static inline uint8 *alloc_code(uint32 size)
5715 {
5716 uint8 *ptr = do_alloc_code(size, 0);
5717 /* allocated code must fit in 32-bit boundaries */
5718 assert((uintptr)ptr <= 0xffffffff);
5719 return ptr;
5720 }
5721
5722 void alloc_cache(void)
5723 {
5724 if (compiled_code) {
5725 flush_icache_hard(6);
5726 vm_release(compiled_code, cache_size * 1024);
5727 compiled_code = 0;
5728 }
5729
5730 if (cache_size == 0)
5731 return;
5732
5733 while (!compiled_code && cache_size) {
5734 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5735 compiled_code = 0;
5736 cache_size /= 2;
5737 }
5738 }
5739
5740 if (compiled_code) {
5741 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5742 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5743 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5744 current_compile_p = compiled_code;
5745 current_cache_size = 0;
5746 }
5747 }
5748
5749
5750
5751 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5752
5753 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5754 {
5755 uae_u32 k1 = 0;
5756 uae_u32 k2 = 0;
5757
5758 #if USE_CHECKSUM_INFO
5759 checksum_info *csi = bi->csi;
5760 Dif(!csi) abort();
5761 while (csi) {
5762 uae_s32 len = csi->length;
5763 uintptr tmp = (uintptr)csi->start_p;
5764 #else
5765 uae_s32 len = bi->len;
5766 uintptr tmp = (uintptr)bi->min_pcp;
5767 #endif
5768 uae_u32*pos;
5769
5770 len += (tmp & 3);
5771 tmp &= ~((uintptr)3);
5772 pos = (uae_u32 *)tmp;
5773
5774 if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5775 while (len > 0) {
5776 k1 += *pos;
5777 k2 ^= *pos;
5778 pos++;
5779 len -= 4;
5780 }
5781 }
5782
5783 #if USE_CHECKSUM_INFO
5784 csi = csi->next;
5785 }
5786 #endif
5787
5788 *c1 = k1;
5789 *c2 = k2;
5790 }
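
#if 0
/* Reference version of the checksum above, kept standalone for clarity:
   a running 32-bit sum plus xor over the words that cover a block's 68k
   code -- cheap enough to re-run on every soft-flush revalidation. */
static void checksum_words(const uae_u32 *p, int nwords,
                           uae_u32 *c1, uae_u32 *c2)
{
    uae_u32 k1 = 0, k2 = 0;
    while (nwords-- > 0) {
        k1 += *p;
        k2 ^= *p;
        p++;
    }
    *c1 = k1;
    *c2 = k2;
}
#endif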
5791
5792 #if 0
5793 static void show_checksum(CSI_TYPE* csi)
5794 {
5795 uae_u32 k1=0;
5796 uae_u32 k2=0;
5797 uae_s32 len=CSI_LENGTH(csi);
5798 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5799 uae_u32* pos;
5800
5801 len+=(tmp&3);
5802 tmp&=(~3);
5803 pos=(uae_u32*)tmp;
5804
5805 if (len<0 || len>MAX_CHECKSUM_LEN) {
5806 return;
5807 }
5808 else {
5809 while (len>0) {
5810 write_log("%08x ",*pos);
5811 pos++;
5812 len-=4;
5813 }
5814 write_log(" bla\n");
5815 }
5816 }
5817 #endif
5818
5819
5820 int check_for_cache_miss(void)
5821 {
5822 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5823
5824 if (bi) {
5825 int cl=cacheline(regs.pc_p);
5826 if (bi!=cache_tags[cl+1].bi) {
5827 raise_in_cl_list(bi);
5828 return 1;
5829 }
5830 }
5831 return 0;
5832 }
5833
5834
5835 static void recompile_block(void)
5836 {
5837 /* An existing block's countdown code has expired. We need to make
5838 sure that execute_normal doesn't refuse to recompile due to a
5839 perceived cache miss... */
5840 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5841
5842 Dif (!bi)
5843 abort();
5844 raise_in_cl_list(bi);
5845 execute_normal();
5846 return;
5847 }
5848 static void cache_miss(void)
5849 {
5850 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5851 uae_u32 cl=cacheline(regs.pc_p);
5852 blockinfo* bi2=get_blockinfo(cl);
5853
5854 if (!bi) {
5855 execute_normal(); /* Compile this block now */
5856 return;
5857 }
5858 Dif (!bi2 || bi==bi2) {
5859 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5860 abort();
5861 }
5862 raise_in_cl_list(bi);
5863 return;
5864 }
5865
5866 static int called_check_checksum(blockinfo* bi);
5867
5868 static inline int block_check_checksum(blockinfo* bi)
5869 {
5870 uae_u32 c1,c2;
5871 bool isgood;
5872
5873 if (bi->status!=BI_NEED_CHECK)
5874 return 1; /* This block is in a checked state */
5875
5876 checksum_count++;
5877
5878 if (bi->c1 || bi->c2)
5879 calc_checksum(bi,&c1,&c2);
5880 else {
5881 c1=c2=1; /* Make sure it doesn't match */
5882 }
5883
5884 isgood=(c1==bi->c1 && c2==bi->c2);
5885
5886 if (isgood) {
5887 /* This block is still OK. So we reactivate. Of course, that
5888 means we have to move it into the needs-to-be-flushed list */
5889 bi->handler_to_use=bi->handler;
5890 set_dhtu(bi,bi->direct_handler);
5891 bi->status=BI_CHECKING;
5892 isgood=called_check_checksum(bi);
5893 }
5894 if (isgood) {
5895 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5896 c1,c2,bi->c1,bi->c2);*/
5897 remove_from_list(bi);
5898 add_to_active(bi);
5899 raise_in_cl_list(bi);
5900 bi->status=BI_ACTIVE;
5901 }
5902 else {
5903 /* This block actually changed. We need to invalidate it,
5904 and set it up to be recompiled */
5905 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5906 c1,c2,bi->c1,bi->c2); */
5907 invalidate_block(bi);
5908 raise_in_cl_list(bi);
5909 }
5910 return isgood;
5911 }
5912
5913 static int called_check_checksum(blockinfo* bi)
5914 {
5916 int isgood=1;
5917 int i;
5918
5919 for (i=0;i<2 && isgood;i++) {
5920 if (bi->dep[i].jmp_off) {
5921 isgood=block_check_checksum(bi->dep[i].target);
5922 }
5923 }
5924 return isgood;
5925 }
5926
5927 static void check_checksum(void)
5928 {
5929 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5930 uae_u32 cl=cacheline(regs.pc_p);
5931 blockinfo* bi2=get_blockinfo(cl);
5932
5933 /* These are not the droids you are looking for... */
5934 if (!bi) {
5935 /* Whoever is the primary target is in a dormant state, but
5936 calling it was accidental, and we should just compile this
5937 new block */
5938 execute_normal();
5939 return;
5940 }
5941 if (bi!=bi2) {
5942 /* The block was hit accidentally, but it does exist. Cache miss */
5943 cache_miss();
5944 return;
5945 }
5946
5947 if (!block_check_checksum(bi))
5948 execute_normal();
5949 }
5950
5951 static __inline__ void match_states(blockinfo* bi)
5952 {
5953 int i;
5954 smallstate* s=&(bi->env);
5955
5956 if (bi->status==BI_NEED_CHECK) {
5957 block_check_checksum(bi);
5958 }
5959 if (bi->status==BI_ACTIVE ||
5960 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5961 block makes (about not using
5962 certain vregs) */
5963 for (i=0;i<16;i++) {
5964 if (s->virt[i]==L_UNNEEDED) {
5965 // write_log("unneeded reg %d at %p\n",i,target);
5966 COMPCALL(forget_about)(i); // FIXME
5967 }
5968 }
5969 }
5970 flush(1);
5971
5972 /* And now deal with the *demands* the block makes */
5973 for (i=0;i<N_REGS;i++) {
5974 int v=s->nat[i];
5975 if (v>=0) {
5976 // printf("Loading reg %d into %d at %p\n",v,i,target);
5977 readreg_specific(v,4,i);
5978 // do_load_reg(i,v);
5979 // setlock(i);
5980 }
5981 }
5982 for (i=0;i<N_REGS;i++) {
5983 int v=s->nat[i];
5984 if (v>=0) {
5985 unlock2(i);
5986 }
5987 }
5988 }
5989
5990 static __inline__ void create_popalls(void)
5991 {
5992 int i,r;
5993
5994 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5995 write_log("FATAL: Could not allocate popallspace!\n");
5996 abort();
5997 }
5998 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
5999
6000 int stack_space = STACK_OFFSET;
6001 for (i=0;i<N_REGS;i++) {
6002 if (need_to_preserve[i])
6003 stack_space += sizeof(void *);
6004 }
6005 stack_space %= STACK_ALIGN;
6006 if (stack_space)
6007 stack_space = STACK_ALIGN - stack_space;
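
/* Worked example with hypothetical values STACK_OFFSET = 4,
 * STACK_ALIGN = 16 and four preserved registers on a 32-bit host:
 * 4 + 4*4 = 20 bytes, 20 % 16 = 4, so stack_space = 16 - 4 = 12 filler
 * bytes keep the stack 16-byte aligned inside generated code. */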
6008
6009 current_compile_p=popallspace;
6010 set_target(current_compile_p);
6011
6012 /* We need to guarantee 16-byte stack alignment on x86 at any point
6013 within the JIT generated code. There are multiple possible exit
6014 points but a single entry. A "jmp" is used so that we don't
6015 have to generate stack alignment in generated code that has to
6016 call external functions (e.g. a generic instruction handler).
6017
6018 In summary, JIT generated code is not a leaf, so we have to deal
6019 with alignment here to keep the stack correctly aligned. */
6020 align_target(align_jumps);
6021 current_compile_p=get_target();
6022 pushall_call_handler=get_target();
6023 for (i=N_REGS;i--;) {
6024 if (need_to_preserve[i])
6025 raw_push_l_r(i);
6026 }
6027 raw_dec_sp(stack_space);
6028 r=REG_PC_TMP;
6029 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6030 raw_and_l_ri(r,TAGMASK);
6031 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
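
/* The last three instructions above are the entire dispatcher;
 * conceptually:
 *
 *   jmp cache_tags[regs.pc_p & TAGMASK].handler;
 *
 * Every slot holds either a compiled block's handler or
 * popall_execute_normal, so a miss simply falls back to compilation. */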
6032
6033 /* now the exit points */
6034 align_target(align_jumps);
6035 popall_do_nothing=get_target();
6036 raw_inc_sp(stack_space);
6037 for (i=0;i<N_REGS;i++) {
6038 if (need_to_preserve[i])
6039 raw_pop_l_r(i);
6040 }
6041 raw_jmp((uintptr)do_nothing);
6042
6043 align_target(align_jumps);
6044 popall_execute_normal=get_target();
6045 raw_inc_sp(stack_space);
6046 for (i=0;i<N_REGS;i++) {
6047 if (need_to_preserve[i])
6048 raw_pop_l_r(i);
6049 }
6050 raw_jmp((uintptr)execute_normal);
6051
6052 align_target(align_jumps);
6053 popall_cache_miss=get_target();
6054 raw_inc_sp(stack_space);
6055 for (i=0;i<N_REGS;i++) {
6056 if (need_to_preserve[i])
6057 raw_pop_l_r(i);
6058 }
6059 raw_jmp((uintptr)cache_miss);
6060
6061 align_target(align_jumps);
6062 popall_recompile_block=get_target();
6063 raw_inc_sp(stack_space);
6064 for (i=0;i<N_REGS;i++) {
6065 if (need_to_preserve[i])
6066 raw_pop_l_r(i);
6067 }
6068 raw_jmp((uintptr)recompile_block);
6069
6070 align_target(align_jumps);
6071 popall_exec_nostats=get_target();
6072 raw_inc_sp(stack_space);
6073 for (i=0;i<N_REGS;i++) {
6074 if (need_to_preserve[i])
6075 raw_pop_l_r(i);
6076 }
6077 raw_jmp((uintptr)exec_nostats);
6078
6079 align_target(align_jumps);
6080 popall_check_checksum=get_target();
6081 raw_inc_sp(stack_space);
6082 for (i=0;i<N_REGS;i++) {
6083 if (need_to_preserve[i])
6084 raw_pop_l_r(i);
6085 }
6086 raw_jmp((uintptr)check_checksum);
6087
6088 // no need to further write into popallspace
6089 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6090 }
6091
6092 static __inline__ void reset_lists(void)
6093 {
6094 int i;
6095
6096 for (i=0;i<MAX_HOLD_BI;i++)
6097 hold_bi[i]=NULL;
6098 active=NULL;
6099 dormant=NULL;
6100 }
6101
6102 static void prepare_block(blockinfo* bi)
6103 {
6104 int i;
6105
6106 set_target(current_compile_p);
6107 align_target(align_jumps);
6108 bi->direct_pen=(cpuop_func *)get_target();
6109 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6110 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6111 raw_jmp((uintptr)popall_execute_normal);
6112
6113 align_target(align_jumps);
6114 bi->direct_pcc=(cpuop_func *)get_target();
6115 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6116 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6117 raw_jmp((uintptr)popall_check_checksum);
6118 current_compile_p=get_target();
6119
6120 bi->deplist=NULL;
6121 for (i=0;i<2;i++) {
6122 bi->dep[i].prev_p=NULL;
6123 bi->dep[i].next=NULL;
6124 }
6125 bi->env=default_ss;
6126 bi->status=BI_INVALID;
6127 bi->havestate=0;
6128 //bi->env=empty_ss;
6129 }
6130
6131 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6132 static inline void reset_compop(int opcode)
6133 {
6134 compfunctbl[opcode] = NULL;
6135 nfcompfunctbl[opcode] = NULL;
6136 }
6137
6138 static int read_opcode(const char *p)
6139 {
6140 int opcode = 0;
6141 for (int i = 0; i < 4; i++) {
6142 int op = p[i];
6143 switch (op) {
6144 case '0': case '1': case '2': case '3': case '4':
6145 case '5': case '6': case '7': case '8': case '9':
6146 opcode = (opcode << 4) | (op - '0');
6147 break;
6148 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6149 opcode = (opcode << 4) | ((op - 'a') + 10);
6150 break;
6151 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6152 opcode = (opcode << 4) | ((op - 'A') + 10);
6153 break;
6154 default:
6155 return -1;
6156 }
6157 }
6158 return opcode;
6159 }
6160
6161 static bool merge_blacklist()
6162 {
6163 const char *blacklist = PrefsFindString("jitblacklist");
6164 if (blacklist) {
6165 const char *p = blacklist;
6166 for (;;) {
6167 if (*p == 0)
6168 return true;
6169
6170 int opcode1 = read_opcode(p);
6171 if (opcode1 < 0)
6172 return false;
6173 p += 4;
6174
6175 int opcode2 = opcode1;
6176 if (*p == '-') {
6177 p++;
6178 opcode2 = read_opcode(p);
6179 if (opcode2 < 0)
6180 return false;
6181 p += 4;
6182 }
6183
6184 if (*p == 0 || *p == ';') {
6185 write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6186 for (int opcode = opcode1; opcode <= opcode2; opcode++)
6187 reset_compop(cft_map(opcode));
6188
6189 if (*p++ == ';')
6190 continue;
6191
6192 return true;
6193 }
6194
6195 return false;
6196 }
6197 }
6198 return true;
6199 }
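
/* Example pref string accepted by the parser above (hypothetical opcodes):
 *
 *   jitblacklist "f200-f3ff;4afc"
 *
 * excludes the whole 0xf200-0xf3ff range plus opcode 0x4afc from
 * compilation: each opcode is exactly four hex digits, a range uses '-',
 * and entries are separated by ';'. */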
6200
6201 void build_comp(void)
6202 {
6203 int i;
6205 unsigned long opcode;
6206 struct comptbl* tbl=op_smalltbl_0_comp_ff;
6207 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6208 int count;
6209 int cpu_level = 0; // 68000 (default)
6210 if (CPUType == 4)
6211 cpu_level = 4; // 68040 with FPU
6212 else {
6213 if (FPUType)
6214 cpu_level = 3; // 68020 with FPU
6215 else if (CPUType >= 2)
6216 cpu_level = 2; // 68020
6217 else if (CPUType == 1)
6218 cpu_level = 1;
6219 }
6220 struct cputbl *nfctbl = (
6221 cpu_level == 4 ? op_smalltbl_0_nf
6222 : cpu_level == 3 ? op_smalltbl_1_nf
6223 : cpu_level == 2 ? op_smalltbl_2_nf
6224 : cpu_level == 1 ? op_smalltbl_3_nf
6225 : op_smalltbl_4_nf);
6226
6227 write_log ("<JIT compiler> : building compiler function tables\n");
6228
6229 for (opcode = 0; opcode < 65536; opcode++) {
6230 reset_compop(opcode);
6231 nfcpufunctbl[opcode] = op_illg_1;
6232 prop[opcode].use_flags = 0x1f;
6233 prop[opcode].set_flags = 0x1f;
6234 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6235 }
6236
6237 for (i = 0; tbl[i].opcode < 65536; i++) {
6238 int cflow = table68k[tbl[i].opcode].cflow;
6239 if (follow_const_jumps && (tbl[i].specific & 16))
6240 cflow = fl_const_jump;
6241 else
6242 cflow &= ~fl_const_jump;
6243 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6244
6245 int uses_fpu = tbl[i].specific & 32;
6246 if (uses_fpu && avoid_fpu)
6247 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6248 else
6249 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6250 }
6251
6252 for (i = 0; nftbl[i].opcode < 65536; i++) {
6253 int uses_fpu = nftbl[i].specific & 32;
6254 if (uses_fpu && avoid_fpu)
6255 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6256 else
6257 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6258
6259 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6260 }
6261
6262 for (i = 0; nfctbl[i].handler; i++) {
6263 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6264 }
6265
6266 for (opcode = 0; opcode < 65536; opcode++) {
6267 compop_func *f;
6268 compop_func *nff;
6269 cpuop_func *nfcf;
6270 int isaddx,cflow;
6271
6272 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6273 continue;
6274
6275 if (table68k[opcode].handler != -1) {
6276 f = compfunctbl[cft_map(table68k[opcode].handler)];
6277 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6278 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6279 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6280 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6281 prop[cft_map(opcode)].cflow = cflow;
6282 prop[cft_map(opcode)].is_addx = isaddx;
6283 compfunctbl[cft_map(opcode)] = f;
6284 nfcompfunctbl[cft_map(opcode)] = nff;
6285 Dif (nfcf == op_illg_1)
6286 abort();
6287 nfcpufunctbl[cft_map(opcode)] = nfcf;
6288 }
6289 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6290 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6291 /* Unconditional jumps don't evaluate condition codes, so they
6292 * don't actually use any flags themselves */
6293 if (prop[cft_map(opcode)].cflow & fl_const_jump)
6294 prop[cft_map(opcode)].use_flags = 0;
6295 }
6296 for (i = 0; nfctbl[i].handler != NULL; i++) {
6297 if (nfctbl[i].specific)
6298 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6299 }
6300
6301 /* Merge in blacklist */
6302 if (!merge_blacklist())
6303 write_log("<JIT compiler> : blacklist merge failure!\n");
6304
6305 count=0;
6306 for (opcode = 0; opcode < 65536; opcode++) {
6307 if (compfunctbl[cft_map(opcode)])
6308 count++;
6309 }
6310 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6311
6312 /* Initialise state */
6313 create_popalls();
6314 alloc_cache();
6315 reset_lists();
6316
6317 for (i=0;i<TAGSIZE;i+=2) {
6318 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6319 cache_tags[i+1].bi=NULL;
6320 }
6321
6322 #if 0
6323 for (i=0;i<N_REGS;i++) {
6324 empty_ss.nat[i].holds=-1;
6325 empty_ss.nat[i].validsize=0;
6326 empty_ss.nat[i].dirtysize=0;
6327 }
6328 #endif
6329 for (i=0;i<VREGS;i++) {
6330 empty_ss.virt[i]=L_NEEDED;
6331 }
6332 for (i=0;i<N_REGS;i++) {
6333 empty_ss.nat[i]=L_UNKNOWN;
6334 }
6335 default_ss=empty_ss;
6336 }
6337
6338
6339 static void flush_icache_none(int n)
6340 {
6341 /* Nothing to do. */
6342 }
6343
6344 static void flush_icache_hard(int n)
6345 {
6346 blockinfo* bi, *dbi;
6348
6349 hard_flush_count++;
6350 #if 0
6351 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6352 n,regs.pc,regs.pc_p,current_cache_size/1024);
6353 current_cache_size = 0;
6354 #endif
6355 bi=active;
6356 while(bi) {
6357 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6358 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6359 dbi=bi; bi=bi->next;
6360 free_blockinfo(dbi);
6361 }
6362 bi=dormant;
6363 while(bi) {
6364 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6365 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6366 dbi=bi; bi=bi->next;
6367 free_blockinfo(dbi);
6368 }
6369
6370 reset_lists();
6371 if (!compiled_code)
6372 return;
6373 current_compile_p=compiled_code;
6374 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6375 }
6376
6377
6378 /* "Soft flushing" --- instead of actually throwing everything away,
6379 we simply mark everything as "needs to be checked".
6380 */
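
/* Resulting block lifecycle (a sketch; states as used below):
 *
 *   BI_ACTIVE --soft flush--> BI_NEED_CHECK --checksum matches--> BI_ACTIVE
 *                                   |
 *                                   +--checksum differs--> BI_INVALID
 *                                                          (recompiled)
 *
 * so unchanged code pays one checksum instead of a full recompilation. */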
6381
6382 static inline void flush_icache_lazy(int n)
6383 {
6384 blockinfo* bi;
6385 blockinfo* bi2;
6387
6388 soft_flush_count++;
6389 if (!active)
6390 return;
6391
6392 bi=active;
6393 while (bi) {
6394 uae_u32 cl=cacheline(bi->pc_p);
6395 if (bi->status==BI_INVALID ||
6396 bi->status==BI_NEED_RECOMP) {
6397 if (bi==cache_tags[cl+1].bi)
6398 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6399 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6400 set_dhtu(bi,bi->direct_pen);
6401 bi->status=BI_INVALID;
6402 }
6403 else {
6404 if (bi==cache_tags[cl+1].bi)
6405 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6406 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6407 set_dhtu(bi,bi->direct_pcc);
6408 bi->status=BI_NEED_CHECK;
6409 }
6410 bi2=bi;
6411 bi=bi->next;
6412 }
6413 /* bi2 is now the last entry in the active list */
6414 bi2->next=dormant;
6415 if (dormant)
6416 dormant->prev_p=&(bi2->next);
6417
6418 dormant=active;
6419 active->prev_p=&dormant;
6420 active=NULL;
6421 }
6422
6423 void flush_icache_range(uae_u8 *start_p, uae_u32 length)
6424 {
6425 if (!active)
6426 return;
6427
6428 #if LAZY_FLUSH_ICACHE_RANGE
6429 blockinfo *bi = active;
6430 while (bi) {
6431 #if USE_CHECKSUM_INFO
6432 bool candidate = false;
6433 for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
6434 if (((start_p - csi->start_p) < csi->length) ||
6435 ((csi->start_p - start_p) < length)) {
6436 candidate = true;
6437 break;
6438 }
6439 }
6440 #else
6441 // Assume system is consistent and would invalidate the right range
6442 const bool candidate = (bi->pc_p - start_p) < length;
6443 #endif
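// The candidate test above flags an overlap when either range's start
// lies inside the other; e.g. a flush of [0x1000,0x1010) catches a block
// trace spanning [0x1008,0x1018) (hypothetical addresses). A spurious
// match is harmless: it merely forces an extra checksum on that block.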
6444 blockinfo *dbi = bi;
6445 bi = bi->next;
6446 if (candidate) {
6447 uae_u32 cl = cacheline(dbi->pc_p);
6448 if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
6449 if (dbi == cache_tags[cl+1].bi)
6450 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6451 dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
6452 set_dhtu(dbi, dbi->direct_pen);
6453 dbi->status = BI_INVALID;
6454 }
6455 else {
6456 if (dbi == cache_tags[cl+1].bi)
6457 cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
6458 dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
6459 set_dhtu(dbi, dbi->direct_pcc);
6460 dbi->status = BI_NEED_CHECK;
6461 }
6462 remove_from_list(dbi);
6463 add_to_dormant(dbi);
6464 }
6465 }
6466 return;
6467 #endif
6468 flush_icache(-1);
6469 }
6470
6471 static void catastrophe(void)
6472 {
6473 abort();
6474 }
6475
6476 int failure;
6477
6478 #define TARGET_M68K 0
6479 #define TARGET_POWERPC 1
6480 #define TARGET_X86 2
6481 #define TARGET_X86_64 3
6482 #if defined(i386) || defined(__i386__)
6483 #define TARGET_NATIVE TARGET_X86
6484 #endif
6485 #if defined(powerpc) || defined(__powerpc__)
6486 #define TARGET_NATIVE TARGET_POWERPC
6487 #endif
6488 #if defined(x86_64) || defined(__x86_64__)
6489 #define TARGET_NATIVE TARGET_X86_64
6490 #endif
6491
6492 #ifdef ENABLE_MON
6493 static uae_u32 mon_read_byte_jit(uintptr addr)
6494 {
6495 uae_u8 *m = (uae_u8 *)addr;
6496 return (uintptr)(*m);
6497 }
6498
6499 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6500 {
6501 uae_u8 *m = (uae_u8 *)addr;
6502 *m = b;
6503 }
6504 #endif
6505
6506 void disasm_block(int target, uint8 * start, size_t length)
6507 {
6508 if (!JITDebug)
6509 return;
6510
6511 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6512 char disasm_str[200];
6513 sprintf(disasm_str, "%s $%x $%x",
6514 target == TARGET_M68K ? "d68" :
6515 target == TARGET_X86 ? "d86" :
6516 target == TARGET_X86_64 ? "d8664" :
6517 target == TARGET_POWERPC ? "d" : "x",
6518 start, start + length - 1);
6519
6520 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6521 void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6522
6523 mon_read_byte = mon_read_byte_jit;
6524 mon_write_byte = mon_write_byte_jit;
6525
6526 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6527 mon(4, arg);
6528
6529 mon_read_byte = old_mon_read_byte;
6530 mon_write_byte = old_mon_write_byte;
6531 #endif
6532 }
6533
6534 static void disasm_native_block(uint8 *start, size_t length)
6535 {
6536 disasm_block(TARGET_NATIVE, start, length);
6537 }
6538
6539 static void disasm_m68k_block(uint8 *start, size_t length)
6540 {
6541 disasm_block(TARGET_M68K, start, length);
6542 }
6543
6544 #ifdef HAVE_GET_WORD_UNSWAPPED
6545 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6546 #else
6547 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6548 #endif
6549
6550 #if JIT_DEBUG
6551 static uae_u8 *last_regs_pc_p = 0;
6552 static uae_u8 *last_compiled_block_addr = 0;
6553
6554 void compiler_dumpstate(void)
6555 {
6556 if (!JITDebug)
6557 return;
6558
6559 write_log("### Host addresses\n");
6560 write_log("MEM_BASE : %x\n", MEMBaseDiff);
6561 write_log("PC_P : %p\n", &regs.pc_p);
6562 write_log("SPCFLAGS : %p\n", &regs.spcflags);
6563 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6564 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6565 write_log("\n");
6566
6567 write_log("### M68k processor state\n");
6568 m68k_dumpstate(0);
6569 write_log("\n");
6570
6571 write_log("### Block in Mac address space\n");
6572 write_log("M68K block : %p\n",
6573 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6574 write_log("Native block : %p (%d bytes)\n",
6575 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6576 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6577 write_log("\n");
6578 }
6579 #endif
6580
6581 static void compile_block(cpu_history* pc_hist, int blocklen)
6582 {
6583 if (letit && compiled_code) {
6584 #if PROFILE_COMPILE_TIME
6585 compile_count++;
6586 clock_t start_time = clock();
6587 #endif
6588 #if JIT_DEBUG
6589 bool disasm_block = false;
6590 #endif
6591
6592 /* OK, here we need to 'compile' a block */
6593 int i;
6594 int r;
6595 int was_comp=0;
6596 uae_u8 liveflags[MAXRUN+1];
6597 #if USE_CHECKSUM_INFO
6598 bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6599 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6600 uintptr min_pcp=max_pcp;
6601 #else
6602 uintptr max_pcp=(uintptr)pc_hist[0].location;
6603 uintptr min_pcp=max_pcp;
6604 #endif
6605 uae_u32 cl=cacheline(pc_hist[0].location);
6606 void* specflags=(void*)&regs.spcflags;
6607 blockinfo* bi=NULL;
6608 blockinfo* bi2;
6609 int extra_len=0;
6610
6611 redo_current_block=0;
6612 if (current_compile_p>=max_compile_start)
6613 flush_icache_hard(7);
6614
6615 alloc_blockinfos();
6616
6617 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6618 bi2=get_blockinfo(cl);
6619
6620 optlev=bi->optlevel;
6621 if (bi->status!=BI_INVALID) {
6622 Dif (bi!=bi2) {
6623 /* I don't think it can happen anymore. Shouldn't, in
6624 any case. So let's make sure... */
6625 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6626 bi->count,bi->optlevel,bi->handler_to_use,
6627 cache_tags[cl].handler);
6628 abort();
6629 }
6630
6631 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6632 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6633 /* What the heck? We are not supposed to be here! */
6634 abort();
6635 }
6636 }
6637 if (bi->count==-1) {
6638 optlev++;
6639 while (!optcount[optlev])
6640 optlev++;
6641 bi->count=optcount[optlev]-1;
6642 }
6643 current_block_pc_p=(uintptr)pc_hist[0].location;
6644
6645 remove_deps(bi); /* We are about to create new code */
6646 bi->optlevel=optlev;
6647 bi->pc_p=(uae_u8*)pc_hist[0].location;
6648 #if USE_CHECKSUM_INFO
6649 free_checksum_info_chain(bi->csi);
6650 bi->csi = NULL;
6651 #endif
6652
6653 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6654 i=blocklen;
6655 while (i--) {
6656 uae_u16* currpcp=pc_hist[i].location;
6657 uae_u32 op=DO_GET_OPCODE(currpcp);
6658
6659 #if USE_CHECKSUM_INFO
6660 trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6661 if (follow_const_jumps && is_const_jump(op)) {
6662 checksum_info *csi = alloc_checksum_info();
6663 csi->start_p = (uae_u8 *)min_pcp;
6664 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6665 csi->next = bi->csi;
6666 bi->csi = csi;
6667 max_pcp = (uintptr)currpcp;
6668 }
6669 min_pcp = (uintptr)currpcp;
6670 #else
6671 if ((uintptr)currpcp<min_pcp)
6672 min_pcp=(uintptr)currpcp;
6673 if ((uintptr)currpcp>max_pcp)
6674 max_pcp=(uintptr)currpcp;
6675 #endif
6676
6677 liveflags[i]=((liveflags[i+1]&
6678 (~prop[op].set_flags))|
6679 prop[op].use_flags);
6680 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6681 liveflags[i]&= ~FLAG_Z;
6682 }
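
/* Worked example of the backward liveness step above (hypothetical
 * masks): with liveflags[i+1] = 0x1f, an opcode at i that overwrites
 * NZVC (set_flags = 0x0f) while testing nothing (use_flags = 0) yields
 * liveflags[i] = (0x1f & ~0x0f) | 0 = 0x10 -- only X stays live, so the
 * preceding instruction can often use the cheaper "nf" (no-flags)
 * handler, selected below via liveflags[i+1] & prop[opcode].set_flags. */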
6683
6684 #if USE_CHECKSUM_INFO
6685 checksum_info *csi = alloc_checksum_info();
6686 csi->start_p = (uae_u8 *)min_pcp;
6687 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6688 csi->next = bi->csi;
6689 bi->csi = csi;
6690 #endif
6691
6692 bi->needed_flags=liveflags[0];
6693
6694 align_target(align_loops);
6695 was_comp=0;
6696
6697 bi->direct_handler=(cpuop_func *)get_target();
6698 set_dhtu(bi,bi->direct_handler);
6699 bi->status=BI_COMPILING;
6700 current_block_start_target=(uintptr)get_target();
6701
6702 log_startblock();
6703
6704 if (bi->count>=0) { /* Need to generate countdown code */
6705 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6706 raw_sub_l_mi((uintptr)&(bi->count),1);
6707 raw_jl((uintptr)popall_recompile_block);
6708 }
6709 if (optlev==0) { /* No need to actually translate */
6710 /* Execute normally without keeping stats */
6711 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6712 raw_jmp((uintptr)popall_exec_nostats);
6713 }
6714 else {
6715 reg_alloc_run=0;
6716 next_pc_p=0;
6717 taken_pc_p=0;
6718 branch_cc=0;
6719
6720 comp_pc_p=(uae_u8*)pc_hist[0].location;
6721 init_comp();
6722 was_comp=1;
6723
6724 #ifdef USE_CPU_EMUL_SERVICES
6725 raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
6726 raw_jcc_b_oponly(NATIVE_CC_GT);
6727 uae_s8 *branchadd=(uae_s8*)get_target();
6728 emit_byte(0);
6729 raw_call((uintptr)cpu_do_check_ticks);
6730 *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
6731 #endif
6732
6733 #if JIT_DEBUG
6734 if (JITDebug) {
6735 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6736 raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6737 }
6738 #endif
6739
6740 for (i=0;i<blocklen &&
6741 get_target_noopt()<max_compile_start;i++) {
6742 cpuop_func **cputbl;
6743 compop_func **comptbl;
6744 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6745 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6746 if (!needed_flags) {
6747 cputbl=nfcpufunctbl;
6748 comptbl=nfcompfunctbl;
6749 }
6750 else {
6751 cputbl=cpufunctbl;
6752 comptbl=compfunctbl;
6753 }
6754
6755 #if FLIGHT_RECORDER
6756 {
6757 mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
6758 clobber_flags();
6759 remove_all_offsets();
6760 int arg = readreg_specific(S1,4,REG_PAR1);
6761 prepare_for_call_1();
6762 unlock2(arg);
6763 prepare_for_call_2();
6764 raw_call((uintptr)m68k_record_step);
6765 }
6766 #endif
6767
6768 failure = 1; // gb-- defaults to failure state
6769 if (comptbl[opcode] && optlev>1) {
6770 failure=0;
6771 if (!was_comp) {
6772 comp_pc_p=(uae_u8*)pc_hist[i].location;
6773 init_comp();
6774 }
6775 was_comp=1;
6776
6777 comptbl[opcode](opcode);
6778 freescratch();
6779 if (!(liveflags[i+1] & FLAG_CZNV)) {
6780 /* We can forget about flags */
6781 dont_care_flags();
6782 }
6783 #if INDIVIDUAL_INST
6784 flush(1);
6785 nop();
6786 flush(1);
6787 was_comp=0;
6788 #endif
6789 }
6790
6791 if (failure) {
6792 if (was_comp) {
6793 flush(1);
6794 was_comp=0;
6795 }
6796 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6797 #if USE_NORMAL_CALLING_CONVENTION
6798 raw_push_l_r(REG_PAR1);
6799 #endif
6800 raw_mov_l_mi((uintptr)&regs.pc_p,
6801 (uintptr)pc_hist[i].location);
6802 raw_call((uintptr)cputbl[opcode]);
6803 #if PROFILE_UNTRANSLATED_INSNS
6804 // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6805 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6806 #endif
6807 #if USE_NORMAL_CALLING_CONVENTION
6808 raw_inc_sp(4);
6809 #endif
6810
6811 if (i < blocklen - 1) {
6812 uae_s8* branchadd;
6813
6814 raw_mov_l_rm(0,(uintptr)specflags);
6815 raw_test_l_rr(0,0);
6816 raw_jz_b_oponly();
6817 branchadd=(uae_s8 *)get_target();
6818 emit_byte(0);
6819 raw_jmp((uintptr)popall_do_nothing);
6820 *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6821 }
6822 }
6823 }
6824 #if 1 /* This isn't completely kosher yet; it really needs to be
6825 integrated into a general inter-block-dependency scheme */
6826 if (next_pc_p && taken_pc_p &&
6827 was_comp && taken_pc_p==current_block_pc_p) {
6828 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6829 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6830 uae_u8 x=bi1->needed_flags;
6831
6832 if (x==0xff || 1) { /* To be on the safe side */
6833 uae_u16* next=(uae_u16*)next_pc_p;
6834 uae_u32 op=DO_GET_OPCODE(next);
6835
6836 x=0x1f;
6837 x&=(~prop[op].set_flags);
6838 x|=prop[op].use_flags;
6839 }
6840
6841 x|=bi2->needed_flags;
6842 if (!(x & FLAG_CZNV)) {
6843 /* We can forget about flags */
6844 dont_care_flags();
6845 extra_len+=2; /* The next instruction now is part of this
6846 block */
6847 }
6848
6849 }
6850 #endif
6851 log_flush();
6852
6853 if (next_pc_p) { /* A branch was registered */
6854 uintptr t1=next_pc_p;
6855 uintptr t2=taken_pc_p;
6856 int cc=branch_cc;
6857
6858 uae_u32* branchadd;
6859 uae_u32* tba;
6860 bigstate tmp;
6861 blockinfo* tbi;
6862
6863 if (taken_pc_p<next_pc_p) {
6864 /* backward branch. Optimize for the "taken" case ---
6865 which means the raw_jcc should fall through when
6866 the 68k branch is taken. */
6867 t1=taken_pc_p;
6868 t2=next_pc_p;
6869 cc=branch_cc^1;
6870 }
6871
6872 tmp=live; /* ouch! This is big... */
6873 raw_jcc_l_oponly(cc);
6874 branchadd=(uae_u32*)get_target();
6875 emit_long(0);
6876
6877 /* predicted outcome */
6878 tbi=get_blockinfo_addr_new((void*)t1,1);
6879 match_states(tbi);
6880 raw_cmp_l_mi((uintptr)specflags,0);
6881 raw_jcc_l_oponly(4);
6882 tba=(uae_u32*)get_target();
6883 emit_long(get_handler(t1)-((uintptr)tba+4));
6884 raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6885 flush_reg_count();
6886 raw_jmp((uintptr)popall_do_nothing);
6887 create_jmpdep(bi,0,tba,t1);
6888
6889 align_target(align_jumps);
6890 /* not-predicted outcome */
6891 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6892 live=tmp; /* Ouch again */
6893 tbi=get_blockinfo_addr_new((void*)t2,1);
6894 match_states(tbi);
6895
6896 //flush(1); /* Can only get here if was_comp==1 */
6897 raw_cmp_l_mi((uintptr)specflags,0);
6898 raw_jcc_l_oponly(4);
6899 tba=(uae_u32*)get_target();
6900 emit_long(get_handler(t2)-((uintptr)tba+4));
6901 raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6902 flush_reg_count();
6903 raw_jmp((uintptr)popall_do_nothing);
6904 create_jmpdep(bi,1,tba,t2);
6905 }
6906 else
6907 {
6908 if (was_comp) {
6909 flush(1);
6910 }
6911 flush_reg_count();
6912
6913 /* Let's find out where next_handler is... */
6914 if (was_comp && isinreg(PC_P)) {
6915 r=live.state[PC_P].realreg;
6916 raw_and_l_ri(r,TAGMASK);
6917 int r2 = (r==0) ? 1 : 0;
6918 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6919 raw_cmp_l_mi((uintptr)specflags,0);
6920 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6921 raw_jmp_r(r2);
6922 }
6923 else if (was_comp && isconst(PC_P)) {
6924 uae_u32 v=live.state[PC_P].val;
6925 uae_u32* tba;
6926 blockinfo* tbi;
6927
6928 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6929 match_states(tbi);
6930
6931 raw_cmp_l_mi((uintptr)specflags,0);
6932 raw_jcc_l_oponly(4);
6933 tba=(uae_u32*)get_target();
6934 emit_long(get_handler(v)-((uintptr)tba+4));
6935 raw_mov_l_mi((uintptr)&regs.pc_p,v);
6936 raw_jmp((uintptr)popall_do_nothing);
6937 create_jmpdep(bi,0,tba,v);
6938 }
6939 else {
6940 r=REG_PC_TMP;
6941 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6942 raw_and_l_ri(r,TAGMASK);
6943 int r2 = (r==0) ? 1 : 0;
6944 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6945 raw_cmp_l_mi((uintptr)specflags,0);
6946 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6947 raw_jmp_r(r2);
6948 }
6949 }
6950 }
6951
6952 #if USE_MATCH
6953 if (callers_need_recompile(&live,&(bi->env))) {
6954 mark_callers_recompile(bi);
6955 }
6956
6957 big_to_small_state(&live,&(bi->env));
6958 #endif
6959
6960 #if USE_CHECKSUM_INFO
6961 remove_from_list(bi);
6962 if (trace_in_rom) {
6963 // No need to checksum that block trace on cache invalidation
6964 free_checksum_info_chain(bi->csi);
6965 bi->csi = NULL;
6966 add_to_dormant(bi);
6967 }
6968 else {
6969 calc_checksum(bi,&(bi->c1),&(bi->c2));
6970 add_to_active(bi);
6971 }
6972 #else
6973 if (next_pc_p+extra_len>=max_pcp &&
6974 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6975 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6976 else
6977 max_pcp+=LONGEST_68K_INST;
6978
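/* min_pcp..max_pcp bracket the 68k code this block was translated
   from; the length stored below is what the checksum machinery scans
   to detect writes into the source range. */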
6979 bi->len=max_pcp-min_pcp;
6980 bi->min_pcp=min_pcp;
6981
6982 remove_from_list(bi);
6983 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6984 add_to_dormant(bi); /* No need to checksum it on cache flush.
6985 Please don't start changing ROMs in
6986 flight! */
6987 }
6988 else {
6989 calc_checksum(bi,&(bi->c1),&(bi->c2));
6990 add_to_active(bi);
6991 }
6992 #endif
6993
6994 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6995
6996 #if JIT_DEBUG
6997 if (JITDebug)
6998 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6999
7000 if (JITDebug && disasm_block) {
7001 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
7002 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
7003 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
7004 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
7005 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
7006 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
7007 getchar();
7008 }
7009 #endif
7010
7011 log_dump();
7012 align_target(align_jumps);
7013
7014 /* This is the non-direct handler */
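/* It is reached through cache_tags with a possibly stale regs.pc_p,
   so it first checks that this block really matches the current PC
   and exits through popall_cache_miss otherwise. */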
7015 bi->handler=
7016 bi->handler_to_use=(cpuop_func *)get_target();
7017 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
7018 raw_jnz((uintptr)popall_cache_miss);
7019 comp_pc_p=(uae_u8*)pc_hist[0].location;
7020
7021 bi->status=BI_FINALIZING;
7022 init_comp();
7023 match_states(bi);
7024 flush(1);
7025
7026 raw_jmp((uintptr)bi->direct_handler);
7027
7028 current_compile_p=get_target();
7029 raise_in_cl_list(bi);
7030
7031 /* We will flush soon, anyway, so let's do it now */
7032 if (current_compile_p>=max_compile_start)
7033 flush_icache_hard(7);
7034
7035 bi->status=BI_ACTIVE;
7036 if (redo_current_block)
7037 block_need_recompile(bi);
7038
7039 #if PROFILE_COMPILE_TIME
7040 compile_time += (clock() - start_time);
7041 #endif
7042 }
7043
7044 /* Account for compilation time */
7045 cpu_do_check_ticks();
7046 }
7047
7048 void do_nothing(void)
7049 {
7050 /* What did you expect this to do? */
7051 }
7052
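/* Interpret one block's worth of instructions without recording a pc
   history, i.e. without feeding the compiler. */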
7053 void exec_nostats(void)
7054 {
7055 for (;;) {
7056 uae_u32 opcode = GET_OPCODE;
7057 #if FLIGHT_RECORDER
7058 m68k_record_step(m68k_getpc());
7059 #endif
7060 (*cpufunctbl[opcode])(opcode);
7061 cpu_check_ticks();
7062 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7063 return; /* We will deal with the spcflags in the caller */
7064 }
7065 }
7066 }
7067
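/* Unless check_for_cache_miss() reports that a block already exists
   for this PC, interpret up to MAXRUN instructions, recording each
   location in pc_hist[], then hand the run to compile_block() so the
   next visit executes natively. */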
7068 void execute_normal(void)
7069 {
7070 if (!check_for_cache_miss()) {
7071 cpu_history pc_hist[MAXRUN];
7072 int blocklen = 0;
7073 #if REAL_ADDRESSING || DIRECT_ADDRESSING
7074 start_pc_p = regs.pc_p;
7075 start_pc = get_virtual_address(regs.pc_p);
7076 #else
7077 start_pc_p = regs.pc_oldp;
7078 start_pc = regs.pc;
7079 #endif
7080 for (;;) { /* Take note: This is the do-it-normal loop */
7081 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7082 uae_u32 opcode = GET_OPCODE;
7083 #if FLIGHT_RECORDER
7084 m68k_record_step(m68k_getpc());
7085 #endif
7086 (*cpufunctbl[opcode])(opcode);
7087 cpu_check_ticks();
7088 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7089 compile_block(pc_hist, blocklen);
7090 return; /* We will deal with the spcflags in the caller */
7091 }
7092 /* No need to check regs.spcflags, because if they were set,
7093 we'd have ended up inside that "if" */
7094 }
7095 }
7096 }
7097
7098 typedef void (*compiled_handler)(void);
7099
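/* Top-level JIT loop. pushall_call_handler is emitted elsewhere in
   this file; as its name suggests, it saves the host registers and
   calls into the handler for the current regs.pc_p. */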
7100 static void m68k_do_compile_execute(void)
7101 {
7102 for (;;) {
7103 ((compiled_handler)(pushall_call_handler))();
7104 /* Whenever we return from that, we should check spcflags */
7105 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7106 if (m68k_do_specialties ())
7107 return;
7108 }
7109 }
7110 }
7111
7112 void m68k_compile_execute (void)
7113 {
7114 for (;;) {
7115 if (quit_program)
7116 break;
7117 m68k_do_compile_execute();
7118 }
7119 }