root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.36
Committed: 2006-01-16T21:31:08Z (18 years, 5 months ago) by gbeauche
Branch: MAIN
Changes since 1.35: +24 -70 lines
Log Message:
JIT-generated code is not guaranteed to be leaf, e.g. there could be a call
to a generic instruction handler (untranslated code). This caused problems
on Mac OS X for Intel, where the unaligned stack conditions turned out to be
more visible. The performance loss is really negligible, and this is the
right fix now anyway.

File Contents

# Content
1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2005 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 /* NOTE: support for AMD64 assumes translation cache and other code
35 * buffers are allocated into a 32-bit address space because (i) B2/JIT
36 * code is not 64-bit clean and (ii) it's faster to resolve branches
37 * that way.
38 */
39 #if !defined(__i386__) && !defined(__x86_64__)
40 #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41 #endif
42
43 #define USE_MATCH 0
44
45 /* kludge for Brian, so he can compile under MSVC++ */
46 #define USE_NORMAL_CALLING_CONVENTION 0
47
48 #ifndef WIN32
49 #include <unistd.h>
50 #include <sys/types.h>
51 #include <sys/mman.h>
52 #endif
53
54 #include <stdlib.h>
55 #include <fcntl.h>
56 #include <errno.h>
57
58 #include "sysdeps.h"
59 #include "cpu_emulation.h"
60 #include "main.h"
61 #include "prefs.h"
62 #include "user_strings.h"
63 #include "vm_alloc.h"
64
65 #include "m68k.h"
66 #include "memory.h"
67 #include "readcpu.h"
68 #include "newcpu.h"
69 #include "comptbl.h"
70 #include "compiler/compemu.h"
71 #include "fpu/fpu.h"
72 #include "fpu/flags.h"
73
74 #define DEBUG 1
75 #include "debug.h"
76
77 #ifdef ENABLE_MON
78 #include "mon.h"
79 #endif
80
81 #ifndef WIN32
82 #define PROFILE_COMPILE_TIME 1
83 #define PROFILE_UNTRANSLATED_INSNS 1
84 #endif
85
86 #if defined(__x86_64__) && 0
87 #define RECORD_REGISTER_USAGE 1
88 #endif
89
90 #ifdef WIN32
91 #undef write_log
92 #define write_log dummy_write_log
93 static void dummy_write_log(const char *, ...) { }
94 #endif
95
96 #if JIT_DEBUG
97 #undef abort
98 #define abort() do { \
99 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100 exit(EXIT_FAILURE); \
101 } while (0)
102 #endif
103
104 #if RECORD_REGISTER_USAGE
105 static uint64 reg_count[16];
106 static int reg_count_local[16];
107
108 static int reg_count_compare(const void *ap, const void *bp)
109 {
110 const int a = *((int *)ap);
111 const int b = *((int *)bp);
112 return (reg_count[b] > reg_count[a]) - (reg_count[b] < reg_count[a]); /* three-way compare; a plain uint64 difference could truncate */
113 }
114 #endif
115
116 #if PROFILE_COMPILE_TIME
117 #include <time.h>
118 static uae_u32 compile_count = 0;
119 static clock_t compile_time = 0;
120 static clock_t emul_start_time = 0;
121 static clock_t emul_end_time = 0;
122 #endif
123
124 #if PROFILE_UNTRANSLATED_INSNS
125 const int untranslated_top_ten = 20; /* despite the name, reports the top 20 */
126 static uae_u32 raw_cputbl_count[65536] = { 0, };
127 static uae_u16 opcode_nums[65536];
128
129 static int untranslated_compfn(const void *e1, const void *e2)
130 {
131 return (raw_cputbl_count[*(const uae_u16 *)e2] > raw_cputbl_count[*(const uae_u16 *)e1]) - (raw_cputbl_count[*(const uae_u16 *)e2] < raw_cputbl_count[*(const uae_u16 *)e1]); /* descending; qsort needs a proper three-way result */
132 }
133 #endif
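/* Sketch of how this comparator is meant to be used at exit time (the
 * actual reporting code appears later in this file):
 *
 * for (int i = 0; i < 65536; i++)
 *     opcode_nums[i] = i;
 * qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
 * // opcode_nums[0..untranslated_top_ten-1] now hold the most
 * // frequently executed untranslated opcodes, in descending order.
 */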
134
135 static compop_func *compfunctbl[65536];
136 static compop_func *nfcompfunctbl[65536];
137 static cpuop_func *nfcpufunctbl[65536];
138 uae_u8* comp_pc_p;
139
140 // From main_unix.cpp
141 extern bool ThirtyThreeBitAddressing;
142
143 // From newcpu.cpp
144 extern bool quit_program;
145
146 // gb-- Extra data for Basilisk II/JIT
147 #if JIT_DEBUG
148 static bool JITDebug = false; // Enable runtime disassemblers through mon?
149 #else
150 const bool JITDebug = false; // Don't use JIT debug mode at all
151 #endif
152 #if USE_INLINING
153 static bool follow_const_jumps = true; // Flag: translation through constant jumps
154 #else
155 const bool follow_const_jumps = false;
156 #endif
157
158 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (in KB, i.e. 1 MB)
159 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
160 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
161 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
162 static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions?
163 static bool have_cmov = false; // target has CMOV instructions?
164 static bool have_lahf_lm = true; // target has LAHF supported in long mode?
165 static bool have_rat_stall = true; // target has partial register stalls?
166 const bool tune_alignment = true; // Tune code alignments for running CPU?
167 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
168 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
169 static int align_loops = 32; // Align the start of loops
170 static int align_jumps = 32; // Align the start of jumps
171 static int optcount[10] = {
172 10, // How often a block has to be executed before it is translated
173 0, // How often to use naive translation
174 0, 0, 0, 0,
175 -1, -1, -1, -1
176 };
177
178 struct op_properties {
179 uae_u8 use_flags;
180 uae_u8 set_flags;
181 uae_u8 is_addx;
182 uae_u8 cflow;
183 };
184 static op_properties prop[65536];
185
186 static inline int end_block(uae_u32 opcode)
187 {
188 return (prop[opcode].cflow & fl_end_block);
189 }
190
191 static inline bool is_const_jump(uae_u32 opcode)
192 {
193 return (prop[opcode].cflow == fl_const_jump);
194 }
195
196 static inline bool may_trap(uae_u32 opcode)
197 {
198 return (prop[opcode].cflow & fl_trap);
199 }
200
201 static inline unsigned int cft_map (unsigned int f)
202 {
203 #ifndef HAVE_GET_WORD_UNSWAPPED
204 return f;
205 #else
206 return ((f >> 8) & 255) | ((f & 255) << 8);
207 #endif
208 }
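/* Example: with HAVE_GET_WORD_UNSWAPPED, opcode words come back
 * byte-swapped on little-endian hosts, so cft_map(0x754e) == 0x4e75
 * (RTS); the compile-time tables are indexed through this mapping so
 * both memory layouts share the same table contents.
 */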
209
210 uae_u8* start_pc_p;
211 uae_u32 start_pc;
212 uae_u32 current_block_pc_p;
213 static uintptr current_block_start_target;
214 uae_u32 needed_flags;
215 static uintptr next_pc_p;
216 static uintptr taken_pc_p;
217 static int branch_cc;
218 static int redo_current_block;
219
220 int segvcount=0;
221 int soft_flush_count=0;
222 int hard_flush_count=0;
223 int checksum_count=0;
224 static uae_u8* current_compile_p=NULL;
225 static uae_u8* max_compile_start;
226 static uae_u8* compiled_code=NULL;
227 static uae_s32 reg_alloc_run;
228 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
229 static uae_u8* popallspace=NULL;
230
231 void* pushall_call_handler=NULL;
232 static void* popall_do_nothing=NULL;
233 static void* popall_exec_nostats=NULL;
234 static void* popall_execute_normal=NULL;
235 static void* popall_cache_miss=NULL;
236 static void* popall_recompile_block=NULL;
237 static void* popall_check_checksum=NULL;
238
239 /* The 68k only ever executes from even addresses. So right now, we
240 * waste half the entries in this array.
241 * UPDATE: We now use those entries to store the start of the linked
242 * lists that we maintain for each hash result.
243 */
244 cacheline cache_tags[TAGSIZE];
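/* Layout sketch: tag entries are used in pairs. For a given cache line
 * cl, cache_tags[cl].handler is the code address execution jumps
 * through, while cache_tags[cl+1].bi heads the chain of blockinfos
 * hashing to that line:
 *
 * cl   -> handler (direct dispatch target)
 * cl+1 -> bi -> next_same_cl -> ... (collision chain)
 *
 * See add_to_cl_list() / remove_from_cl_list() below.
 */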
245 int letit=0;
246 blockinfo* hold_bi[MAX_HOLD_BI];
247 blockinfo* active;
248 blockinfo* dormant;
249
250 /* 68040 */
251 extern struct cputbl op_smalltbl_0_nf[];
252 extern struct comptbl op_smalltbl_0_comp_nf[];
253 extern struct comptbl op_smalltbl_0_comp_ff[];
254
255 /* 68020 + 68881 */
256 extern struct cputbl op_smalltbl_1_nf[];
257
258 /* 68020 */
259 extern struct cputbl op_smalltbl_2_nf[];
260
261 /* 68010 */
262 extern struct cputbl op_smalltbl_3_nf[];
263
264 /* 68000 */
265 extern struct cputbl op_smalltbl_4_nf[];
266
267 /* 68000 slow but compatible. */
268 extern struct cputbl op_smalltbl_5_nf[];
269
270 static void flush_icache_hard(int n);
271 static void flush_icache_lazy(int n);
272 static void flush_icache_none(int n);
273 void (*flush_icache)(int n) = flush_icache_none;
274
275
276
277 bigstate live;
278 smallstate empty_ss;
279 smallstate default_ss;
280 static int optlev;
281
282 static int writereg(int r, int size);
283 static void unlock2(int r);
284 static void setlock(int r);
285 static int readreg_specific(int r, int size, int spec);
286 static int writereg_specific(int r, int size, int spec);
287 static void prepare_for_call_1(void);
288 static void prepare_for_call_2(void);
289 static void align_target(uae_u32 a);
290
291 static uae_s32 nextused[VREGS];
292
293 uae_u32 m68k_pc_offset;
294
295 /* Some arithmetic operations can be optimized away if the operands
296 * are known to be constant. But that's only a good idea when the
297 * side effects they would have on the flags are not important. This
298 * variable indicates whether we need the side effects or not.
299 */
300 uae_u32 needflags=0;
301
302 /* Flag handling is complicated.
303 *
304 * x86 instructions create flags, which quite often are exactly what we
305 * want. So at times, the "68k" flags are actually in the x86 flags.
306 *
307 * Then again, sometimes we do x86 instructions that clobber the x86
308 * flags, but don't represent a corresponding m68k instruction. In that
309 * case, we have to save them.
310 *
311 * We used to save them to the stack, but now store them back directly
312 * into the regflags.cznv of the traditional emulation. Thus some odd
313 * names.
314 *
315 * So flags can be in either of two places (used to be three; boy, were
316 * things complicated back then!), and either place can contain either
317 * valid flags or invalid trash (and on the stack, there was also the
318 * option of "nothing at all", now gone). A couple of variables keep
319 * track of the respective states.
320 *
321 * To make things worse, we might or might not be interested in the flags.
322 * By default, we are, but a call to dont_care_flags can change that
323 * until the next call to live_flags. If we are not, pretty much whatever
324 * is in the register and/or the native flags is seen as valid.
325 */
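/* A compile-time sketch of the two flag locations described above (see
 * make_flags_live_internal() and flags_to_stack() below):
 *
 * live.flags_in_flags : VALID | TRASH -- native x86 flags register
 * live.flags_on_stack : VALID | TRASH -- regflags.cznv in memory
 *
 * make_flags_live_internal() reloads VALID in-memory flags into the
 * native flags; flags_to_stack() saves them the other way before an
 * instruction would clobber them.
 */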
326
327 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
328 {
329 return cache_tags[cl+1].bi;
330 }
331
332 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
333 {
334 blockinfo* bi=get_blockinfo(cacheline(addr));
335
336 while (bi) {
337 if (bi->pc_p==addr)
338 return bi;
339 bi=bi->next_same_cl;
340 }
341 return NULL;
342 }
343
344
345 /*******************************************************************
346 * All sorts of list related functions for all of the lists *
347 *******************************************************************/
348
349 static __inline__ void remove_from_cl_list(blockinfo* bi)
350 {
351 uae_u32 cl=cacheline(bi->pc_p);
352
353 if (bi->prev_same_cl_p)
354 *(bi->prev_same_cl_p)=bi->next_same_cl;
355 if (bi->next_same_cl)
356 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
357 if (cache_tags[cl+1].bi)
358 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
359 else
360 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
361 }
362
363 static __inline__ void remove_from_list(blockinfo* bi)
364 {
365 if (bi->prev_p)
366 *(bi->prev_p)=bi->next;
367 if (bi->next)
368 bi->next->prev_p=bi->prev_p;
369 }
370
371 static __inline__ void remove_from_lists(blockinfo* bi)
372 {
373 remove_from_list(bi);
374 remove_from_cl_list(bi);
375 }
376
377 static __inline__ void add_to_cl_list(blockinfo* bi)
378 {
379 uae_u32 cl=cacheline(bi->pc_p);
380
381 if (cache_tags[cl+1].bi)
382 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
383 bi->next_same_cl=cache_tags[cl+1].bi;
384
385 cache_tags[cl+1].bi=bi;
386 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
387
388 cache_tags[cl].handler=bi->handler_to_use;
389 }
390
391 static __inline__ void raise_in_cl_list(blockinfo* bi)
392 {
393 remove_from_cl_list(bi);
394 add_to_cl_list(bi);
395 }
396
397 static __inline__ void add_to_active(blockinfo* bi)
398 {
399 if (active)
400 active->prev_p=&(bi->next);
401 bi->next=active;
402
403 active=bi;
404 bi->prev_p=&active;
405 }
406
407 static __inline__ void add_to_dormant(blockinfo* bi)
408 {
409 if (dormant)
410 dormant->prev_p=&(bi->next);
411 bi->next=dormant;
412
413 dormant=bi;
414 bi->prev_p=&dormant;
415 }
416
417 static __inline__ void remove_dep(dependency* d)
418 {
419 if (d->prev_p)
420 *(d->prev_p)=d->next;
421 if (d->next)
422 d->next->prev_p=d->prev_p;
423 d->prev_p=NULL;
424 d->next=NULL;
425 }
426
427 /* This block's code is about to be thrown away, so it no longer
428 depends on anything else */
429 static __inline__ void remove_deps(blockinfo* bi)
430 {
431 remove_dep(&(bi->dep[0]));
432 remove_dep(&(bi->dep[1]));
433 }
434
435 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
436 {
437 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
438 }
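/* The store above patches the rel32 displacement of an already emitted
 * jump. On x86, a rel32 is relative to the end of the instruction,
 * i.e. to the byte just past the 4-byte displacement field, hence:
 *
 * new_disp = (uintptr)target - ((uintptr)displacement_field + 4)
 */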
439
440 /********************************************************************
441 * Soft flush handling support functions *
442 ********************************************************************/
443
444 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
445 {
446 //write_log("bi is %p\n",bi);
447 if (dh!=bi->direct_handler_to_use) {
448 dependency* x=bi->deplist;
449 //write_log("bi->deplist=%p\n",bi->deplist);
450 while (x) {
451 //write_log("x is %p\n",x);
452 //write_log("x->next is %p\n",x->next);
453 //write_log("x->prev_p is %p\n",x->prev_p);
454
455 if (x->jmp_off) {
456 adjust_jmpdep(x,dh);
457 }
458 x=x->next;
459 }
460 bi->direct_handler_to_use=dh;
461 }
462 }
463
464 static __inline__ void invalidate_block(blockinfo* bi)
465 {
466 int i;
467
468 bi->optlevel=0;
469 bi->count=optcount[0]-1;
470 bi->handler=NULL;
471 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
472 bi->direct_handler=NULL;
473 set_dhtu(bi,bi->direct_pen);
474 bi->needed_flags=0xff;
475 bi->status=BI_INVALID;
476 for (i=0;i<2;i++) {
477 bi->dep[i].jmp_off=NULL;
478 bi->dep[i].target=NULL;
479 }
480 remove_deps(bi);
481 }
482
483 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
484 {
485 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
486
487 Dif(!tbi) {
488 write_log("Could not create jmpdep!\n");
489 abort();
490 }
491 bi->dep[i].jmp_off=jmpaddr;
492 bi->dep[i].source=bi;
493 bi->dep[i].target=tbi;
494 bi->dep[i].next=tbi->deplist;
495 if (bi->dep[i].next)
496 bi->dep[i].next->prev_p=&(bi->dep[i].next);
497 bi->dep[i].prev_p=&(tbi->deplist);
498 tbi->deplist=&(bi->dep[i]);
499 }
500
501 static __inline__ void block_need_recompile(blockinfo * bi)
502 {
503 uae_u32 cl = cacheline(bi->pc_p);
504
505 set_dhtu(bi, bi->direct_pen);
506 bi->direct_handler = bi->direct_pen;
507
508 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
509 bi->handler = (cpuop_func *)popall_execute_normal;
510 if (bi == cache_tags[cl + 1].bi)
511 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
512 bi->status = BI_NEED_RECOMP;
513 }
514
515 static __inline__ void mark_callers_recompile(blockinfo * bi)
516 {
517 dependency *x = bi->deplist;
518
519 while (x) {
520 dependency *next = x->next; /* This disappears when we mark for
521 * recompilation and thus remove the
522 * blocks from the lists */
523 if (x->jmp_off) {
524 blockinfo *cbi = x->source;
525
526 Dif(cbi->status == BI_INVALID) {
527 // write_log("invalid block in dependency list\n"); // FIXME?
528 // abort();
529 }
530 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
531 block_need_recompile(cbi);
532 mark_callers_recompile(cbi);
533 }
534 else if (cbi->status == BI_COMPILING) {
535 redo_current_block = 1;
536 }
537 else if (cbi->status == BI_NEED_RECOMP) {
538 /* nothing */
539 }
540 else {
541 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
542 }
543 }
544 x = next;
545 }
546 }
547
548 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
549 {
550 blockinfo* bi=get_blockinfo_addr(addr);
551 int i;
552
553 if (!bi) {
554 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
555 if (hold_bi[i]) {
556 uae_u32 cl=cacheline(addr);
557
558 bi=hold_bi[i];
559 hold_bi[i]=NULL;
560 bi->pc_p=(uae_u8 *)addr;
561 invalidate_block(bi);
562 add_to_active(bi);
563 add_to_cl_list(bi);
564
565 }
566 }
567 }
568 if (!bi) {
569 write_log("Looking for blockinfo, can't find free one\n");
570 abort();
571 }
572 return bi;
573 }
574
575 static void prepare_block(blockinfo* bi);
576
577 /* Management of blockinfos.
578 
579 A blockinfo struct is allocated whenever a new block has to be
580 compiled. If the list of free blockinfos is empty, we allocate a new
581 pool of blockinfos and link the newly created blockinfos together
582 into the list of free blockinfos. Otherwise, we simply pop a structure
583 off the free list.
584 
585 Blockinfos are lazily deallocated, i.e. chained together in the
586 list of free blockinfos whenever a translation cache flush (hard or
587 soft) request occurs.
588 */
589
590 template< class T >
591 class LazyBlockAllocator
592 {
593 enum {
594 kPoolSize = 1 + 4096 / sizeof(T)
595 };
596 struct Pool {
597 T chunk[kPoolSize];
598 Pool * next;
599 };
600 Pool * mPools;
601 T * mChunks;
602 public:
603 LazyBlockAllocator() : mPools(0), mChunks(0) { }
604 ~LazyBlockAllocator();
605 T * acquire();
606 void release(T * const);
607 };
608
609 template< class T >
610 LazyBlockAllocator<T>::~LazyBlockAllocator()
611 {
612 Pool * currentPool = mPools;
613 while (currentPool) {
614 Pool * deadPool = currentPool;
615 currentPool = currentPool->next;
616 free(deadPool);
617 }
618 }
619
620 template< class T >
621 T * LazyBlockAllocator<T>::acquire()
622 {
623 if (!mChunks) {
624 // There is no chunk left, allocate a new pool and link the
625 // chunks into the free list
626 Pool * newPool = (Pool *)malloc(sizeof(Pool));
627 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
628 chunk->next = mChunks;
629 mChunks = chunk;
630 }
631 newPool->next = mPools;
632 mPools = newPool;
633 }
634 T * chunk = mChunks;
635 mChunks = chunk->next;
636 return chunk;
637 }
638
639 template< class T >
640 void LazyBlockAllocator<T>::release(T * const chunk)
641 {
642 chunk->next = mChunks;
643 mChunks = chunk;
644 }
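/* Usage sketch (hypothetical client type; the real ones are blockinfo
 * and checksum_info below). Any T with a T *next member works:
 *
 * struct node { node *next; int payload; };
 * static LazyBlockAllocator<node> NodeAllocator;
 * node *n = NodeAllocator.acquire(); // pops a chunk, mallocs a fresh
 *                                    // pool of ~4 KB when empty
 * NodeAllocator.release(n);          // pushes it back on the free list
 *
 * Pools are only returned to the system in the destructor; release()
 * just recycles chunks, which is what makes the allocator "lazy".
 */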
645
646 template< class T >
647 class HardBlockAllocator
648 {
649 public:
650 T * acquire() {
651 T * data = (T *)current_compile_p;
652 current_compile_p += sizeof(T);
653 return data;
654 }
655
656 void release(T * const chunk) {
657 // Deallocated on invalidation
658 }
659 };
660
661 #if USE_SEPARATE_BIA
662 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
663 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
664 #else
665 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
666 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
667 #endif
668
669 static __inline__ checksum_info *alloc_checksum_info(void)
670 {
671 checksum_info *csi = ChecksumInfoAllocator.acquire();
672 csi->next = NULL;
673 return csi;
674 }
675
676 static __inline__ void free_checksum_info(checksum_info *csi)
677 {
678 csi->next = NULL;
679 ChecksumInfoAllocator.release(csi);
680 }
681
682 static __inline__ void free_checksum_info_chain(checksum_info *csi)
683 {
684 while (csi != NULL) {
685 checksum_info *csi2 = csi->next;
686 free_checksum_info(csi);
687 csi = csi2;
688 }
689 }
690
691 static __inline__ blockinfo *alloc_blockinfo(void)
692 {
693 blockinfo *bi = BlockInfoAllocator.acquire();
694 #if USE_CHECKSUM_INFO
695 bi->csi = NULL;
696 #endif
697 return bi;
698 }
699
700 static __inline__ void free_blockinfo(blockinfo *bi)
701 {
702 #if USE_CHECKSUM_INFO
703 free_checksum_info_chain(bi->csi);
704 bi->csi = NULL;
705 #endif
706 BlockInfoAllocator.release(bi);
707 }
708
709 static __inline__ void alloc_blockinfos(void)
710 {
711 int i;
712 blockinfo* bi;
713
714 for (i=0;i<MAX_HOLD_BI;i++) {
715 if (hold_bi[i])
716 return;
717 bi=hold_bi[i]=alloc_blockinfo();
718 prepare_block(bi);
719 }
720 }
721
722 /********************************************************************
723 * Functions to emit data into memory, and other general support *
724 ********************************************************************/
725
726 static uae_u8* target;
727
728 static void emit_init(void)
729 {
730 }
731
732 static __inline__ void emit_byte(uae_u8 x)
733 {
734 *target++=x;
735 }
736
737 static __inline__ void emit_word(uae_u16 x)
738 {
739 *((uae_u16*)target)=x;
740 target+=2;
741 }
742
743 static __inline__ void emit_long(uae_u32 x)
744 {
745 *((uae_u32*)target)=x;
746 target+=4;
747 }
748
749 static __inline__ void emit_quad(uae_u64 x)
750 {
751 *((uae_u64*)target)=x;
752 target+=8;
753 }
754
755 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
756 {
757 memcpy((uae_u8 *)target,block,blocklen);
758 target+=blocklen;
759 }
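/* Emission sketch: generated code is written byte-for-byte at 'target'
 * (see set_target() below). E.g. emitting an x86 RET followed by four
 * bytes of inline data would be:
 *
 * emit_byte(0xc3);       // RET opcode
 * emit_long(0x12345678); // 4 bytes of inline data
 *
 * The raw_* generators in codegen_x86.cpp are built on these helpers.
 * Note the word/long/quad stores assume the target tolerates unaligned
 * writes, which holds for x86.
 */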
760
761 static __inline__ uae_u32 reverse32(uae_u32 v)
762 {
763 #if 1
764 // gb-- We have specialized byteswapping functions, just use them
765 return do_byteswap_32(v);
766 #else
767 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
768 #endif
769 }
770
771 /********************************************************************
772 * Getting the information about the target CPU *
773 ********************************************************************/
774
775 #include "codegen_x86.cpp"
776
777 void set_target(uae_u8* t)
778 {
779 target=t;
780 }
781
782 static __inline__ uae_u8* get_target_noopt(void)
783 {
784 return target;
785 }
786
787 __inline__ uae_u8* get_target(void)
788 {
789 return get_target_noopt();
790 }
791
792
793 /********************************************************************
794 * Flags status handling. EMIT TIME! *
795 ********************************************************************/
796
797 static void bt_l_ri_noclobber(R4 r, IMM i);
798
799 static void make_flags_live_internal(void)
800 {
801 if (live.flags_in_flags==VALID)
802 return;
803 Dif (live.flags_on_stack==TRASH) {
804 write_log("Want flags, got something on stack, but it is TRASH\n");
805 abort();
806 }
807 if (live.flags_on_stack==VALID) {
808 int tmp;
809 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
810 raw_reg_to_flags(tmp);
811 unlock2(tmp);
812
813 live.flags_in_flags=VALID;
814 return;
815 }
816 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
817 live.flags_in_flags,live.flags_on_stack);
818 abort();
819 }
820
821 static void flags_to_stack(void)
822 {
823 if (live.flags_on_stack==VALID)
824 return;
825 if (!live.flags_are_important) {
826 live.flags_on_stack=VALID;
827 return;
828 }
829 Dif (live.flags_in_flags!=VALID)
830 abort();
831 else {
832 int tmp;
833 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
834 raw_flags_to_reg(tmp);
835 unlock2(tmp);
836 }
837 live.flags_on_stack=VALID;
838 }
839
840 static __inline__ void clobber_flags(void)
841 {
842 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
843 flags_to_stack();
844 live.flags_in_flags=TRASH;
845 }
846
847 /* Prepare for leaving the compiled stuff */
848 static __inline__ void flush_flags(void)
849 {
850 flags_to_stack();
851 return;
852 }
853
854 int touchcnt;
855
856 /********************************************************************
857 * Partial register flushing for optimized calls *
858 ********************************************************************/
859
860 struct regusage {
861 uae_u16 rmask;
862 uae_u16 wmask;
863 };
864
865 static inline void ru_set(uae_u16 *mask, int reg)
866 {
867 #if USE_OPTIMIZED_CALLS
868 *mask |= 1 << reg;
869 #endif
870 }
871
872 static inline bool ru_get(const uae_u16 *mask, int reg)
873 {
874 #if USE_OPTIMIZED_CALLS
875 return (*mask & (1 << reg));
876 #else
877 /* Default: instruction reads & writes the register */
878 return true;
879 #endif
880 }
881
882 static inline void ru_set_read(regusage *ru, int reg)
883 {
884 ru_set(&ru->rmask, reg);
885 }
886
887 static inline void ru_set_write(regusage *ru, int reg)
888 {
889 ru_set(&ru->wmask, reg);
890 }
891
892 static inline bool ru_read_p(const regusage *ru, int reg)
893 {
894 return ru_get(&ru->rmask, reg);
895 }
896
897 static inline bool ru_write_p(const regusage *ru, int reg)
898 {
899 return ru_get(&ru->wmask, reg);
900 }
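/* Mask layout: one bit per 68k register, bits 0-7 for D0-D7 and bits
 * 8-15 for A0-A7 (ru_fill_ea adds 8 for address-register modes).
 * Schematic example for MOVE.L D1,(A0):
 *
 * rmask = 0x0102 // D1 (bit 1) read, A0 (bit 8) read for the EA
 * wmask = 0x0000 // memory destination, no register written
 */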
901
902 static void ru_fill_ea(regusage *ru, int reg, amodes mode,
903 wordsizes size, int write_mode)
904 {
905 switch (mode) {
906 case Areg:
907 reg += 8;
908 /* fall through */
909 case Dreg:
910 ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
911 break;
912 case Ad16:
913 /* skip displacement, then fall through */
914 m68k_pc_offset += 2;
915 case Aind:
916 case Aipi:
917 case Apdi:
918 ru_set_read(ru, reg+8);
919 break;
920 case Ad8r:
921 ru_set_read(ru, reg+8);
922 /* fall through */
923 case PC8r: {
924 uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
925 reg = (dp >> 12) & 15;
926 ru_set_read(ru, reg);
927 if (dp & 0x100)
928 m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
929 break;
930 }
931 case PC16:
932 case absw:
933 case imm0:
934 case imm1:
935 m68k_pc_offset += 2;
936 break;
937 case absl:
938 case imm2:
939 m68k_pc_offset += 4;
940 break;
941 case immi:
942 m68k_pc_offset += (size == sz_long) ? 4 : 2;
943 break;
944 }
945 }
946
947 /* TODO: split into a static initialization part and a dynamic one
948 (instructions depending on extension words) */
949 static void ru_fill(regusage *ru, uae_u32 opcode)
950 {
951 m68k_pc_offset += 2;
952
953 /* Default: no register is used or written to */
954 ru->rmask = 0;
955 ru->wmask = 0;
956
957 uae_u32 real_opcode = cft_map(opcode);
958 struct instr *dp = &table68k[real_opcode];
959
960 bool rw_dest = true;
961 bool handled = false;
962
963 /* Handle some instructions specifically */
964 uae_u16 reg, ext;
965 switch (dp->mnemo) {
966 case i_BFCHG:
967 case i_BFCLR:
968 case i_BFEXTS:
969 case i_BFEXTU:
970 case i_BFFFO:
971 case i_BFINS:
972 case i_BFSET:
973 case i_BFTST:
974 ext = comp_get_iword((m68k_pc_offset+=2)-2);
975 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
976 if (ext & 0x020) ru_set_read(ru, ext & 7);
977 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
978 if (dp->dmode == Dreg)
979 ru_set_read(ru, dp->dreg);
980 switch (dp->mnemo) {
981 case i_BFEXTS:
982 case i_BFEXTU:
983 case i_BFFFO:
984 ru_set_write(ru, (ext >> 12) & 7);
985 break;
986 case i_BFINS:
987 ru_set_read(ru, (ext >> 12) & 7);
988 /* fall through */
989 case i_BFCHG:
990 case i_BFCLR:
991 case i_BFSET: /* i_BSET could never occur in this bitfield-only switch */
992 if (dp->dmode == Dreg)
993 ru_set_write(ru, dp->dreg);
994 break;
995 }
996 handled = true;
997 rw_dest = false;
998 break;
999
1000 case i_BTST:
1001 rw_dest = false;
1002 break;
1003
1004 case i_CAS:
1005 {
1006 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1007 int Du = ext & 7;
1008 ru_set_read(ru, Du);
1009 int Dc = (ext >> 6) & 7;
1010 ru_set_read(ru, Dc);
1011 ru_set_write(ru, Dc);
1012 break;
1013 }
1014 case i_CAS2:
1015 {
1016 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1017 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1018 Rn1 = (ext >> 12) & 15;
1019 Du1 = (ext >> 6) & 7;
1020 Dc1 = ext & 7;
1021 ru_set_read(ru, Rn1);
1022 ru_set_read(ru, Du1);
1023 ru_set_read(ru, Dc1);
1024 ru_set_write(ru, Dc1);
1025 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1026 Rn2 = (ext >> 12) & 15;
1027 Du2 = (ext >> 6) & 7;
1028 Dc2 = ext & 7;
1029 ru_set_read(ru, Rn2);
1030 ru_set_read(ru, Du2);
1031 ru_set_write(ru, Dc2);
1032 break;
1033 }
1034 case i_DIVL: case i_MULL:
1035 m68k_pc_offset += 2;
1036 break;
1037 case i_LEA:
1038 case i_MOVE: case i_MOVEA: case i_MOVE16:
1039 rw_dest = false;
1040 break;
1041 case i_PACK: case i_UNPK:
1042 rw_dest = false;
1043 m68k_pc_offset += 2;
1044 break;
1045 case i_TRAPcc:
1046 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1047 break;
1048 case i_RTR:
1049 /* do nothing, just for coverage debugging */
1050 break;
1051 /* TODO: handle EXG instruction */
1052 }
1053
1054 /* Handle A-Traps better */
1055 if ((real_opcode & 0xf000) == 0xa000) {
1056 handled = true;
1057 }
1058
1059 /* Handle EmulOps better */
1060 if ((real_opcode & 0xff00) == 0x7100) {
1061 handled = true;
1062 ru->rmask = 0xffff;
1063 ru->wmask = 0;
1064 }
1065
1066 if (dp->suse && !handled)
1067 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1068
1069 if (dp->duse && !handled)
1070 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1071
1072 if (rw_dest)
1073 ru->rmask |= ru->wmask;
1074
1075 handled = handled || dp->suse || dp->duse;
1076
1077 /* Mark all registers as used/written if the instruction may trap */
1078 if (may_trap(opcode)) {
1079 handled = true;
1080 ru->rmask = 0xffff;
1081 ru->wmask = 0xffff;
1082 }
1083
1084 if (!handled) {
1085 write_log("ru_fill: %04x = { %04x, %04x }\n",
1086 real_opcode, ru->rmask, ru->wmask);
1087 abort();
1088 }
1089 }
1090
1091 /********************************************************************
1092 * register allocation per block logging *
1093 ********************************************************************/
1094
1095 static uae_s8 vstate[VREGS];
1096 static uae_s8 vwritten[VREGS];
1097 static uae_s8 nstate[N_REGS];
1098
1099 #define L_UNKNOWN -127
1100 #define L_UNAVAIL -1
1101 #define L_NEEDED -2
1102 #define L_UNNEEDED -3
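/* State sketch: every register starts as L_UNKNOWN in log_startblock().
 * The first read of a vreg turns it into L_NEEDED, the first clobber
 * into L_UNNEEDED; whatever is still L_UNKNOWN when log_flush() runs is
 * pessimistically marked L_NEEDED (vregs) or L_UNAVAIL (native regs).
 * nstate[n] >= 0 means native reg n still mirrors vreg nstate[n].
 */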
1103
1104 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1105 {
1106 int i;
1107
1108 for (i = 0; i < VREGS; i++)
1109 s->virt[i] = vstate[i];
1110 for (i = 0; i < N_REGS; i++)
1111 s->nat[i] = nstate[i];
1112 }
1113
1114 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1115 {
1116 int i;
1117 int reverse = 0;
1118
1119 for (i = 0; i < VREGS; i++) {
1120 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1121 return 1;
1122 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1123 reverse++;
1124 }
1125 for (i = 0; i < N_REGS; i++) {
1126 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1127 return 1;
1128 if (nstate[i] < 0 && s->nat[i] >= 0)
1129 reverse++;
1130 }
1131 if (reverse >= 2 && USE_MATCH)
1132 return 1; /* In this case, it might be worth recompiling the
1133 * callers */
1134 return 0;
1135 }
1136
1137 static __inline__ void log_startblock(void)
1138 {
1139 int i;
1140
1141 for (i = 0; i < VREGS; i++) {
1142 vstate[i] = L_UNKNOWN;
1143 vwritten[i] = 0;
1144 }
1145 for (i = 0; i < N_REGS; i++)
1146 nstate[i] = L_UNKNOWN;
1147 }
1148
1149 /* Using an n-reg for a temp variable */
1150 static __inline__ void log_isused(int n)
1151 {
1152 if (nstate[n] == L_UNKNOWN)
1153 nstate[n] = L_UNAVAIL;
1154 }
1155
1156 static __inline__ void log_visused(int r)
1157 {
1158 if (vstate[r] == L_UNKNOWN)
1159 vstate[r] = L_NEEDED;
1160 }
1161
1162 static __inline__ void do_load_reg(int n, int r)
1163 {
1164 if (r == FLAGTMP)
1165 raw_load_flagreg(n, r);
1166 else if (r == FLAGX)
1167 raw_load_flagx(n, r);
1168 else
1169 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1170 }
1171
1172 static __inline__ void check_load_reg(int n, int r)
1173 {
1174 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1175 }
1176
1177 static __inline__ void log_vwrite(int r)
1178 {
1179 vwritten[r] = 1;
1180 }
1181
1182 /* Using an n-reg to hold a v-reg */
1183 static __inline__ void log_isreg(int n, int r)
1184 {
1185 static int count = 0;
1186
1187 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1188 nstate[n] = r;
1189 else {
1190 do_load_reg(n, r);
1191 if (nstate[n] == L_UNKNOWN)
1192 nstate[n] = L_UNAVAIL;
1193 }
1194 if (vstate[r] == L_UNKNOWN)
1195 vstate[r] = L_NEEDED;
1196 }
1197
1198 static __inline__ void log_clobberreg(int r)
1199 {
1200 if (vstate[r] == L_UNKNOWN)
1201 vstate[r] = L_UNNEEDED;
1202 }
1203
1204 /* This ends all possibility of clever register allocation */
1205
1206 static __inline__ void log_flush(void)
1207 {
1208 int i;
1209
1210 for (i = 0; i < VREGS; i++)
1211 if (vstate[i] == L_UNKNOWN)
1212 vstate[i] = L_NEEDED;
1213 for (i = 0; i < N_REGS; i++)
1214 if (nstate[i] == L_UNKNOWN)
1215 nstate[i] = L_UNAVAIL;
1216 }
1217
1218 static __inline__ void log_dump(void)
1219 {
1220 int i;
1221
1222 return; /* dump below intentionally disabled; remove this to debug */
1223
1224 write_log("----------------------\n");
1225 for (i = 0; i < N_REGS; i++) {
1226 switch (nstate[i]) {
1227 case L_UNKNOWN:
1228 write_log("Nat %d : UNKNOWN\n", i);
1229 break;
1230 case L_UNAVAIL:
1231 write_log("Nat %d : UNAVAIL\n", i);
1232 break;
1233 default:
1234 write_log("Nat %d : %d\n", i, nstate[i]);
1235 break;
1236 }
1237 }
1238 for (i = 0; i < VREGS; i++) {
1239 if (vstate[i] == L_UNNEEDED)
1240 write_log("Virt %d: UNNEEDED\n", i);
1241 }
1242 }
1243
1244 /********************************************************************
1245 * register status handling. EMIT TIME! *
1246 ********************************************************************/
1247
1248 static __inline__ void set_status(int r, int status)
1249 {
1250 if (status == ISCONST)
1251 log_clobberreg(r);
1252 live.state[r].status=status;
1253 }
1254
1255 static __inline__ int isinreg(int r)
1256 {
1257 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1258 }
1259
1260 static __inline__ void adjust_nreg(int r, uae_u32 val)
1261 {
1262 if (!val)
1263 return;
1264 raw_lea_l_brr(r,r,val);
1265 }
1266
1267 static void tomem(int r)
1268 {
1269 int rr=live.state[r].realreg;
1270
1271 if (isinreg(r)) {
1272 if (live.state[r].val && live.nat[rr].nholds==1
1273 && !live.nat[rr].locked) {
1274 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1275 // live.state[r].val,r,rr,target);
1276 adjust_nreg(rr,live.state[r].val);
1277 live.state[r].val=0;
1278 live.state[r].dirtysize=4;
1279 set_status(r,DIRTY);
1280 }
1281 }
1282
1283 if (live.state[r].status==DIRTY) {
1284 switch (live.state[r].dirtysize) {
1285 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1286 case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1287 case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1288 default: abort();
1289 }
1290 log_vwrite(r);
1291 set_status(r,CLEAN);
1292 live.state[r].dirtysize=0;
1293 }
1294 }
1295
1296 static __inline__ int isconst(int r)
1297 {
1298 return live.state[r].status==ISCONST;
1299 }
1300
1301 int is_const(int r)
1302 {
1303 return isconst(r);
1304 }
1305
1306 static __inline__ void writeback_const(int r)
1307 {
1308 if (!isconst(r))
1309 return;
1310 Dif (live.state[r].needflush==NF_HANDLER) {
1311 write_log("Trying to write back constant NF_HANDLER!\n");
1312 abort();
1313 }
1314
1315 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1316 log_vwrite(r);
1317 live.state[r].val=0;
1318 set_status(r,INMEM);
1319 }
1320
1321 static __inline__ void tomem_c(int r)
1322 {
1323 if (isconst(r)) {
1324 writeback_const(r);
1325 }
1326 else
1327 tomem(r);
1328 }
1329
1330 static void evict(int r)
1331 {
1332 int rr;
1333
1334 if (!isinreg(r))
1335 return;
1336 tomem(r);
1337 rr=live.state[r].realreg;
1338
1339 Dif (live.nat[rr].locked &&
1340 live.nat[rr].nholds==1) {
1341 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1342 abort();
1343 }
1344
1345 live.nat[rr].nholds--;
1346 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1347 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1348 int thisind=live.state[r].realind;
1349
1350 live.nat[rr].holds[thisind]=topreg;
1351 live.state[topreg].realind=thisind;
1352 }
1353 live.state[r].realreg=-1;
1354 set_status(r,INMEM);
1355 }
1356
1357 static __inline__ void free_nreg(int r)
1358 {
1359 int i=live.nat[r].nholds;
1360
1361 while (i) {
1362 int vr;
1363
1364 --i;
1365 vr=live.nat[r].holds[i];
1366 evict(vr);
1367 }
1368 Dif (live.nat[r].nholds!=0) {
1369 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1370 abort();
1371 }
1372 }
1373
1374 /* Use with care! */
1375 static __inline__ void isclean(int r)
1376 {
1377 if (!isinreg(r))
1378 return;
1379 live.state[r].validsize=4;
1380 live.state[r].dirtysize=0;
1381 live.state[r].val=0;
1382 set_status(r,CLEAN);
1383 }
1384
1385 static __inline__ void disassociate(int r)
1386 {
1387 isclean(r);
1388 evict(r);
1389 }
1390
1391 static __inline__ void set_const(int r, uae_u32 val)
1392 {
1393 disassociate(r);
1394 live.state[r].val=val;
1395 set_status(r,ISCONST);
1396 }
1397
1398 static __inline__ uae_u32 get_offset(int r)
1399 {
1400 return live.state[r].val;
1401 }
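/* Note on live.state[r].val: for an ISCONST register it holds the
 * constant itself; for a register living in a native reg it holds a
 * pending constant offset that still has to be added (see add_offset()
 * and remove_offset() below). tomem() materializes such an offset with
 * a single LEA before spilling.
 */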
1402
1403 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1404 {
1405 int bestreg;
1406 uae_s32 when;
1407 int i;
1408 uae_s32 badness=0; /* to shut up gcc */
1409 bestreg=-1;
1410 when=2000000000;
1411
1412 for (i=N_REGS;i--;) {
1413 badness=live.nat[i].touched;
1414 if (live.nat[i].nholds==0)
1415 badness=0;
1416 if (i==hint)
1417 badness-=200000000;
1418 if (!live.nat[i].locked && badness<when) {
1419 if ((size==1 && live.nat[i].canbyte) ||
1420 (size==2 && live.nat[i].canword) ||
1421 (size==4)) {
1422 bestreg=i;
1423 when=badness;
1424 if (live.nat[i].nholds==0 && hint<0)
1425 break;
1426 if (i==hint)
1427 break;
1428 }
1429 }
1430 }
1431 Dif (bestreg==-1)
1432 abort();
1433
1434 if (live.nat[bestreg].nholds>0) {
1435 free_nreg(bestreg);
1436 }
1437 if (isinreg(r)) {
1438 int rr=live.state[r].realreg;
1439 /* This will happen if we read a partially dirty register at a
1440 bigger size */
1441 Dif (willclobber || live.state[r].validsize>=size)
1442 abort();
1443 Dif (live.nat[rr].nholds!=1)
1444 abort();
1445 if (size==4 && live.state[r].validsize==2) {
1446 log_isused(bestreg);
1447 log_visused(r);
1448 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
1449 raw_bswap_32(bestreg);
1450 raw_zero_extend_16_rr(rr,rr);
1451 raw_zero_extend_16_rr(bestreg,bestreg);
1452 raw_bswap_32(bestreg);
1453 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1454 live.state[r].validsize=4;
1455 live.nat[rr].touched=touchcnt++;
1456 return rr;
1457 }
1458 if (live.state[r].validsize==1) {
1459 /* Nothing yet */
1460 }
1461 evict(r);
1462 }
1463
1464 if (!willclobber) {
1465 if (live.state[r].status!=UNDEF) {
1466 if (isconst(r)) {
1467 raw_mov_l_ri(bestreg,live.state[r].val);
1468 live.state[r].val=0;
1469 live.state[r].dirtysize=4;
1470 set_status(r,DIRTY);
1471 log_isused(bestreg);
1472 }
1473 else {
1474 log_isreg(bestreg, r); /* This will also load it! */
1475 live.state[r].dirtysize=0;
1476 set_status(r,CLEAN);
1477 }
1478 }
1479 else {
1480 live.state[r].val=0;
1481 live.state[r].dirtysize=0;
1482 set_status(r,CLEAN);
1483 log_isused(bestreg);
1484 }
1485 live.state[r].validsize=4;
1486 }
1487 else { /* this is the easiest way, but not optimal. FIXME! */
1488 /* Now it's trickier, but hopefully still OK */
1489 if (!isconst(r) || size==4) {
1490 live.state[r].validsize=size;
1491 live.state[r].dirtysize=size;
1492 live.state[r].val=0;
1493 set_status(r,DIRTY);
1494 if (size == 4) {
1495 log_clobberreg(r);
1496 log_isused(bestreg);
1497 }
1498 else {
1499 log_visused(r);
1500 log_isused(bestreg);
1501 }
1502 }
1503 else {
1504 if (live.state[r].status!=UNDEF)
1505 raw_mov_l_ri(bestreg,live.state[r].val);
1506 live.state[r].val=0;
1507 live.state[r].validsize=4;
1508 live.state[r].dirtysize=4;
1509 set_status(r,DIRTY);
1510 log_isused(bestreg);
1511 }
1512 }
1513 live.state[r].realreg=bestreg;
1514 live.state[r].realind=live.nat[bestreg].nholds;
1515 live.nat[bestreg].touched=touchcnt++;
1516 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1517 live.nat[bestreg].nholds++;
1518
1519 return bestreg;
1520 }
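/* Policy sketch for the search loop above: an LRU scan where
 * live.nat[i].touched is a monotonically increasing timestamp
 * (touchcnt), a free register costs 0, and the hint gets a bonus large
 * enough to win unless it is locked or cannot hold the requested size:
 *
 * badness = touched    // occupied: prefer least recently used
 * badness = 0          // free register
 * badness -= 200000000 // the hinted register
 */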
1521
1522 static int alloc_reg(int r, int size, int willclobber)
1523 {
1524 return alloc_reg_hinted(r,size,willclobber,-1);
1525 }
1526
1527 static void unlock2(int r)
1528 {
1529 Dif (!live.nat[r].locked)
1530 abort();
1531 live.nat[r].locked--;
1532 }
1533
1534 static void setlock(int r)
1535 {
1536 live.nat[r].locked++;
1537 }
1538
1539
1540 static void mov_nregs(int d, int s)
1541 {
1542 int ns=live.nat[s].nholds;
1543 int nd=live.nat[d].nholds;
1544 int i;
1545
1546 if (s==d)
1547 return;
1548
1549 if (nd>0)
1550 free_nreg(d);
1551
1552 log_isused(d);
1553 raw_mov_l_rr(d,s);
1554
1555 for (i=0;i<live.nat[s].nholds;i++) {
1556 int vs=live.nat[s].holds[i];
1557
1558 live.state[vs].realreg=d;
1559 live.state[vs].realind=i;
1560 live.nat[d].holds[i]=vs;
1561 }
1562 live.nat[d].nholds=live.nat[s].nholds;
1563
1564 live.nat[s].nholds=0;
1565 }
1566
1567
1568 static __inline__ void make_exclusive(int r, int size, int spec)
1569 {
1570 int clobber;
1571 reg_status oldstate;
1572 int rr=live.state[r].realreg;
1573 int nr;
1574 int nind;
1575 int ndirt=0;
1576 int i;
1577
1578 if (!isinreg(r))
1579 return;
1580 if (live.nat[rr].nholds==1)
1581 return;
1582 for (i=0;i<live.nat[rr].nholds;i++) {
1583 int vr=live.nat[rr].holds[i];
1584 if (vr!=r &&
1585 (live.state[vr].status==DIRTY || live.state[vr].val))
1586 ndirt++;
1587 }
1588 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1589 /* Everything else is clean, so let's keep this register */
1590 for (i=0;i<live.nat[rr].nholds;i++) {
1591 int vr=live.nat[rr].holds[i];
1592 if (vr!=r) {
1593 evict(vr);
1594 i--; /* Try that index again! */
1595 }
1596 }
1597 Dif (live.nat[rr].nholds!=1) {
1598 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1599 rr,live.nat[rr].nholds,r);
1600 abort();
1601 }
1602 return;
1603 }
1604
1605 /* We have to split the register */
1606 oldstate=live.state[r];
1607
1608 setlock(rr); /* Make sure this doesn't go away */
1609 /* Forget about r being in the register rr */
1610 disassociate(r);
1611 /* Get a new register, that we will clobber completely */
1612 if (oldstate.status==DIRTY) {
1613 /* If dirtysize is <4, we need a register that can handle the
1614 eventual smaller memory store! Thanks to Quake68k for exposing
1615 this detail ;-) */
1616 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1617 }
1618 else {
1619 nr=alloc_reg_hinted(r,4,1,spec);
1620 }
1621 nind=live.state[r].realind;
1622 live.state[r]=oldstate; /* Keep all the old state info */
1623 live.state[r].realreg=nr;
1624 live.state[r].realind=nind;
1625
1626 if (size<live.state[r].validsize) {
1627 if (live.state[r].val) {
1628 /* Might as well compensate for the offset now */
1629 raw_lea_l_brr(nr,rr,oldstate.val);
1630 live.state[r].val=0;
1631 live.state[r].dirtysize=4;
1632 set_status(r,DIRTY);
1633 }
1634 else
1635 raw_mov_l_rr(nr,rr); /* Make another copy */
1636 }
1637 unlock2(rr);
1638 }
1639
1640 static __inline__ void add_offset(int r, uae_u32 off)
1641 {
1642 live.state[r].val+=off;
1643 }
1644
1645 static __inline__ void remove_offset(int r, int spec)
1646 {
1647 reg_status oldstate;
1648 int rr;
1649
1650 if (isconst(r))
1651 return;
1652 if (live.state[r].val==0)
1653 return;
1654 if (isinreg(r) && live.state[r].validsize<4)
1655 evict(r);
1656
1657 if (!isinreg(r))
1658 alloc_reg_hinted(r,4,0,spec);
1659
1660 Dif (live.state[r].validsize!=4) {
1661 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1662 abort();
1663 }
1664 make_exclusive(r,0,-1);
1665 /* make_exclusive might have done the job already */
1666 if (live.state[r].val==0)
1667 return;
1668
1669 rr=live.state[r].realreg;
1670
1671 if (live.nat[rr].nholds==1) {
1672 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1673 // live.state[r].val,r,rr,target);
1674 adjust_nreg(rr,live.state[r].val);
1675 live.state[r].dirtysize=4;
1676 live.state[r].val=0;
1677 set_status(r,DIRTY);
1678 return;
1679 }
1680 write_log("Failed in remove_offset\n");
1681 abort();
1682 }
1683
1684 static __inline__ void remove_all_offsets(void)
1685 {
1686 int i;
1687
1688 for (i=0;i<VREGS;i++)
1689 remove_offset(i,-1);
1690 }
1691
1692 static inline void flush_reg_count(void)
1693 {
1694 #if RECORD_REGISTER_USAGE
1695 for (int r = 0; r < 16; r++)
1696 if (reg_count_local[r])
1697 ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1698 #endif
1699 }
1700
1701 static inline void record_register(int r)
1702 {
1703 #if RECORD_REGISTER_USAGE
1704 if (r < 16)
1705 reg_count_local[r]++;
1706 #endif
1707 }
1708
1709 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1710 {
1711 int n;
1712 int answer=-1;
1713
1714 record_register(r);
1715 if (live.state[r].status==UNDEF) {
1716 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1717 }
1718 if (!can_offset)
1719 remove_offset(r,spec);
1720
1721 if (isinreg(r) && live.state[r].validsize>=size) {
1722 n=live.state[r].realreg;
1723 switch(size) {
1724 case 1:
1725 if (live.nat[n].canbyte || spec>=0) {
1726 answer=n;
1727 }
1728 break;
1729 case 2:
1730 if (live.nat[n].canword || spec>=0) {
1731 answer=n;
1732 }
1733 break;
1734 case 4:
1735 answer=n;
1736 break;
1737 default: abort();
1738 }
1739 if (answer<0)
1740 evict(r);
1741 }
1742 /* either the value was in memory to start with, or it was evicted and
1743 is in memory now */
1744 if (answer<0) {
1745 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1746 }
1747
1748 if (spec>=0 && spec!=answer) {
1749 /* Too bad */
1750 mov_nregs(spec,answer);
1751 answer=spec;
1752 }
1753 live.nat[answer].locked++;
1754 live.nat[answer].touched=touchcnt++;
1755 return answer;
1756 }
1757
1758
1759
1760 static int readreg(int r, int size)
1761 {
1762 return readreg_general(r,size,-1,0);
1763 }
1764
1765 static int readreg_specific(int r, int size, int spec)
1766 {
1767 return readreg_general(r,size,spec,0);
1768 }
1769
1770 static int readreg_offset(int r, int size)
1771 {
1772 return readreg_general(r,size,-1,1);
1773 }
1774
1775 /* writereg_general(r, size, spec)
1776 *
1777 * INPUT
1778 * - r : mid-layer register
1779 * - size : requested size (1/2/4)
1780 * - spec : -1 to find or make a register free, otherwise specifies
1781 * the physical register to use in any case
1782 *
1783 * OUTPUT
1784 * - hard (physical, x86 here) register allocated to virtual register r
1785 */
1786 static __inline__ int writereg_general(int r, int size, int spec)
1787 {
1788 int n;
1789 int answer=-1;
1790
1791 record_register(r);
1792 if (size<4) {
1793 remove_offset(r,spec);
1794 }
1795
1796 make_exclusive(r,size,spec);
1797 if (isinreg(r)) {
1798 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1799 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1800 n=live.state[r].realreg;
1801
1802 Dif (live.nat[n].nholds!=1)
1803 abort();
1804 switch(size) {
1805 case 1:
1806 if (live.nat[n].canbyte || spec>=0) {
1807 live.state[r].dirtysize=ndsize;
1808 live.state[r].validsize=nvsize;
1809 answer=n;
1810 }
1811 break;
1812 case 2:
1813 if (live.nat[n].canword || spec>=0) {
1814 live.state[r].dirtysize=ndsize;
1815 live.state[r].validsize=nvsize;
1816 answer=n;
1817 }
1818 break;
1819 case 4:
1820 live.state[r].dirtysize=ndsize;
1821 live.state[r].validsize=nvsize;
1822 answer=n;
1823 break;
1824 default: abort();
1825 }
1826 if (answer<0)
1827 evict(r);
1828 }
1829 /* either the value was in memory to start with, or it was evicted and
1830 is in memory now */
1831 if (answer<0) {
1832 answer=alloc_reg_hinted(r,size,1,spec);
1833 }
1834 if (spec>=0 && spec!=answer) {
1835 mov_nregs(spec,answer);
1836 answer=spec;
1837 }
1838 if (live.state[r].status==UNDEF)
1839 live.state[r].validsize=4;
1840 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1841 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1842
1843 live.nat[answer].locked++;
1844 live.nat[answer].touched=touchcnt++;
1845 if (size==4) {
1846 live.state[r].val=0;
1847 }
1848 else {
1849 Dif (live.state[r].val) {
1850 write_log("Problem with val\n");
1851 abort();
1852 }
1853 }
1854 set_status(r,DIRTY);
1855 return answer;
1856 }
1857
1858 static int writereg(int r, int size)
1859 {
1860 return writereg_general(r,size,-1);
1861 }
1862
1863 static int writereg_specific(int r, int size, int spec)
1864 {
1865 return writereg_general(r,size,spec);
1866 }
1867
1868 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1869 {
1870 int n;
1871 int answer=-1;
1872
1873 record_register(r);
1874 if (live.state[r].status==UNDEF) {
1875 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1876 }
1877 remove_offset(r,spec);
1878 make_exclusive(r,0,spec);
1879
1880 Dif (wsize<rsize) {
1881 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1882 abort();
1883 }
1884 if (isinreg(r) && live.state[r].validsize>=rsize) {
1885 n=live.state[r].realreg;
1886 Dif (live.nat[n].nholds!=1)
1887 abort();
1888
1889 switch(rsize) {
1890 case 1:
1891 if (live.nat[n].canbyte || spec>=0) {
1892 answer=n;
1893 }
1894 break;
1895 case 2:
1896 if (live.nat[n].canword || spec>=0) {
1897 answer=n;
1898 }
1899 break;
1900 case 4:
1901 answer=n;
1902 break;
1903 default: abort();
1904 }
1905 if (answer<0)
1906 evict(r);
1907 }
1908 /* either the value was in memory to start with, or it was evicted and
1909 is in memory now */
1910 if (answer<0) {
1911 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1912 }
1913
1914 if (spec>=0 && spec!=answer) {
1915 /* Too bad */
1916 mov_nregs(spec,answer);
1917 answer=spec;
1918 }
1919 if (wsize>live.state[r].dirtysize)
1920 live.state[r].dirtysize=wsize;
1921 if (wsize>live.state[r].validsize)
1922 live.state[r].validsize=wsize;
1923 set_status(r,DIRTY);
1924
1925 live.nat[answer].locked++;
1926 live.nat[answer].touched=touchcnt++;
1927
1928 Dif (live.state[r].val) {
1929 write_log("Problem with val(rmw)\n");
1930 abort();
1931 }
1932 return answer;
1933 }
1934
1935 static int rmw(int r, int wsize, int rsize)
1936 {
1937 return rmw_general(r,wsize,rsize,-1);
1938 }
1939
1940 static int rmw_specific(int r, int wsize, int rsize, int spec)
1941 {
1942 return rmw_general(r,wsize,rsize,spec);
1943 }
1944
1945
1946 /* needed for restoring the carry flag on non-P6 cores */
1947 static void bt_l_ri_noclobber(R4 r, IMM i)
1948 {
1949 int size=4;
1950 if (i<16)
1951 size=2;
1952 r=readreg(r,size);
1953 raw_bt_l_ri(r,i);
1954 unlock2(r);
1955 }
1956
1957 /********************************************************************
1958 * FPU register status handling. EMIT TIME! *
1959 ********************************************************************/
1960
1961 static void f_tomem(int r)
1962 {
1963 if (live.fate[r].status==DIRTY) {
1964 #if USE_LONG_DOUBLE
1965 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1966 #else
1967 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1968 #endif
1969 live.fate[r].status=CLEAN;
1970 }
1971 }
1972
1973 static void f_tomem_drop(int r)
1974 {
1975 if (live.fate[r].status==DIRTY) {
1976 #if USE_LONG_DOUBLE
1977 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1978 #else
1979 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1980 #endif
1981 live.fate[r].status=INMEM;
1982 }
1983 }
1984
1985
1986 static __inline__ int f_isinreg(int r)
1987 {
1988 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1989 }
1990
1991 static void f_evict(int r)
1992 {
1993 int rr;
1994
1995 if (!f_isinreg(r))
1996 return;
1997 rr=live.fate[r].realreg;
1998 if (live.fat[rr].nholds==1)
1999 f_tomem_drop(r);
2000 else
2001 f_tomem(r);
2002
2003 Dif (live.fat[rr].locked &&
2004 live.fat[rr].nholds==1) {
2005 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2006 abort();
2007 }
2008
2009 live.fat[rr].nholds--;
2010 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2011 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2012 int thisind=live.fate[r].realind;
2013 live.fat[rr].holds[thisind]=topreg;
2014 live.fate[topreg].realind=thisind;
2015 }
2016 live.fate[r].status=INMEM;
2017 live.fate[r].realreg=-1;
2018 }
2019
2020 static __inline__ void f_free_nreg(int r)
2021 {
2022 int i=live.fat[r].nholds;
2023
2024 while (i) {
2025 int vr;
2026
2027 --i;
2028 vr=live.fat[r].holds[i];
2029 f_evict(vr);
2030 }
2031 Dif (live.fat[r].nholds!=0) {
2032 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2033 abort();
2034 }
2035 }
2036
2037
2038 /* Use with care! */
2039 static __inline__ void f_isclean(int r)
2040 {
2041 if (!f_isinreg(r))
2042 return;
2043 live.fate[r].status=CLEAN;
2044 }
2045
2046 static __inline__ void f_disassociate(int r)
2047 {
2048 f_isclean(r);
2049 f_evict(r);
2050 }
2051
2052
2053
2054 static int f_alloc_reg(int r, int willclobber)
2055 {
2056 int bestreg;
2057 uae_s32 when;
2058 int i;
2059 uae_s32 badness;
2060 bestreg=-1;
2061 when=2000000000;
2062 for (i=N_FREGS;i--;) {
2063 badness=live.fat[i].touched;
2064 if (live.fat[i].nholds==0)
2065 badness=0;
2066
2067 if (!live.fat[i].locked && badness<when) {
2068 bestreg=i;
2069 when=badness;
2070 if (live.fat[i].nholds==0)
2071 break;
2072 }
2073 }
2074 Dif (bestreg==-1)
2075 abort();
2076
2077 if (live.fat[bestreg].nholds>0) {
2078 f_free_nreg(bestreg);
2079 }
2080 if (f_isinreg(r)) {
2081 f_evict(r);
2082 }
2083
2084 if (!willclobber) {
2085 if (live.fate[r].status!=UNDEF) {
2086 #if USE_LONG_DOUBLE
2087 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2088 #else
2089 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2090 #endif
2091 }
2092 live.fate[r].status=CLEAN;
2093 }
2094 else {
2095 live.fate[r].status=DIRTY;
2096 }
2097 live.fate[r].realreg=bestreg;
2098 live.fate[r].realind=live.fat[bestreg].nholds;
2099 live.fat[bestreg].touched=touchcnt++;
2100 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2101 live.fat[bestreg].nholds++;
2102
2103 return bestreg;
2104 }
2105
2106 static void f_unlock(int r)
2107 {
2108 Dif (!live.fat[r].locked)
2109 abort();
2110 live.fat[r].locked--;
2111 }
2112
2113 static void f_setlock(int r)
2114 {
2115 live.fat[r].locked++;
2116 }
2117
2118 static __inline__ int f_readreg(int r)
2119 {
2120 int n;
2121 int answer=-1;
2122
2123 if (f_isinreg(r)) {
2124 n=live.fate[r].realreg;
2125 answer=n;
2126 }
2127 /* either the value was in memory to start with, or it was evicted and
2128 is in memory now */
2129 if (answer<0)
2130 answer=f_alloc_reg(r,0);
2131
2132 live.fat[answer].locked++;
2133 live.fat[answer].touched=touchcnt++;
2134 return answer;
2135 }
2136
2137 static __inline__ void f_make_exclusive(int r, int clobber)
2138 {
2139 freg_status oldstate;
2140 int rr=live.fate[r].realreg;
2141 int nr;
2142 int nind;
2143 int ndirt=0;
2144 int i;
2145
2146 if (!f_isinreg(r))
2147 return;
2148 if (live.fat[rr].nholds==1)
2149 return;
2150 for (i=0;i<live.fat[rr].nholds;i++) {
2151 int vr=live.fat[rr].holds[i];
2152 if (vr!=r && live.fate[vr].status==DIRTY)
2153 ndirt++;
2154 }
2155 if (!ndirt && !live.fat[rr].locked) {
2156 /* Everything else is clean, so let's keep this register */
2157 for (i=0;i<live.fat[rr].nholds;i++) {
2158 int vr=live.fat[rr].holds[i];
2159 if (vr!=r) {
2160 f_evict(vr);
2161 i--; /* Try that index again! */
2162 }
2163 }
2164 Dif (live.fat[rr].nholds!=1) {
2165 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2166 for (i=0;i<live.fat[rr].nholds;i++) {
2167 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2168 live.fate[live.fat[rr].holds[i]].realreg,
2169 live.fate[live.fat[rr].holds[i]].realind);
2170 }
2171 write_log("\n");
2172 abort();
2173 }
2174 return;
2175 }
2176
2177 /* We have to split the register */
2178 oldstate=live.fate[r];
2179
2180 f_setlock(rr); /* Make sure this doesn't go away */
2181 /* Forget about r being in the register rr */
2182 f_disassociate(r);
2183 /* Get a new register, that we will clobber completely */
2184 nr=f_alloc_reg(r,1);
2185 nind=live.fate[r].realind;
2186 if (!clobber)
2187 raw_fmov_rr(nr,rr); /* Make another copy */
2188 live.fate[r]=oldstate; /* Keep all the old state info */
2189 live.fate[r].realreg=nr;
2190 live.fate[r].realind=nind;
2191 f_unlock(rr);
2192 }
2193
2194
2195 static __inline__ int f_writereg(int r)
2196 {
2197 int n;
2198 int answer=-1;
2199
2200 f_make_exclusive(r,1);
2201 if (f_isinreg(r)) {
2202 n=live.fate[r].realreg;
2203 answer=n;
2204 }
2205 if (answer<0) {
2206 answer=f_alloc_reg(r,1);
2207 }
2208 live.fate[r].status=DIRTY;
2209 live.fat[answer].locked++;
2210 live.fat[answer].touched=touchcnt++;
2211 return answer;
2212 }
2213
2214 static int f_rmw(int r)
2215 {
2216 int n;
2217
2218 f_make_exclusive(r,0);
2219 if (f_isinreg(r)) {
2220 n=live.fate[r].realreg;
2221 }
2222 else
2223 n=f_alloc_reg(r,0);
2224 live.fate[r].status=DIRTY;
2225 live.fat[n].locked++;
2226 live.fat[n].touched=touchcnt++;
2227 return n;
2228 }
2229
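/* Transfer the FPU condition codes of FP_RESULT into the native flags.
   On hosts where raw_fflags_into_flags() clobbers a fixed integer
   register (FFLAG_NREG_CLOBBER_CONDITION), tmp is first parked in
   FFLAG_NREG via writereg_specific() so that register is free to be
   clobbered. */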
2230 static void fflags_into_flags_internal(uae_u32 tmp)
2231 {
2232 int r;
2233
2234 clobber_flags();
2235 r=f_readreg(FP_RESULT);
2236 if (FFLAG_NREG_CLOBBER_CONDITION) {
2237 int tmp2=tmp;
2238 tmp=writereg_specific(tmp,4,FFLAG_NREG);
2239 raw_fflags_into_flags(r);
2240 unlock2(tmp);
2241 forget_about(tmp2);
2242 }
2243 else
2244 raw_fflags_into_flags(r);
2245 f_unlock(r);
2246 live_flags();
2247 }
2248
2249
2250
2251
2252 /********************************************************************
2253 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2254 ********************************************************************/
2255
2256 /*
2257 * RULES FOR HANDLING REGISTERS:
2258 *
2259 * * In the function headers, order the parameters
2260 * - 1st registers written to
2261 * - 2nd read/modify/write registers
2262 * - 3rd registers read from
2263 * * Before calling raw_*, you must call readreg, writereg or rmw for
2264 * each register
2265 * * The order for this is
2266 * - 1st call remove_offset for all registers written to with size<4
2267 * - 2nd call readreg for all registers read without offset
2268 * - 3rd call rmw for all rmw registers
2269 * - 4th call readreg_offset for all registers that can handle offsets
2270 * - 5th call get_offset for all the registers from the previous step
2271 * - 6th call writereg for all written-to registers
2272 * - 7th call raw_*
2273 * - 8th unlock2 all registers that were locked
2274 */
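/* A minimal sketch (not part of the original source) of the ordering
 * above, for a hypothetical two-operand byte op that reads s and
 * read/modify/writes d; example_b, CLOBBER_EXAMPLE and raw_example_b
 * are made-up names standing in for a real midfunc, its clobber macro
 * and its raw emitter:
 *
 * MIDFUNC(2,example_b,(RW1 d, R1 s))
 * {
 *     CLOBBER_EXAMPLE;      // declare what the raw op clobbers
 *     s=readreg(s,1);       // 2nd: lock registers that are only read
 *     d=rmw(d,1,1);         // 3rd: lock read/modify/write registers
 *     raw_example_b(d,s);   // 7th: emit the raw opcode
 *     unlock2(d);           // 8th: unlock everything locked above
 *     unlock2(s);
 * }
 * MENDFUNC(2,example_b,(RW1 d, R1 s))
 */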
2275
2276 MIDFUNC(0,live_flags,(void))
2277 {
2278 live.flags_on_stack=TRASH;
2279 live.flags_in_flags=VALID;
2280 live.flags_are_important=1;
2281 }
2282 MENDFUNC(0,live_flags,(void))
2283
2284 MIDFUNC(0,dont_care_flags,(void))
2285 {
2286 live.flags_are_important=0;
2287 }
2288 MENDFUNC(0,dont_care_flags,(void))
2289
2290
2291 MIDFUNC(0,duplicate_carry,(void))
2292 {
2293 evict(FLAGX);
2294 make_flags_live_internal();
2295 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2296 log_vwrite(FLAGX);
2297 }
2298 MENDFUNC(0,duplicate_carry,(void))
2299
2300 MIDFUNC(0,restore_carry,(void))
2301 {
2302 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2303 bt_l_ri_noclobber(FLAGX,0);
2304 }
2305 else { /* Avoid the stall the above creates.
2306 This is slow on non-P6, though.
2307 */
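/* Rotating the flag byte left by 8 leaves its value unchanged but
   copies bit 0 into C, which restores the carry while avoiding the
   P6 stall mentioned above. */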
2308 COMPCALL(rol_b_ri)(FLAGX,8);
2309 isclean(FLAGX);
2310 }
2311 }
2312 MENDFUNC(0,restore_carry,(void))
2313
2314 MIDFUNC(0,start_needflags,(void))
2315 {
2316 needflags=1;
2317 }
2318 MENDFUNC(0,start_needflags,(void))
2319
2320 MIDFUNC(0,end_needflags,(void))
2321 {
2322 needflags=0;
2323 }
2324 MENDFUNC(0,end_needflags,(void))
2325
2326 MIDFUNC(0,make_flags_live,(void))
2327 {
2328 make_flags_live_internal();
2329 }
2330 MENDFUNC(0,make_flags_live,(void))
2331
2332 MIDFUNC(1,fflags_into_flags,(W2 tmp))
2333 {
2334 clobber_flags();
2335 fflags_into_flags_internal(tmp);
2336 }
2337 MENDFUNC(1,fflags_into_flags,(W2 tmp))
2338
2339
2340 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2341 {
2342 int size=4;
2343 if (i<16)
2344 size=2;
2345 CLOBBER_BT;
2346 r=readreg(r,size);
2347 raw_bt_l_ri(r,i);
2348 unlock2(r);
2349 }
2350 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2351
2352 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2353 {
2354 CLOBBER_BT;
2355 r=readreg(r,4);
2356 b=readreg(b,4);
2357 raw_bt_l_rr(r,b);
2358 unlock2(r);
2359 unlock2(b);
2360 }
2361 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2362
2363 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2364 {
2365 int size=4;
2366 if (i<16)
2367 size=2;
2368 CLOBBER_BT;
2369 r=rmw(r,size,size);
2370 raw_btc_l_ri(r,i);
2371 unlock2(r);
2372 }
2373 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2374
2375 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2376 {
2377 CLOBBER_BT;
2378 b=readreg(b,4);
2379 r=rmw(r,4,4);
2380 raw_btc_l_rr(r,b);
2381 unlock2(r);
2382 unlock2(b);
2383 }
2384 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2385
2386
2387 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2388 {
2389 int size=4;
2390 if (i<16)
2391 size=2;
2392 CLOBBER_BT;
2393 r=rmw(r,size,size);
2394 raw_btr_l_ri(r,i);
2395 unlock2(r);
2396 }
2397 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2398
2399 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2400 {
2401 CLOBBER_BT;
2402 b=readreg(b,4);
2403 r=rmw(r,4,4);
2404 raw_btr_l_rr(r,b);
2405 unlock2(r);
2406 unlock2(b);
2407 }
2408 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2409
2410
2411 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2412 {
2413 int size=4;
2414 if (i<16)
2415 size=2;
2416 CLOBBER_BT;
2417 r=rmw(r,size,size);
2418 raw_bts_l_ri(r,i);
2419 unlock2(r);
2420 }
2421 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2422
2423 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2424 {
2425 CLOBBER_BT;
2426 b=readreg(b,4);
2427 r=rmw(r,4,4);
2428 raw_bts_l_rr(r,b);
2429 unlock2(r);
2430 unlock2(b);
2431 }
2432 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2433
2434 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2435 {
2436 CLOBBER_MOV;
2437 d=writereg(d,4);
2438 raw_mov_l_rm(d,s);
2439 unlock2(d);
2440 }
2441 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2442
2443
2444 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2445 {
2446 r=readreg(r,4);
2447 raw_call_r(r);
2448 unlock2(r);
2449 }
2450 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2451
2452 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2453 {
2454 CLOBBER_SUB;
2455 raw_sub_l_mi(d,s) ;
2456 }
2457 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2458
2459 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2460 {
2461 CLOBBER_MOV;
2462 raw_mov_l_mi(d,s) ;
2463 }
2464 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2465
2466 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2467 {
2468 CLOBBER_MOV;
2469 raw_mov_w_mi(d,s) ;
2470 }
2471 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2472
2473 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2474 {
2475 CLOBBER_MOV;
2476 raw_mov_b_mi(d,s) ;
2477 }
2478 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2479
2480 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2481 {
2482 if (!i && !needflags)
2483 return;
2484 CLOBBER_ROL;
2485 r=rmw(r,1,1);
2486 raw_rol_b_ri(r,i);
2487 unlock2(r);
2488 }
2489 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2490
2491 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2492 {
2493 if (!i && !needflags)
2494 return;
2495 CLOBBER_ROL;
2496 r=rmw(r,2,2);
2497 raw_rol_w_ri(r,i);
2498 unlock2(r);
2499 }
2500 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2501
2502 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2503 {
2504 if (!i && !needflags)
2505 return;
2506 CLOBBER_ROL;
2507 r=rmw(r,4,4);
2508 raw_rol_l_ri(r,i);
2509 unlock2(r);
2510 }
2511 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2512
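/* Note for the *_rr shift and rotate midfuncs below: x86 takes a
   variable shift/rotate count only in CL, so the count is pinned with
   readreg_specific(r,1,SHIFTCOUNT_NREG) and the subsequent
   Dif (r!=1) asserts that nreg 1 (ECX) was indeed granted. */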
2513 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2514 {
2515 if (isconst(r)) {
2516 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2517 return;
2518 }
2519 CLOBBER_ROL;
2520 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2521 d=rmw(d,4,4);
2522 Dif (r!=1) {
2523 write_log("Illegal register %d in raw_rol_l\n",r);
2524 abort();
2525 }
2526 raw_rol_l_rr(d,r) ;
2527 unlock2(r);
2528 unlock2(d);
2529 }
2530 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2531
2532 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2533 { /* Can only do this with r==1, i.e. cl */
2534
2535 if (isconst(r)) {
2536 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2537 return;
2538 }
2539 CLOBBER_ROL;
2540 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2541 d=rmw(d,2,2);
2542 Dif (r!=1) {
2543 write_log("Illegal register %d in raw_rol_w\n",r);
2544 abort();
2545 }
2546 raw_rol_w_rr(d,r) ;
2547 unlock2(r);
2548 unlock2(d);
2549 }
2550 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2551
2552 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2553 { /* Can only do this with r==1, i.e. cl */
2554
2555 if (isconst(r)) {
2556 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2557 return;
2558 }
2559
2560 CLOBBER_ROL;
2561 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2562 d=rmw(d,1,1);
2563 Dif (r!=1) {
2564 write_log("Illegal register %d in raw_rol_b\n",r);
2565 abort();
2566 }
2567 raw_rol_b_rr(d,r) ;
2568 unlock2(r);
2569 unlock2(d);
2570 }
2571 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2572
2573
2574 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2575 {
2576 if (isconst(r)) {
2577 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2578 return;
2579 }
2580 CLOBBER_SHLL;
2581 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2582 d=rmw(d,4,4);
2583 Dif (r!=1) {
2584 write_log("Illegal register %d in raw_shll_l\n",r);
2585 abort();
2586 }
2587 raw_shll_l_rr(d,r) ;
2588 unlock2(r);
2589 unlock2(d);
2590 }
2591 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2592
2593 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2594 { /* Can only do this with r==1, i.e. cl */
2595
2596 if (isconst(r)) {
2597 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2598 return;
2599 }
2600 CLOBBER_SHLL;
2601 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2602 d=rmw(d,2,2);
2603 Dif (r!=1) {
2604 write_log("Illegal register %d in raw_shll_w\n",r);
2605 abort();
2606 }
2607 raw_shll_w_rr(d,r) ;
2608 unlock2(r);
2609 unlock2(d);
2610 }
2611 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2612
2613 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2614 { /* Can only do this with r==1, i.e. cl */
2615
2616 if (isconst(r)) {
2617 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2618 return;
2619 }
2620
2621 CLOBBER_SHLL;
2622 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2623 d=rmw(d,1,1);
2624 Dif (r!=1) {
2625 write_log("Illegal register %d in raw_shll_b\n",r);
2626 abort();
2627 }
2628 raw_shll_b_rr(d,r) ;
2629 unlock2(r);
2630 unlock2(d);
2631 }
2632 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2633
2634
2635 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2636 {
2637 if (!i && !needflags)
2638 return;
2639 CLOBBER_ROR;
2640 r=rmw(r,1,1);
2641 raw_ror_b_ri(r,i);
2642 unlock2(r);
2643 }
2644 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2645
2646 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2647 {
2648 if (!i && !needflags)
2649 return;
2650 CLOBBER_ROR;
2651 r=rmw(r,2,2);
2652 raw_ror_w_ri(r,i);
2653 unlock2(r);
2654 }
2655 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2656
2657 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2658 {
2659 if (!i && !needflags)
2660 return;
2661 CLOBBER_ROR;
2662 r=rmw(r,4,4);
2663 raw_ror_l_ri(r,i);
2664 unlock2(r);
2665 }
2666 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2667
2668 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2669 {
2670 if (isconst(r)) {
2671 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2672 return;
2673 }
2674 CLOBBER_ROR;
2675 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2676 d=rmw(d,4,4);
2677 raw_ror_l_rr(d,r) ;
2678 unlock2(r);
2679 unlock2(d);
2680 }
2681 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2682
2683 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2684 {
2685 if (isconst(r)) {
2686 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2687 return;
2688 }
2689 CLOBBER_ROR;
2690 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2691 d=rmw(d,2,2);
2692 raw_ror_w_rr(d,r) ;
2693 unlock2(r);
2694 unlock2(d);
2695 }
2696 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2697
2698 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2699 {
2700 if (isconst(r)) {
2701 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2702 return;
2703 }
2704
2705 CLOBBER_ROR;
2706 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2707 d=rmw(d,1,1);
2708 raw_ror_b_rr(d,r) ;
2709 unlock2(r);
2710 unlock2(d);
2711 }
2712 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2713
2714 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2715 {
2716 if (isconst(r)) {
2717 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2718 return;
2719 }
2720 CLOBBER_SHRL;
2721 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2722 d=rmw(d,4,4);
2723 Dif (r!=1) {
2724 write_log("Illegal register %d in raw_shrl_l\n",r);
2725 abort();
2726 }
2727 raw_shrl_l_rr(d,r) ;
2728 unlock2(r);
2729 unlock2(d);
2730 }
2731 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2732
2733 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2734 { /* Can only do this with r==1, i.e. cl */
2735
2736 if (isconst(r)) {
2737 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2738 return;
2739 }
2740 CLOBBER_SHRL;
2741 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2742 d=rmw(d,2,2);
2743 Dif (r!=1) {
2744 write_log("Illegal register %d in raw_shrl_w\n",r);
2745 abort();
2746 }
2747 raw_shrl_w_rr(d,r) ;
2748 unlock2(r);
2749 unlock2(d);
2750 }
2751 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2752
2753 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2754 { /* Can only do this with r==1, i.e. cl */
2755
2756 if (isconst(r)) {
2757 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2758 return;
2759 }
2760
2761 CLOBBER_SHRL;
2762 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2763 d=rmw(d,1,1);
2764 Dif (r!=1) {
2765 write_log("Illegal register %d in raw_shrl_b\n",r);
2766 abort();
2767 }
2768 raw_shrl_b_rr(d,r) ;
2769 unlock2(r);
2770 unlock2(d);
2771 }
2772 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2773
2774
2775
2776 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2777 {
2778 if (!i && !needflags)
2779 return;
2780 if (isconst(r) && !needflags) {
2781 live.state[r].val<<=i;
2782 return;
2783 }
2784 CLOBBER_SHLL;
2785 r=rmw(r,4,4);
2786 raw_shll_l_ri(r,i);
2787 unlock2(r);
2788 }
2789 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2790
2791 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2792 {
2793 if (!i && !needflags)
2794 return;
2795 CLOBBER_SHLL;
2796 r=rmw(r,2,2);
2797 raw_shll_w_ri(r,i);
2798 unlock2(r);
2799 }
2800 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2801
2802 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2803 {
2804 if (!i && !needflags)
2805 return;
2806 CLOBBER_SHLL;
2807 r=rmw(r,1,1);
2808 raw_shll_b_ri(r,i);
2809 unlock2(r);
2810 }
2811 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2812
2813 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2814 {
2815 if (!i && !needflags)
2816 return;
2817 if (isconst(r) && !needflags) {
2818 live.state[r].val>>=i;
2819 return;
2820 }
2821 CLOBBER_SHRL;
2822 r=rmw(r,4,4);
2823 raw_shrl_l_ri(r,i);
2824 unlock2(r);
2825 }
2826 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2827
2828 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2829 {
2830 if (!i && !needflags)
2831 return;
2832 CLOBBER_SHRL;
2833 r=rmw(r,2,2);
2834 raw_shrl_w_ri(r,i);
2835 unlock2(r);
2836 }
2837 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2838
2839 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2840 {
2841 if (!i && !needflags)
2842 return;
2843 CLOBBER_SHRL;
2844 r=rmw(r,1,1);
2845 raw_shrl_b_ri(r,i);
2846 unlock2(r);
2847 }
2848 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2849
2850 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2851 {
2852 if (!i && !needflags)
2853 return;
2854 CLOBBER_SHRA;
2855 r=rmw(r,4,4);
2856 raw_shra_l_ri(r,i);
2857 unlock2(r);
2858 }
2859 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2860
2861 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2862 {
2863 if (!i && !needflags)
2864 return;
2865 CLOBBER_SHRA;
2866 r=rmw(r,2,2);
2867 raw_shra_w_ri(r,i);
2868 unlock2(r);
2869 }
2870 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2871
2872 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2873 {
2874 if (!i && !needflags)
2875 return;
2876 CLOBBER_SHRA;
2877 r=rmw(r,1,1);
2878 raw_shra_b_ri(r,i);
2879 unlock2(r);
2880 }
2881 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2882
2883 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2884 {
2885 if (isconst(r)) {
2886 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2887 return;
2888 }
2889 CLOBBER_SHRA;
2890 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2891 d=rmw(d,4,4);
2892 Dif (r!=1) {
2893 write_log("Illegal register %d in raw_shra_l\n",r);
2894 abort();
2895 }
2896 raw_shra_l_rr(d,r) ;
2897 unlock2(r);
2898 unlock2(d);
2899 }
2900 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2901
2902 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2903 { /* Can only do this with r==1, i.e. cl */
2904
2905 if (isconst(r)) {
2906 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2907 return;
2908 }
2909 CLOBBER_SHRA;
2910 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2911 d=rmw(d,2,2);
2912 Dif (r!=1) {
2913 write_log("Illegal register %d in raw_shra_w\n",r);
2914 abort();
2915 }
2916 raw_shra_w_rr(d,r) ;
2917 unlock2(r);
2918 unlock2(d);
2919 }
2920 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2921
2922 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2923 { /* Can only do this with r==1, i.e. cl */
2924
2925 if (isconst(r)) {
2926 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2927 return;
2928 }
2929
2930 CLOBBER_SHRA;
2931 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2932 d=rmw(d,1,1);
2933 Dif (r!=1) {
2934 write_log("Illegal register %d in raw_shra_b\n",r);
2935 abort();
2936 }
2937 raw_shra_b_rr(d,r) ;
2938 unlock2(r);
2939 unlock2(d);
2940 }
2941 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2942
2943
2944 MIDFUNC(2,setcc,(W1 d, IMM cc))
2945 {
2946 CLOBBER_SETCC;
2947 d=writereg(d,1);
2948 raw_setcc(d,cc);
2949 unlock2(d);
2950 }
2951 MENDFUNC(2,setcc,(W1 d, IMM cc))
2952
2953 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2954 {
2955 CLOBBER_SETCC;
2956 raw_setcc_m(d,cc);
2957 }
2958 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2959
2960 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2961 {
2962 if (d==s)
2963 return;
2964 CLOBBER_CMOV;
2965 s=readreg(s,4);
2966 d=rmw(d,4,4);
2967 raw_cmov_l_rr(d,s,cc);
2968 unlock2(s);
2969 unlock2(d);
2970 }
2971 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2972
2973 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2974 {
2975 CLOBBER_CMOV;
2976 d=rmw(d,4,4);
2977 raw_cmov_l_rm(d,s,cc);
2978 unlock2(d);
2979 }
2980 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2981
2982 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2983 {
2984 CLOBBER_BSF;
2985 s = readreg(s, 4);
2986 d = writereg(d, 4);
2987 raw_bsf_l_rr(d, s);
2988 unlock2(s);
2989 unlock2(d);
2990 }
2991 MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2992
2993 /* Set the Z flag depending on the value in s. Note that the
2994 value has to be 0 or -1 (or, more precisely, for non-zero
2995 values, bit 14 must be set)! */
2996 MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2997 {
2998 CLOBBER_BSF;
2999 s=rmw_specific(s,4,4,FLAG_NREG3);
3000 tmp=writereg(tmp,4);
3001 raw_flags_set_zero(s, tmp);
3002 unlock2(tmp);
3003 unlock2(s);
3004 }
3005 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
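/* simulate_bsf is the fallback used by set_zero() near the end of this
   file when setzflg_uses_bsf is false, i.e. when the host bsf
   instruction is not used to set the Z flag directly. */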
3006
3007 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3008 {
3009 CLOBBER_MUL;
3010 s=readreg(s,4);
3011 d=rmw(d,4,4);
3012 raw_imul_32_32(d,s);
3013 unlock2(s);
3014 unlock2(d);
3015 }
3016 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3017
3018 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3019 {
3020 CLOBBER_MUL;
3021 s=rmw_specific(s,4,4,MUL_NREG2);
3022 d=rmw_specific(d,4,4,MUL_NREG1);
3023 raw_imul_64_32(d,s);
3024 unlock2(s);
3025 unlock2(d);
3026 }
3027 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3028
3029 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3030 {
3031 CLOBBER_MUL;
3032 s=rmw_specific(s,4,4,MUL_NREG2);
3033 d=rmw_specific(d,4,4,MUL_NREG1);
3034 raw_mul_64_32(d,s);
3035 unlock2(s);
3036 unlock2(d);
3037 }
3038 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3039
3040 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3041 {
3042 CLOBBER_MUL;
3043 s=readreg(s,4);
3044 d=rmw(d,4,4);
3045 raw_mul_32_32(d,s);
3046 unlock2(s);
3047 unlock2(d);
3048 }
3049 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3050
3051 #if SIZEOF_VOID_P == 8
3052 MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3053 {
3054 int isrmw;
3055
3056 if (isconst(s)) {
3057 set_const(d,(uae_s32)live.state[s].val);
3058 return;
3059 }
3060
3061 CLOBBER_SE32;
3062 isrmw=(s==d);
3063 if (!isrmw) {
3064 s=readreg(s,4);
3065 d=writereg(d,4);
3066 }
3067 else { /* If we try to lock this twice, with different sizes, we
3068 are in trouble! */
3069 s=d=rmw(s,4,4);
3070 }
3071 raw_sign_extend_32_rr(d,s);
3072 if (!isrmw) {
3073 unlock2(d);
3074 unlock2(s);
3075 }
3076 else {
3077 unlock2(s);
3078 }
3079 }
3080 MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3081 #endif
3082
3083 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3084 {
3085 int isrmw;
3086
3087 if (isconst(s)) {
3088 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3089 return;
3090 }
3091
3092 CLOBBER_SE16;
3093 isrmw=(s==d);
3094 if (!isrmw) {
3095 s=readreg(s,2);
3096 d=writereg(d,4);
3097 }
3098 else { /* If we try to lock this twice, with different sizes, we
3099 are in trouble! */
3100 s=d=rmw(s,4,2);
3101 }
3102 raw_sign_extend_16_rr(d,s);
3103 if (!isrmw) {
3104 unlock2(d);
3105 unlock2(s);
3106 }
3107 else {
3108 unlock2(s);
3109 }
3110 }
3111 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3112
3113 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3114 {
3115 int isrmw;
3116
3117 if (isconst(s)) {
3118 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3119 return;
3120 }
3121
3122 isrmw=(s==d);
3123 CLOBBER_SE8;
3124 if (!isrmw) {
3125 s=readreg(s,1);
3126 d=writereg(d,4);
3127 }
3128 else { /* If we try to lock this twice, with different sizes, we
3129 are in trouble! */
3130 s=d=rmw(s,4,1);
3131 }
3132
3133 raw_sign_extend_8_rr(d,s);
3134
3135 if (!isrmw) {
3136 unlock2(d);
3137 unlock2(s);
3138 }
3139 else {
3140 unlock2(s);
3141 }
3142 }
3143 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3144
3145
3146 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3147 {
3148 int isrmw;
3149
3150 if (isconst(s)) {
3151 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3152 return;
3153 }
3154
3155 isrmw=(s==d);
3156 CLOBBER_ZE16;
3157 if (!isrmw) {
3158 s=readreg(s,2);
3159 d=writereg(d,4);
3160 }
3161 else { /* If we try to lock this twice, with different sizes, we
3162 are in trouble! */
3163 s=d=rmw(s,4,2);
3164 }
3165 raw_zero_extend_16_rr(d,s);
3166 if (!isrmw) {
3167 unlock2(d);
3168 unlock2(s);
3169 }
3170 else {
3171 unlock2(s);
3172 }
3173 }
3174 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3175
3176 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3177 {
3178 int isrmw;
3179 if (isconst(s)) {
3180 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3181 return;
3182 }
3183
3184 isrmw=(s==d);
3185 CLOBBER_ZE8;
3186 if (!isrmw) {
3187 s=readreg(s,1);
3188 d=writereg(d,4);
3189 }
3190 else { /* If we try to lock this twice, with different sizes, we
3191 are in trouble! */
3192 s=d=rmw(s,4,1);
3193 }
3194
3195 raw_zero_extend_8_rr(d,s);
3196
3197 if (!isrmw) {
3198 unlock2(d);
3199 unlock2(s);
3200 }
3201 else {
3202 unlock2(s);
3203 }
3204 }
3205 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3206
3207 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3208 {
3209 if (d==s)
3210 return;
3211 if (isconst(s)) {
3212 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3213 return;
3214 }
3215
3216 CLOBBER_MOV;
3217 s=readreg(s,1);
3218 d=writereg(d,1);
3219 raw_mov_b_rr(d,s);
3220 unlock2(d);
3221 unlock2(s);
3222 }
3223 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3224
3225 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3226 {
3227 if (d==s)
3228 return;
3229 if (isconst(s)) {
3230 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3231 return;
3232 }
3233
3234 CLOBBER_MOV;
3235 s=readreg(s,2);
3236 d=writereg(d,2);
3237 raw_mov_w_rr(d,s);
3238 unlock2(d);
3239 unlock2(s);
3240 }
3241 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3242
3243
3244 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3245 {
3246 CLOBBER_MOV;
3247 baser=readreg(baser,4);
3248 index=readreg(index,4);
3249 d=writereg(d,4);
3250
3251 raw_mov_l_rrm_indexed(d,baser,index,factor);
3252 unlock2(d);
3253 unlock2(baser);
3254 unlock2(index);
3255 }
3256 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3257
3258 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3259 {
3260 CLOBBER_MOV;
3261 baser=readreg(baser,4);
3262 index=readreg(index,4);
3263 d=writereg(d,2);
3264
3265 raw_mov_w_rrm_indexed(d,baser,index,factor);
3266 unlock2(d);
3267 unlock2(baser);
3268 unlock2(index);
3269 }
3270 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3271
3272 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3273 {
3274 CLOBBER_MOV;
3275 baser=readreg(baser,4);
3276 index=readreg(index,4);
3277 d=writereg(d,1);
3278
3279 raw_mov_b_rrm_indexed(d,baser,index,factor);
3280
3281 unlock2(d);
3282 unlock2(baser);
3283 unlock2(index);
3284 }
3285 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3286
3287
3288 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3289 {
3290 CLOBBER_MOV;
3291 baser=readreg(baser,4);
3292 index=readreg(index,4);
3293 s=readreg(s,4);
3294
3295 Dif (baser==s || index==s)
3296 abort();
3297
3298
3299 raw_mov_l_mrr_indexed(baser,index,factor,s);
3300 unlock2(s);
3301 unlock2(baser);
3302 unlock2(index);
3303 }
3304 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3305
3306 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3307 {
3308 CLOBBER_MOV;
3309 baser=readreg(baser,4);
3310 index=readreg(index,4);
3311 s=readreg(s,2);
3312
3313 raw_mov_w_mrr_indexed(baser,index,factor,s);
3314 unlock2(s);
3315 unlock2(baser);
3316 unlock2(index);
3317 }
3318 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3319
3320 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3321 {
3322 CLOBBER_MOV;
3323 s=readreg(s,1);
3324 baser=readreg(baser,4);
3325 index=readreg(index,4);
3326
3327 raw_mov_b_mrr_indexed(baser,index,factor,s);
3328 unlock2(s);
3329 unlock2(baser);
3330 unlock2(index);
3331 }
3332 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3333
3334
3335 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3336 {
3337 int basereg=baser;
3338 int indexreg=index;
3339
3340 CLOBBER_MOV;
3341 s=readreg(s,4);
3342 baser=readreg_offset(baser,4);
3343 index=readreg_offset(index,4);
3344
3345 base+=get_offset(basereg);
3346 base+=factor*get_offset(indexreg);
3347
3348 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3349 unlock2(s);
3350 unlock2(baser);
3351 unlock2(index);
3352 }
3353 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
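/* Note on the readreg_offset()/get_offset() pairing above: any pending
   constant offset on baser or index (see add_offset) is not flushed
   into the register; it is folded into the immediate base displacement
   instead. The brrm/brR accessors below use the same trick. */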
3354
3355 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3356 {
3357 int basereg=baser;
3358 int indexreg=index;
3359
3360 CLOBBER_MOV;
3361 s=readreg(s,2);
3362 baser=readreg_offset(baser,4);
3363 index=readreg_offset(index,4);
3364
3365 base+=get_offset(basereg);
3366 base+=factor*get_offset(indexreg);
3367
3368 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3369 unlock2(s);
3370 unlock2(baser);
3371 unlock2(index);
3372 }
3373 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3374
3375 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3376 {
3377 int basereg=baser;
3378 int indexreg=index;
3379
3380 CLOBBER_MOV;
3381 s=readreg(s,1);
3382 baser=readreg_offset(baser,4);
3383 index=readreg_offset(index,4);
3384
3385 base+=get_offset(basereg);
3386 base+=factor*get_offset(indexreg);
3387
3388 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3389 unlock2(s);
3390 unlock2(baser);
3391 unlock2(index);
3392 }
3393 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3394
3395
3396
3397 /* Read a long from base+baser+factor*index */
3398 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3399 {
3400 int basereg=baser;
3401 int indexreg=index;
3402
3403 CLOBBER_MOV;
3404 baser=readreg_offset(baser,4);
3405 index=readreg_offset(index,4);
3406 base+=get_offset(basereg);
3407 base+=factor*get_offset(indexreg);
3408 d=writereg(d,4);
3409 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3410 unlock2(d);
3411 unlock2(baser);
3412 unlock2(index);
3413 }
3414 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3415
3416
3417 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3418 {
3419 int basereg=baser;
3420 int indexreg=index;
3421
3422 CLOBBER_MOV;
3423 remove_offset(d,-1);
3424 baser=readreg_offset(baser,4);
3425 index=readreg_offset(index,4);
3426 base+=get_offset(basereg);
3427 base+=factor*get_offset(indexreg);
3428 d=writereg(d,2);
3429 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3430 unlock2(d);
3431 unlock2(baser);
3432 unlock2(index);
3433 }
3434 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3435
3436
3437 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3438 {
3439 int basereg=baser;
3440 int indexreg=index;
3441
3442 CLOBBER_MOV;
3443 remove_offset(d,-1);
3444 baser=readreg_offset(baser,4);
3445 index=readreg_offset(index,4);
3446 base+=get_offset(basereg);
3447 base+=factor*get_offset(indexreg);
3448 d=writereg(d,1);
3449 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3450 unlock2(d);
3451 unlock2(baser);
3452 unlock2(index);
3453 }
3454 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3455
3456 /* Read a long from base+factor*index */
3457 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3458 {
3459 int indexreg=index;
3460
3461 if (isconst(index)) {
3462 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3463 return;
3464 }
3465
3466 CLOBBER_MOV;
3467 index=readreg_offset(index,4);
3468 base+=get_offset(indexreg)*factor;
3469 d=writereg(d,4);
3470
3471 raw_mov_l_rm_indexed(d,base,index,factor);
3472 unlock2(index);
3473 unlock2(d);
3474 }
3475 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3476
3477
3478 /* read the long at the address contained in s+offset and store in d */
3479 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3480 {
3481 if (isconst(s)) {
3482 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3483 return;
3484 }
3485 CLOBBER_MOV;
3486 s=readreg(s,4);
3487 d=writereg(d,4);
3488
3489 raw_mov_l_rR(d,s,offset);
3490 unlock2(d);
3491 unlock2(s);
3492 }
3493 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3494
3495 /* read the word at the address contained in s+offset and store in d */
3496 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3497 {
3498 if (isconst(s)) {
3499 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3500 return;
3501 }
3502 CLOBBER_MOV;
3503 s=readreg(s,4);
3504 d=writereg(d,2);
3505
3506 raw_mov_w_rR(d,s,offset);
3507 unlock2(d);
3508 unlock2(s);
3509 }
3510 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3511
3512 /* read the byte at the address contained in s+offset and store in d */
3513 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3514 {
3515 if (isconst(s)) {
3516 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3517 return;
3518 }
3519 CLOBBER_MOV;
3520 s=readreg(s,4);
3521 d=writereg(d,1);
3522
3523 raw_mov_b_rR(d,s,offset);
3524 unlock2(d);
3525 unlock2(s);
3526 }
3527 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3528
3529 /* read the long at the address contained in s+offset and store in d */
3530 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3531 {
3532 int sreg=s;
3533 if (isconst(s)) {
3534 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3535 return;
3536 }
3537 CLOBBER_MOV;
3538 s=readreg_offset(s,4);
3539 offset+=get_offset(sreg);
3540 d=writereg(d,4);
3541
3542 raw_mov_l_brR(d,s,offset);
3543 unlock2(d);
3544 unlock2(s);
3545 }
3546 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3547
3548 /* read the word at the address contained in s+offset and store in d */
3549 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3550 {
3551 int sreg=s;
3552 if (isconst(s)) {
3553 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3554 return;
3555 }
3556 CLOBBER_MOV;
3557 remove_offset(d,-1);
3558 s=readreg_offset(s,4);
3559 offset+=get_offset(sreg);
3560 d=writereg(d,2);
3561
3562 raw_mov_w_brR(d,s,offset);
3563 unlock2(d);
3564 unlock2(s);
3565 }
3566 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3567
3568 /* read the byte at the address contained in s+offset and store in d */
3569 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3570 {
3571 int sreg=s;
3572 if (isconst(s)) {
3573 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3574 return;
3575 }
3576 CLOBBER_MOV;
3577 remove_offset(d,-1);
3578 s=readreg_offset(s,4);
3579 offset+=get_offset(sreg);
3580 d=writereg(d,1);
3581
3582 raw_mov_b_brR(d,s,offset);
3583 unlock2(d);
3584 unlock2(s);
3585 }
3586 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3587
3588 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3589 {
3590 int dreg=d;
3591 if (isconst(d)) {
3592 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3593 return;
3594 }
3595
3596 CLOBBER_MOV;
3597 d=readreg_offset(d,4);
3598 offset+=get_offset(dreg);
3599 raw_mov_l_Ri(d,i,offset);
3600 unlock2(d);
3601 }
3602 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3603
3604 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3605 {
3606 int dreg=d;
3607 if (isconst(d)) {
3608 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3609 return;
3610 }
3611
3612 CLOBBER_MOV;
3613 d=readreg_offset(d,4);
3614 offset+=get_offset(dreg);
3615 raw_mov_w_Ri(d,i,offset);
3616 unlock2(d);
3617 }
3618 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3619
3620 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3621 {
3622 int dreg=d;
3623 if (isconst(d)) {
3624 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3625 return;
3626 }
3627
3628 CLOBBER_MOV;
3629 d=readreg_offset(d,4);
3630 offset+=get_offset(dreg);
3631 raw_mov_b_Ri(d,i,offset);
3632 unlock2(d);
3633 }
3634 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3635
3636 /* Warning! OFFSET is byte sized only! */
3637 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3638 {
3639 if (isconst(d)) {
3640 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3641 return;
3642 }
3643 if (isconst(s)) {
3644 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3645 return;
3646 }
3647
3648 CLOBBER_MOV;
3649 s=readreg(s,4);
3650 d=readreg(d,4);
3651
3652 raw_mov_l_Rr(d,s,offset);
3653 unlock2(d);
3654 unlock2(s);
3655 }
3656 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3657
3658 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3659 {
3660 if (isconst(d)) {
3661 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3662 return;
3663 }
3664 if (isconst(s)) {
3665 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3666 return;
3667 }
3668
3669 CLOBBER_MOV;
3670 s=readreg(s,2);
3671 d=readreg(d,4);
3672 raw_mov_w_Rr(d,s,offset);
3673 unlock2(d);
3674 unlock2(s);
3675 }
3676 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3677
3678 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3679 {
3680 if (isconst(d)) {
3681 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3682 return;
3683 }
3684 if (isconst(s)) {
3685 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3686 return;
3687 }
3688
3689 CLOBBER_MOV;
3690 s=readreg(s,1);
3691 d=readreg(d,4);
3692 raw_mov_b_Rr(d,s,offset);
3693 unlock2(d);
3694 unlock2(s);
3695 }
3696 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3697
3698 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3699 {
3700 if (isconst(s)) {
3701 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3702 return;
3703 }
3704 #if USE_OFFSET
3705 if (d==s) {
3706 add_offset(d,offset);
3707 return;
3708 }
3709 #endif
3710 CLOBBER_LEA;
3711 s=readreg(s,4);
3712 d=writereg(d,4);
3713 raw_lea_l_brr(d,s,offset);
3714 unlock2(d);
3715 unlock2(s);
3716 }
3717 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
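/* Under USE_OFFSET, adding a constant to a register emits no code at
   all here: the constant is merely recorded with add_offset() and only
   materialized when a later use calls remove_offset() or folds it in
   via get_offset(). add_l_ri() and sub_l_ri() below do the same. */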
3718
3719 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3720 {
3721 if (!offset) {
3722 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3723 return;
3724 }
3725 CLOBBER_LEA;
3726 s=readreg(s,4);
3727 index=readreg(index,4);
3728 d=writereg(d,4);
3729
3730 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3731 unlock2(d);
3732 unlock2(index);
3733 unlock2(s);
3734 }
3735 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3736
3737 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3738 {
3739 CLOBBER_LEA;
3740 s=readreg(s,4);
3741 index=readreg(index,4);
3742 d=writereg(d,4);
3743
3744 raw_lea_l_rr_indexed(d,s,index,factor);
3745 unlock2(d);
3746 unlock2(index);
3747 unlock2(s);
3748 }
3749 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3750
3751 /* write s to the long at the address contained in d+offset */
3752 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3753 {
3754 int dreg=d;
3755 if (isconst(d)) {
3756 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3757 return;
3758 }
3759
3760 CLOBBER_MOV;
3761 s=readreg(s,4);
3762 d=readreg_offset(d,4);
3763 offset+=get_offset(dreg);
3764
3765 raw_mov_l_bRr(d,s,offset);
3766 unlock2(d);
3767 unlock2(s);
3768 }
3769 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3770
3771 /* write s to the word at the address contained in d+offset */
3772 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3773 {
3774 int dreg=d;
3775
3776 if (isconst(d)) {
3777 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3778 return;
3779 }
3780
3781 CLOBBER_MOV;
3782 s=readreg(s,2);
3783 d=readreg_offset(d,4);
3784 offset+=get_offset(dreg);
3785 raw_mov_w_bRr(d,s,offset);
3786 unlock2(d);
3787 unlock2(s);
3788 }
3789 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3790
3791 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3792 {
3793 int dreg=d;
3794 if (isconst(d)) {
3795 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3796 return;
3797 }
3798
3799 CLOBBER_MOV;
3800 s=readreg(s,1);
3801 d=readreg_offset(d,4);
3802 offset+=get_offset(dreg);
3803 raw_mov_b_bRr(d,s,offset);
3804 unlock2(d);
3805 unlock2(s);
3806 }
3807 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3808
3809 MIDFUNC(1,bswap_32,(RW4 r))
3810 {
3811 int reg=r;
3812
3813 if (isconst(r)) {
3814 uae_u32 oldv=live.state[r].val;
3815 live.state[r].val=reverse32(oldv);
3816 return;
3817 }
3818
3819 CLOBBER_SW32;
3820 r=rmw(r,4,4);
3821 raw_bswap_32(r);
3822 unlock2(r);
3823 }
3824 MENDFUNC(1,bswap_32,(RW4 r))
3825
3826 MIDFUNC(1,bswap_16,(RW2 r))
3827 {
3828 if (isconst(r)) {
3829 uae_u32 oldv=live.state[r].val;
3830 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3831 (oldv&0xffff0000);
3832 return;
3833 }
3834
3835 CLOBBER_SW16;
3836 r=rmw(r,2,2);
3837
3838 raw_bswap_16(r);
3839 unlock2(r);
3840 }
3841 MENDFUNC(1,bswap_16,(RW2 r))
3842
3843
3844
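/* mov_l_rr emits no host move: d is simply aliased to the nreg already
   holding s (d is appended to that nreg's holds[] list) and marked
   DIRTY, so its value is written back from that register if needed. */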
3845 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3846 {
3847 int olds;
3848
3849 if (d==s) { /* How pointless! */
3850 return;
3851 }
3852 if (isconst(s)) {
3853 COMPCALL(mov_l_ri)(d,live.state[s].val);
3854 return;
3855 }
3856 olds=s;
3857 disassociate(d);
3858 s=readreg_offset(s,4);
3859 live.state[d].realreg=s;
3860 live.state[d].realind=live.nat[s].nholds;
3861 live.state[d].val=live.state[olds].val;
3862 live.state[d].validsize=4;
3863 live.state[d].dirtysize=4;
3864 set_status(d,DIRTY);
3865
3866 live.nat[s].holds[live.nat[s].nholds]=d;
3867 live.nat[s].nholds++;
3868 log_clobberreg(d);
3869 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3870 d,s,live.state[d].realind,live.nat[s].nholds); */
3871 unlock2(s);
3872 }
3873 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3874
3875 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3876 {
3877 if (isconst(s)) {
3878 COMPCALL(mov_l_mi)(d,live.state[s].val);
3879 return;
3880 }
3881 CLOBBER_MOV;
3882 s=readreg(s,4);
3883
3884 raw_mov_l_mr(d,s);
3885 unlock2(s);
3886 }
3887 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3888
3889
3890 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3891 {
3892 if (isconst(s)) {
3893 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3894 return;
3895 }
3896 CLOBBER_MOV;
3897 s=readreg(s,2);
3898
3899 raw_mov_w_mr(d,s);
3900 unlock2(s);
3901 }
3902 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3903
3904 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3905 {
3906 CLOBBER_MOV;
3907 d=writereg(d,2);
3908
3909 raw_mov_w_rm(d,s);
3910 unlock2(d);
3911 }
3912 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3913
3914 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3915 {
3916 if (isconst(s)) {
3917 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3918 return;
3919 }
3920
3921 CLOBBER_MOV;
3922 s=readreg(s,1);
3923
3924 raw_mov_b_mr(d,s);
3925 unlock2(s);
3926 }
3927 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3928
3929 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3930 {
3931 CLOBBER_MOV;
3932 d=writereg(d,1);
3933
3934 raw_mov_b_rm(d,s);
3935 unlock2(d);
3936 }
3937 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3938
3939 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3940 {
3941 set_const(d,s);
3942 return;
3943 }
3944 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3945
3946 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3947 {
3948 CLOBBER_MOV;
3949 d=writereg(d,2);
3950
3951 raw_mov_w_ri(d,s);
3952 unlock2(d);
3953 }
3954 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3955
3956 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3957 {
3958 CLOBBER_MOV;
3959 d=writereg(d,1);
3960
3961 raw_mov_b_ri(d,s);
3962 unlock2(d);
3963 }
3964 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3965
3966
3967 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3968 {
3969 CLOBBER_ADD;
3970 raw_add_l_mi(d,s) ;
3971 }
3972 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3973
3974 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3975 {
3976 CLOBBER_ADD;
3977 raw_add_w_mi(d,s) ;
3978 }
3979 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3980
3981 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3982 {
3983 CLOBBER_ADD;
3984 raw_add_b_mi(d,s) ;
3985 }
3986 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3987
3988
3989 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3990 {
3991 CLOBBER_TEST;
3992 d=readreg(d,4);
3993
3994 raw_test_l_ri(d,i);
3995 unlock2(d);
3996 }
3997 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3998
3999 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
4000 {
4001 CLOBBER_TEST;
4002 d=readreg(d,4);
4003 s=readreg(s,4);
4004
4005 raw_test_l_rr(d,s);
4006 unlock2(d);
4007 unlock2(s);
4008 }
4009 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4010
4011 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4012 {
4013 CLOBBER_TEST;
4014 d=readreg(d,2);
4015 s=readreg(s,2);
4016
4017 raw_test_w_rr(d,s);
4018 unlock2(d);
4019 unlock2(s);
4020 }
4021 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4022
4023 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4024 {
4025 CLOBBER_TEST;
4026 d=readreg(d,1);
4027 s=readreg(s,1);
4028
4029 raw_test_b_rr(d,s);
4030 unlock2(d);
4031 unlock2(s);
4032 }
4033 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4034
4035
4036 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4037 {
4038 if (isconst(d) && !needflags) {
4039 live.state[d].val &= i;
4040 return;
4041 }
4042
4043 CLOBBER_AND;
4044 d=rmw(d,4,4);
4045
4046 raw_and_l_ri(d,i);
4047 unlock2(d);
4048 }
4049 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4050
4051 MIDFUNC(2,and_l,(RW4 d, R4 s))
4052 {
4053 CLOBBER_AND;
4054 s=readreg(s,4);
4055 d=rmw(d,4,4);
4056
4057 raw_and_l(d,s);
4058 unlock2(d);
4059 unlock2(s);
4060 }
4061 MENDFUNC(2,and_l,(RW4 d, R4 s))
4062
4063 MIDFUNC(2,and_w,(RW2 d, R2 s))
4064 {
4065 CLOBBER_AND;
4066 s=readreg(s,2);
4067 d=rmw(d,2,2);
4068
4069 raw_and_w(d,s);
4070 unlock2(d);
4071 unlock2(s);
4072 }
4073 MENDFUNC(2,and_w,(RW2 d, R2 s))
4074
4075 MIDFUNC(2,and_b,(RW1 d, R1 s))
4076 {
4077 CLOBBER_AND;
4078 s=readreg(s,1);
4079 d=rmw(d,1,1);
4080
4081 raw_and_b(d,s);
4082 unlock2(d);
4083 unlock2(s);
4084 }
4085 MENDFUNC(2,and_b,(RW1 d, R1 s))
4086
4087 // gb-- used for making an fpcr value in compemu_fpp.cpp
4088 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4089 {
4090 CLOBBER_OR;
4091 d=rmw(d,4,4);
4092
4093 raw_or_l_rm(d,s);
4094 unlock2(d);
4095 }
4096 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4097
4098 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4099 {
4100 if (isconst(d) && !needflags) {
4101 live.state[d].val|=i;
4102 return;
4103 }
4104 CLOBBER_OR;
4105 d=rmw(d,4,4);
4106
4107 raw_or_l_ri(d,i);
4108 unlock2(d);
4109 }
4110 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4111
4112 MIDFUNC(2,or_l,(RW4 d, R4 s))
4113 {
4114 if (isconst(d) && isconst(s) && !needflags) {
4115 live.state[d].val|=live.state[s].val;
4116 return;
4117 }
4118 CLOBBER_OR;
4119 s=readreg(s,4);
4120 d=rmw(d,4,4);
4121
4122 raw_or_l(d,s);
4123 unlock2(d);
4124 unlock2(s);
4125 }
4126 MENDFUNC(2,or_l,(RW4 d, R4 s))
4127
4128 MIDFUNC(2,or_w,(RW2 d, R2 s))
4129 {
4130 CLOBBER_OR;
4131 s=readreg(s,2);
4132 d=rmw(d,2,2);
4133
4134 raw_or_w(d,s);
4135 unlock2(d);
4136 unlock2(s);
4137 }
4138 MENDFUNC(2,or_w,(RW2 d, R2 s))
4139
4140 MIDFUNC(2,or_b,(RW1 d, R1 s))
4141 {
4142 CLOBBER_OR;
4143 s=readreg(s,1);
4144 d=rmw(d,1,1);
4145
4146 raw_or_b(d,s);
4147 unlock2(d);
4148 unlock2(s);
4149 }
4150 MENDFUNC(2,or_b,(RW1 d, R1 s))
4151
4152 MIDFUNC(2,adc_l,(RW4 d, R4 s))
4153 {
4154 CLOBBER_ADC;
4155 s=readreg(s,4);
4156 d=rmw(d,4,4);
4157
4158 raw_adc_l(d,s);
4159
4160 unlock2(d);
4161 unlock2(s);
4162 }
4163 MENDFUNC(2,adc_l,(RW4 d, R4 s))
4164
4165 MIDFUNC(2,adc_w,(RW2 d, R2 s))
4166 {
4167 CLOBBER_ADC;
4168 s=readreg(s,2);
4169 d=rmw(d,2,2);
4170
4171 raw_adc_w(d,s);
4172 unlock2(d);
4173 unlock2(s);
4174 }
4175 MENDFUNC(2,adc_w,(RW2 d, R2 s))
4176
4177 MIDFUNC(2,adc_b,(RW1 d, R1 s))
4178 {
4179 CLOBBER_ADC;
4180 s=readreg(s,1);
4181 d=rmw(d,1,1);
4182
4183 raw_adc_b(d,s);
4184 unlock2(d);
4185 unlock2(s);
4186 }
4187 MENDFUNC(2,adc_b,(RW1 d, R1 s))
4188
4189 MIDFUNC(2,add_l,(RW4 d, R4 s))
4190 {
4191 if (isconst(s)) {
4192 COMPCALL(add_l_ri)(d,live.state[s].val);
4193 return;
4194 }
4195
4196 CLOBBER_ADD;
4197 s=readreg(s,4);
4198 d=rmw(d,4,4);
4199
4200 raw_add_l(d,s);
4201
4202 unlock2(d);
4203 unlock2(s);
4204 }
4205 MENDFUNC(2,add_l,(RW4 d, R4 s))
4206
4207 MIDFUNC(2,add_w,(RW2 d, R2 s))
4208 {
4209 if (isconst(s)) {
4210 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4211 return;
4212 }
4213
4214 CLOBBER_ADD;
4215 s=readreg(s,2);
4216 d=rmw(d,2,2);
4217
4218 raw_add_w(d,s);
4219 unlock2(d);
4220 unlock2(s);
4221 }
4222 MENDFUNC(2,add_w,(RW2 d, R2 s))
4223
4224 MIDFUNC(2,add_b,(RW1 d, R1 s))
4225 {
4226 if (isconst(s)) {
4227 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4228 return;
4229 }
4230
4231 CLOBBER_ADD;
4232 s=readreg(s,1);
4233 d=rmw(d,1,1);
4234
4235 raw_add_b(d,s);
4236 unlock2(d);
4237 unlock2(s);
4238 }
4239 MENDFUNC(2,add_b,(RW1 d, R1 s))
4240
4241 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4242 {
4243 if (!i && !needflags)
4244 return;
4245 if (isconst(d) && !needflags) {
4246 live.state[d].val-=i;
4247 return;
4248 }
4249 #if USE_OFFSET
4250 if (!needflags) {
4251 add_offset(d,-i);
4252 return;
4253 }
4254 #endif
4255
4256 CLOBBER_SUB;
4257 d=rmw(d,4,4);
4258
4259 raw_sub_l_ri(d,i);
4260 unlock2(d);
4261 }
4262 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4263
4264 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4265 {
4266 if (!i && !needflags)
4267 return;
4268
4269 CLOBBER_SUB;
4270 d=rmw(d,2,2);
4271
4272 raw_sub_w_ri(d,i);
4273 unlock2(d);
4274 }
4275 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4276
4277 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4278 {
4279 if (!i && !needflags)
4280 return;
4281
4282 CLOBBER_SUB;
4283 d=rmw(d,1,1);
4284
4285 raw_sub_b_ri(d,i);
4286
4287 unlock2(d);
4288 }
4289 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4290
4291 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4292 {
4293 if (!i && !needflags)
4294 return;
4295 if (isconst(d) && !needflags) {
4296 live.state[d].val+=i;
4297 return;
4298 }
4299 #if USE_OFFSET
4300 if (!needflags) {
4301 add_offset(d,i);
4302 return;
4303 }
4304 #endif
4305 CLOBBER_ADD;
4306 d=rmw(d,4,4);
4307 raw_add_l_ri(d,i);
4308 unlock2(d);
4309 }
4310 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4311
4312 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4313 {
4314 if (!i && !needflags)
4315 return;
4316
4317 CLOBBER_ADD;
4318 d=rmw(d,2,2);
4319
4320 raw_add_w_ri(d,i);
4321 unlock2(d);
4322 }
4323 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4324
4325 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4326 {
4327 if (!i && !needflags)
4328 return;
4329
4330 CLOBBER_ADD;
4331 d=rmw(d,1,1);
4332
4333 raw_add_b_ri(d,i);
4334
4335 unlock2(d);
4336 }
4337 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4338
4339 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4340 {
4341 CLOBBER_SBB;
4342 s=readreg(s,4);
4343 d=rmw(d,4,4);
4344
4345 raw_sbb_l(d,s);
4346 unlock2(d);
4347 unlock2(s);
4348 }
4349 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4350
4351 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4352 {
4353 CLOBBER_SBB;
4354 s=readreg(s,2);
4355 d=rmw(d,2,2);
4356
4357 raw_sbb_w(d,s);
4358 unlock2(d);
4359 unlock2(s);
4360 }
4361 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4362
4363 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4364 {
4365 CLOBBER_SBB;
4366 s=readreg(s,1);
4367 d=rmw(d,1,1);
4368
4369 raw_sbb_b(d,s);
4370 unlock2(d);
4371 unlock2(s);
4372 }
4373 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4374
4375 MIDFUNC(2,sub_l,(RW4 d, R4 s))
4376 {
4377 if (isconst(s)) {
4378 COMPCALL(sub_l_ri)(d,live.state[s].val);
4379 return;
4380 }
4381
4382 CLOBBER_SUB;
4383 s=readreg(s,4);
4384 d=rmw(d,4,4);
4385
4386 raw_sub_l(d,s);
4387 unlock2(d);
4388 unlock2(s);
4389 }
4390 MENDFUNC(2,sub_l,(RW4 d, R4 s))
4391
4392 MIDFUNC(2,sub_w,(RW2 d, R2 s))
4393 {
4394 if (isconst(s)) {
4395 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4396 return;
4397 }
4398
4399 CLOBBER_SUB;
4400 s=readreg(s,2);
4401 d=rmw(d,2,2);
4402
4403 raw_sub_w(d,s);
4404 unlock2(d);
4405 unlock2(s);
4406 }
4407 MENDFUNC(2,sub_w,(RW2 d, R2 s))
4408
4409 MIDFUNC(2,sub_b,(RW1 d, R1 s))
4410 {
4411 if (isconst(s)) {
4412 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4413 return;
4414 }
4415
4416 CLOBBER_SUB;
4417 s=readreg(s,1);
4418 d=rmw(d,1,1);
4419
4420 raw_sub_b(d,s);
4421 unlock2(d);
4422 unlock2(s);
4423 }
4424 MENDFUNC(2,sub_b,(RW1 d, R1 s))
4425
4426 MIDFUNC(2,cmp_l,(R4 d, R4 s))
4427 {
4428 CLOBBER_CMP;
4429 s=readreg(s,4);
4430 d=readreg(d,4);
4431
4432 raw_cmp_l(d,s);
4433 unlock2(d);
4434 unlock2(s);
4435 }
4436 MENDFUNC(2,cmp_l,(R4 d, R4 s))
4437
4438 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4439 {
4440 CLOBBER_CMP;
4441 r=readreg(r,4);
4442
4443 raw_cmp_l_ri(r,i);
4444 unlock2(r);
4445 }
4446 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4447
4448 MIDFUNC(2,cmp_w,(R2 d, R2 s))
4449 {
4450 CLOBBER_CMP;
4451 s=readreg(s,2);
4452 d=readreg(d,2);
4453
4454 raw_cmp_w(d,s);
4455 unlock2(d);
4456 unlock2(s);
4457 }
4458 MENDFUNC(2,cmp_w,(R2 d, R2 s))
4459
4460 MIDFUNC(2,cmp_b,(R1 d, R1 s))
4461 {
4462 CLOBBER_CMP;
4463 s=readreg(s,1);
4464 d=readreg(d,1);
4465
4466 raw_cmp_b(d,s);
4467 unlock2(d);
4468 unlock2(s);
4469 }
4470 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4471
4472
4473 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4474 {
4475 CLOBBER_XOR;
4476 s=readreg(s,4);
4477 d=rmw(d,4,4);
4478
4479 raw_xor_l(d,s);
4480 unlock2(d);
4481 unlock2(s);
4482 }
4483 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4484
4485 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4486 {
4487 CLOBBER_XOR;
4488 s=readreg(s,2);
4489 d=rmw(d,2,2);
4490
4491 raw_xor_w(d,s);
4492 unlock2(d);
4493 unlock2(s);
4494 }
4495 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4496
4497 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4498 {
4499 CLOBBER_XOR;
4500 s=readreg(s,1);
4501 d=rmw(d,1,1);
4502
4503 raw_xor_b(d,s);
4504 unlock2(d);
4505 unlock2(s);
4506 }
4507 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4508
4509 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4510 {
4511 clobber_flags();
4512 remove_all_offsets();
4513 if (osize==4) {
4514 if (out1!=in1 && out1!=r) {
4515 COMPCALL(forget_about)(out1);
4516 }
4517 }
4518 else {
4519 tomem_c(out1);
4520 }
4521
4522 in1=readreg_specific(in1,isize,REG_PAR1);
4523 r=readreg(r,4);
4524 prepare_for_call_1(); /* This should ensure that there won't be
4525 any need for swapping nregs in prepare_for_call_2
4526 */
4527 #if USE_NORMAL_CALLING_CONVENTION
4528 raw_push_l_r(in1);
4529 #endif
4530 unlock2(in1);
4531 unlock2(r);
4532
4533 prepare_for_call_2();
4534 raw_call_r(r);
4535
4536 #if USE_NORMAL_CALLING_CONVENTION
4537 raw_inc_sp(4);
4538 #endif
4539
4540
4541 live.nat[REG_RESULT].holds[0]=out1;
4542 live.nat[REG_RESULT].nholds=1;
4543 live.nat[REG_RESULT].touched=touchcnt++;
4544
4545 live.state[out1].realreg=REG_RESULT;
4546 live.state[out1].realind=0;
4547 live.state[out1].val=0;
4548 live.state[out1].validsize=osize;
4549 live.state[out1].dirtysize=osize;
4550 set_status(out1,DIRTY);
4551 }
4552 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4553
4554 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4555 {
4556 clobber_flags();
4557 remove_all_offsets();
4558 in1=readreg_specific(in1,isize1,REG_PAR1);
4559 in2=readreg_specific(in2,isize2,REG_PAR2);
4560 r=readreg(r,4);
4561 prepare_for_call_1(); /* This should ensure that there won't be
4562 any need for swapping nregs in prepare_for_call_2
4563 */
4564 #if USE_NORMAL_CALLING_CONVENTION
4565 raw_push_l_r(in2);
4566 raw_push_l_r(in1);
4567 #endif
4568 unlock2(r);
4569 unlock2(in1);
4570 unlock2(in2);
4571 prepare_for_call_2();
4572 raw_call_r(r);
4573 #if USE_NORMAL_CALLING_CONVENTION
4574 raw_inc_sp(8);
4575 #endif
4576 }
4577 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4578
4579 /* forget_about() takes a mid-layer register */
4580 MIDFUNC(1,forget_about,(W4 r))
4581 {
4582 if (isinreg(r))
4583 disassociate(r);
4584 live.state[r].val=0;
4585 set_status(r,UNDEF);
4586 }
4587 MENDFUNC(1,forget_about,(W4 r))
4588
4589 MIDFUNC(0,nop,(void))
4590 {
4591 raw_nop();
4592 }
4593 MENDFUNC(0,nop,(void))
4594
4595
4596 MIDFUNC(1,f_forget_about,(FW r))
4597 {
4598 if (f_isinreg(r))
4599 f_disassociate(r);
4600 live.fate[r].status=UNDEF;
4601 }
4602 MENDFUNC(1,f_forget_about,(FW r))
4603
4604 MIDFUNC(1,fmov_pi,(FW r))
4605 {
4606 r=f_writereg(r);
4607 raw_fmov_pi(r);
4608 f_unlock(r);
4609 }
4610 MENDFUNC(1,fmov_pi,(FW r))
4611
4612 MIDFUNC(1,fmov_log10_2,(FW r))
4613 {
4614 r=f_writereg(r);
4615 raw_fmov_log10_2(r);
4616 f_unlock(r);
4617 }
4618 MENDFUNC(1,fmov_log10_2,(FW r))
4619
4620 MIDFUNC(1,fmov_log2_e,(FW r))
4621 {
4622 r=f_writereg(r);
4623 raw_fmov_log2_e(r);
4624 f_unlock(r);
4625 }
4626 MENDFUNC(1,fmov_log2_e,(FW r))
4627
4628 MIDFUNC(1,fmov_loge_2,(FW r))
4629 {
4630 r=f_writereg(r);
4631 raw_fmov_loge_2(r);
4632 f_unlock(r);
4633 }
4634 MENDFUNC(1,fmov_loge_2,(FW r))
4635
4636 MIDFUNC(1,fmov_1,(FW r))
4637 {
4638 r=f_writereg(r);
4639 raw_fmov_1(r);
4640 f_unlock(r);
4641 }
4642 MENDFUNC(1,fmov_1,(FW r))
4643
4644 MIDFUNC(1,fmov_0,(FW r))
4645 {
4646 r=f_writereg(r);
4647 raw_fmov_0(r);
4648 f_unlock(r);
4649 }
4650 MENDFUNC(1,fmov_0,(FW r))
4651
4652 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4653 {
4654 r=f_writereg(r);
4655 raw_fmov_rm(r,m);
4656 f_unlock(r);
4657 }
4658 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4659
4660 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4661 {
4662 r=f_writereg(r);
4663 raw_fmovi_rm(r,m);
4664 f_unlock(r);
4665 }
4666 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4667
4668 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4669 {
4670 r=f_readreg(r);
4671 raw_fmovi_mr(m,r);
4672 f_unlock(r);
4673 }
4674 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4675
4676 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4677 {
4678 r=f_writereg(r);
4679 raw_fmovs_rm(r,m);
4680 f_unlock(r);
4681 }
4682 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4683
4684 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4685 {
4686 r=f_readreg(r);
4687 raw_fmovs_mr(m,r);
4688 f_unlock(r);
4689 }
4690 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4691
4692 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4693 {
4694 r=f_readreg(r);
4695 raw_fmov_ext_mr(m,r);
4696 f_unlock(r);
4697 }
4698 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4699
4700 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4701 {
4702 r=f_readreg(r);
4703 raw_fmov_mr(m,r);
4704 f_unlock(r);
4705 }
4706 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4707
4708 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4709 {
4710 r=f_writereg(r);
4711 raw_fmov_ext_rm(r,m);
4712 f_unlock(r);
4713 }
4714 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4715
4716 MIDFUNC(2,fmov_rr,(FW d, FR s))
4717 {
4718 if (d==s) { /* How pointless! */
4719 return;
4720 }
4721 #if USE_F_ALIAS
4722 f_disassociate(d);
4723 s=f_readreg(s);
4724 live.fate[d].realreg=s;
4725 live.fate[d].realind=live.fat[s].nholds;
4726 live.fate[d].status=DIRTY;
4727 live.fat[s].holds[live.fat[s].nholds]=d;
4728 live.fat[s].nholds++;
4729 f_unlock(s);
4730 #else
4731 s=f_readreg(s);
4732 d=f_writereg(d);
4733 raw_fmov_rr(d,s);
4734 f_unlock(s);
4735 f_unlock(d);
4736 #endif
4737 }
4738 MENDFUNC(2,fmov_rr,(FW d, FR s))
4739
4740 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4741 {
4742 index=readreg(index,4);
4743
4744 raw_fldcw_m_indexed(index,base);
4745 unlock2(index);
4746 }
4747 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4748
4749 MIDFUNC(1,ftst_r,(FR r))
4750 {
4751 r=f_readreg(r);
4752 raw_ftst_r(r);
4753 f_unlock(r);
4754 }
4755 MENDFUNC(1,ftst_r,(FR r))
4756
4757 MIDFUNC(0,dont_care_fflags,(void))
4758 {
4759 f_disassociate(FP_RESULT);
4760 }
4761 MENDFUNC(0,dont_care_fflags,(void))
4762
4763 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4764 {
4765 s=f_readreg(s);
4766 d=f_writereg(d);
4767 raw_fsqrt_rr(d,s);
4768 f_unlock(s);
4769 f_unlock(d);
4770 }
4771 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4772
4773 MIDFUNC(2,fabs_rr,(FW d, FR s))
4774 {
4775 s=f_readreg(s);
4776 d=f_writereg(d);
4777 raw_fabs_rr(d,s);
4778 f_unlock(s);
4779 f_unlock(d);
4780 }
4781 MENDFUNC(2,fabs_rr,(FW d, FR s))
4782
4783 MIDFUNC(2,fsin_rr,(FW d, FR s))
4784 {
4785 s=f_readreg(s);
4786 d=f_writereg(d);
4787 raw_fsin_rr(d,s);
4788 f_unlock(s);
4789 f_unlock(d);
4790 }
4791 MENDFUNC(2,fsin_rr,(FW d, FR s))
4792
4793 MIDFUNC(2,fcos_rr,(FW d, FR s))
4794 {
4795 s=f_readreg(s);
4796 d=f_writereg(d);
4797 raw_fcos_rr(d,s);
4798 f_unlock(s);
4799 f_unlock(d);
4800 }
4801 MENDFUNC(2,fcos_rr,(FW d, FR s))
4802
4803 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4804 {
4805 s=f_readreg(s);
4806 d=f_writereg(d);
4807 raw_ftwotox_rr(d,s);
4808 f_unlock(s);
4809 f_unlock(d);
4810 }
4811 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4812
4813 MIDFUNC(2,fetox_rr,(FW d, FR s))
4814 {
4815 s=f_readreg(s);
4816 d=f_writereg(d);
4817 raw_fetox_rr(d,s);
4818 f_unlock(s);
4819 f_unlock(d);
4820 }
4821 MENDFUNC(2,fetox_rr,(FW d, FR s))
4822
4823 MIDFUNC(2,frndint_rr,(FW d, FR s))
4824 {
4825 s=f_readreg(s);
4826 d=f_writereg(d);
4827 raw_frndint_rr(d,s);
4828 f_unlock(s);
4829 f_unlock(d);
4830 }
4831 MENDFUNC(2,frndint_rr,(FW d, FR s))
4832
4833 MIDFUNC(2,flog2_rr,(FW d, FR s))
4834 {
4835 s=f_readreg(s);
4836 d=f_writereg(d);
4837 raw_flog2_rr(d,s);
4838 f_unlock(s);
4839 f_unlock(d);
4840 }
4841 MENDFUNC(2,flog2_rr,(FW d, FR s))
4842
4843 MIDFUNC(2,fneg_rr,(FW d, FR s))
4844 {
4845 s=f_readreg(s);
4846 d=f_writereg(d);
4847 raw_fneg_rr(d,s);
4848 f_unlock(s);
4849 f_unlock(d);
4850 }
4851 MENDFUNC(2,fneg_rr,(FW d, FR s))
4852
4853 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4854 {
4855 s=f_readreg(s);
4856 d=f_rmw(d);
4857 raw_fadd_rr(d,s);
4858 f_unlock(s);
4859 f_unlock(d);
4860 }
4861 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4862
4863 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4864 {
4865 s=f_readreg(s);
4866 d=f_rmw(d);
4867 raw_fsub_rr(d,s);
4868 f_unlock(s);
4869 f_unlock(d);
4870 }
4871 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4872
4873 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4874 {
4875 d=f_readreg(d);
4876 s=f_readreg(s);
4877 raw_fcmp_rr(d,s);
4878 f_unlock(s);
4879 f_unlock(d);
4880 }
4881 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4882
4883 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4884 {
4885 s=f_readreg(s);
4886 d=f_rmw(d);
4887 raw_fdiv_rr(d,s);
4888 f_unlock(s);
4889 f_unlock(d);
4890 }
4891 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4892
4893 MIDFUNC(2,frem_rr,(FRW d, FR s))
4894 {
4895 s=f_readreg(s);
4896 d=f_rmw(d);
4897 raw_frem_rr(d,s);
4898 f_unlock(s);
4899 f_unlock(d);
4900 }
4901 MENDFUNC(2,frem_rr,(FRW d, FR s))
4902
4903 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4904 {
4905 s=f_readreg(s);
4906 d=f_rmw(d);
4907 raw_frem1_rr(d,s);
4908 f_unlock(s);
4909 f_unlock(d);
4910 }
4911 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4912
4913 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4914 {
4915 s=f_readreg(s);
4916 d=f_rmw(d);
4917 raw_fmul_rr(d,s);
4918 f_unlock(s);
4919 f_unlock(d);
4920 }
4921 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4922
4923 /********************************************************************
4924 * Support functions exposed to gencomp. CREATE time *
4925 ********************************************************************/
4926
4927 void set_zero(int r, int tmp)
4928 {
4929 if (setzflg_uses_bsf)
4930 bsf_l_rr(r,r);
4931 else
4932 simulate_bsf(tmp,r);
4933 }
4934
4935 int kill_rodent(int r)
4936 {
4937 return KILLTHERAT &&
4938 have_rat_stall &&
4939 (live.state[r].status==INMEM ||
4940 live.state[r].status==CLEAN ||
4941 live.state[r].status==ISCONST ||
4942 live.state[r].dirtysize==4);
4943 }
4944
4945 uae_u32 get_const(int r)
4946 {
4947 Dif (!isconst(r)) {
4948 write_log("Register %d should be constant, but isn't\n",r);
4949 abort();
4950 }
4951 return live.state[r].val;
4952 }
4953
4954 void sync_m68k_pc(void)
4955 {
4956 if (m68k_pc_offset) {
4957 add_l_ri(PC_P,m68k_pc_offset);
4958 comp_pc_p+=m68k_pc_offset;
4959 m68k_pc_offset=0;
4960 }
4961 }
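/* Example of the lazy PC scheme this relies on: instruction fetch
 during translation only bumps m68k_pc_offset (see the comp_get_iword()
 calls in calc_disp_ea_020 below), so after, say, three one-word
 instructions the offset is 6 and a single add_l_ri(PC_P,6) here
 stands in for three separate PC increments. */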
4962
4963 /********************************************************************
4964 * Scratch registers management *
4965 ********************************************************************/
4966
4967 struct scratch_t {
4968 uae_u32 regs[VREGS];
4969 fpu_register fregs[VFREGS];
4970 };
4971
4972 static scratch_t scratch;
4973
4974 /********************************************************************
4975 * Support functions exposed to newcpu *
4976 ********************************************************************/
4977
4978 static inline const char *str_on_off(bool b)
4979 {
4980 return b ? "on" : "off";
4981 }
4982
4983 void compiler_init(void)
4984 {
4985 static bool initialized = false;
4986 if (initialized)
4987 return;
4988
4989 #if JIT_DEBUG
4990 // JIT debug mode ?
4991 JITDebug = PrefsFindBool("jitdebug");
4992 #endif
4993 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4994
4995 #ifdef USE_JIT_FPU
4996 // Use JIT compiler for FPU instructions ?
4997 avoid_fpu = !PrefsFindBool("jitfpu");
4998 #else
4999 // JIT FPU is always disabled
5000 avoid_fpu = true;
5001 #endif
5002 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
5003
5004 // Get size of the translation cache (in KB)
5005 cache_size = PrefsFindInt32("jitcachesize");
5006 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5007
5008 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5009 raw_init_cpu();
5010 setzflg_uses_bsf = target_check_bsf();
5011 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5012 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5013 	write_log("<JIT compiler> : alignment for loops, jumps : %d, %d\n", align_loops, align_jumps);
5014
5015 // Translation cache flush mechanism
5016 lazy_flush = PrefsFindBool("jitlazyflush");
5017 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5018 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5019
5020 // Compiler features
5021 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5022 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5023 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5024 #if USE_INLINING
5025 follow_const_jumps = PrefsFindBool("jitinline");
5026 #endif
5027 write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
5028 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5029
5030 // Build compiler tables
5031 build_comp();
5032
5033 initialized = true;
5034
5035 #if PROFILE_UNTRANSLATED_INSNS
5036 write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
5037 #endif
5038
5039 #if PROFILE_COMPILE_TIME
5040 write_log("<JIT compiler> : gather statistics on translation time\n");
5041 emul_start_time = clock();
5042 #endif
5043 }
5044
5045 void compiler_exit(void)
5046 {
5047 #if PROFILE_COMPILE_TIME
5048 emul_end_time = clock();
5049 #endif
5050
5051 // Deallocate translation cache
5052 if (compiled_code) {
5053 vm_release(compiled_code, cache_size * 1024);
5054 compiled_code = 0;
5055 }
5056
5057 // Deallocate popallspace
5058 if (popallspace) {
5059 vm_release(popallspace, POPALLSPACE_SIZE);
5060 popallspace = 0;
5061 }
5062
5063 #if PROFILE_COMPILE_TIME
5064 write_log("### Compile Block statistics\n");
5065 write_log("Number of calls to compile_block : %d\n", compile_count);
5066 uae_u32 emul_time = emul_end_time - emul_start_time;
5067 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5068 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5069 100.0*double(compile_time)/double(emul_time));
5070 write_log("\n");
5071 #endif
5072
5073 #if PROFILE_UNTRANSLATED_INSNS
5074 uae_u64 untranslated_count = 0;
5075 for (int i = 0; i < 65536; i++) {
5076 opcode_nums[i] = i;
5077 untranslated_count += raw_cputbl_count[i];
5078 }
5079 	write_log("Sorting untranslated instruction counts...\n");
5080 qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5081 write_log("\nRank Opc Count Name\n");
5082 for (int i = 0; i < untranslated_top_ten; i++) {
5083 uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5084 struct instr *dp;
5085 struct mnemolookup *lookup;
5086 if (!count)
5087 break;
5088 dp = table68k + opcode_nums[i];
5089 for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5090 ;
5091 write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
5092 }
5093 #endif
5094
5095 #if RECORD_REGISTER_USAGE
5096 int reg_count_ids[16];
5097 uint64 tot_reg_count = 0;
5098 for (int i = 0; i < 16; i++) {
5099 reg_count_ids[i] = i;
5100 tot_reg_count += reg_count[i];
5101 }
5102 qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5103 uint64 cum_reg_count = 0;
5104 for (int i = 0; i < 16; i++) {
5105 int r = reg_count_ids[i];
5106 cum_reg_count += reg_count[r];
5107 printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5108 reg_count[r],
5109 100.0*double(reg_count[r])/double(tot_reg_count),
5110 100.0*double(cum_reg_count)/double(tot_reg_count));
5111 }
5112 #endif
5113 }
5114
5115 bool compiler_use_jit(void)
5116 {
5117 // Check for the "jit" prefs item
5118 if (!PrefsFindBool("jit"))
5119 return false;
5120
5121 	// Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5122 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5123 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5124 return false;
5125 }
5126
5127 // FIXME: there are currently problems with JIT compilation and anything below a 68040
5128 if (CPUType < 4) {
5129 write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5130 return false;
5131 }
5132
5133 return true;
5134 }
5135
5136 void init_comp(void)
5137 {
5138 int i;
5139 uae_s8* cb=can_byte;
5140 uae_s8* cw=can_word;
5141 uae_s8* au=always_used;
5142
5143 #if RECORD_REGISTER_USAGE
5144 for (i=0;i<16;i++)
5145 reg_count_local[i] = 0;
5146 #endif
5147
5148 for (i=0;i<VREGS;i++) {
5149 live.state[i].realreg=-1;
5150 live.state[i].needflush=NF_SCRATCH;
5151 live.state[i].val=0;
5152 set_status(i,UNDEF);
5153 }
5154
5155 for (i=0;i<VFREGS;i++) {
5156 live.fate[i].status=UNDEF;
5157 live.fate[i].realreg=-1;
5158 live.fate[i].needflush=NF_SCRATCH;
5159 }
5160
5161 for (i=0;i<VREGS;i++) {
5162 if (i<16) { /* First 16 registers map to 68k registers */
5163 live.state[i].mem=((uae_u32*)&regs)+i;
5164 live.state[i].needflush=NF_TOMEM;
5165 set_status(i,INMEM);
5166 }
5167 else
5168 live.state[i].mem=scratch.regs+i;
5169 }
5170 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5171 live.state[PC_P].needflush=NF_TOMEM;
5172 set_const(PC_P,(uintptr)comp_pc_p);
5173
5174 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5175 live.state[FLAGX].needflush=NF_TOMEM;
5176 set_status(FLAGX,INMEM);
5177
5178 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5179 live.state[FLAGTMP].needflush=NF_TOMEM;
5180 set_status(FLAGTMP,INMEM);
5181
5182 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5183 set_status(NEXT_HANDLER,UNDEF);
5184
5185 for (i=0;i<VFREGS;i++) {
5186 if (i<8) { /* First 8 registers map to 68k FPU registers */
5187 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5188 live.fate[i].needflush=NF_TOMEM;
5189 live.fate[i].status=INMEM;
5190 }
5191 else if (i==FP_RESULT) {
5192 live.fate[i].mem=(uae_u32*)(&fpu.result);
5193 live.fate[i].needflush=NF_TOMEM;
5194 live.fate[i].status=INMEM;
5195 }
5196 else
5197 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5198 }
5199
5200
5201 for (i=0;i<N_REGS;i++) {
5202 live.nat[i].touched=0;
5203 live.nat[i].nholds=0;
5204 live.nat[i].locked=0;
5205 if (*cb==i) {
5206 live.nat[i].canbyte=1; cb++;
5207 } else live.nat[i].canbyte=0;
5208 if (*cw==i) {
5209 live.nat[i].canword=1; cw++;
5210 } else live.nat[i].canword=0;
5211 if (*au==i) {
5212 live.nat[i].locked=1; au++;
5213 }
5214 }
5215
5216 for (i=0;i<N_FREGS;i++) {
5217 live.fat[i].touched=0;
5218 live.fat[i].nholds=0;
5219 live.fat[i].locked=0;
5220 }
5221
5222 touchcnt=1;
5223 m68k_pc_offset=0;
5224 live.flags_in_flags=TRASH;
5225 live.flags_on_stack=VALID;
5226 live.flags_are_important=1;
5227
5228 raw_fp_init();
5229 }
5230
5231 /* Only do this if you really mean it! The next call should be to init!*/
5232 void flush(int save_regs)
5233 {
5234 	int i;
5235
5236 log_flush();
5237 flush_flags(); /* low level */
5238 sync_m68k_pc(); /* mid level */
5239
5240 if (save_regs) {
5241 for (i=0;i<VFREGS;i++) {
5242 if (live.fate[i].needflush==NF_SCRATCH ||
5243 live.fate[i].status==CLEAN) {
5244 f_disassociate(i);
5245 }
5246 }
5247 for (i=0;i<VREGS;i++) {
5248 if (live.state[i].needflush==NF_TOMEM) {
5249 switch(live.state[i].status) {
5250 case INMEM:
5251 if (live.state[i].val) {
5252 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5253 log_vwrite(i);
5254 live.state[i].val=0;
5255 }
5256 break;
5257 case CLEAN:
5258 case DIRTY:
5259 remove_offset(i,-1); tomem(i); break;
5260 case ISCONST:
5261 if (i!=PC_P)
5262 writeback_const(i);
5263 break;
5264 default: break;
5265 }
5266 Dif (live.state[i].val && i!=PC_P) {
5267 write_log("Register %d still has val %x\n",
5268 i,live.state[i].val);
5269 }
5270 }
5271 }
5272 for (i=0;i<VFREGS;i++) {
5273 if (live.fate[i].needflush==NF_TOMEM &&
5274 live.fate[i].status==DIRTY) {
5275 f_evict(i);
5276 }
5277 }
5278 raw_fp_cleanup_drop();
5279 }
5280 if (needflags) {
5281 write_log("Warning! flush with needflags=1!\n");
5282 }
5283 }
5284
5285 static void flush_keepflags(void)
5286 {
5287 	int i;
5288
5289 for (i=0;i<VFREGS;i++) {
5290 if (live.fate[i].needflush==NF_SCRATCH ||
5291 live.fate[i].status==CLEAN) {
5292 f_disassociate(i);
5293 }
5294 }
5295 for (i=0;i<VREGS;i++) {
5296 if (live.state[i].needflush==NF_TOMEM) {
5297 switch(live.state[i].status) {
5298 case INMEM:
5299 /* Can't adjust the offset here --- that needs "add" */
5300 break;
5301 case CLEAN:
5302 case DIRTY:
5303 remove_offset(i,-1); tomem(i); break;
5304 case ISCONST:
5305 if (i!=PC_P)
5306 writeback_const(i);
5307 break;
5308 default: break;
5309 }
5310 }
5311 }
5312 for (i=0;i<VFREGS;i++) {
5313 if (live.fate[i].needflush==NF_TOMEM &&
5314 live.fate[i].status==DIRTY) {
5315 f_evict(i);
5316 }
5317 }
5318 raw_fp_cleanup_drop();
5319 }
5320
5321 void freescratch(void)
5322 {
5323 int i;
5324 for (i=0;i<N_REGS;i++)
5325 if (live.nat[i].locked && i!=4)
5326 write_log("Warning! %d is locked\n",i);
5327
5328 for (i=0;i<VREGS;i++)
5329 if (live.state[i].needflush==NF_SCRATCH) {
5330 forget_about(i);
5331 }
5332
5333 for (i=0;i<VFREGS;i++)
5334 if (live.fate[i].needflush==NF_SCRATCH) {
5335 f_forget_about(i);
5336 }
5337 }
5338
5339 /********************************************************************
5340 * Support functions, internal *
5341 ********************************************************************/
5342
5343
5344 static void align_target(uae_u32 a)
5345 {
5346 if (!a)
5347 return;
5348
5349 if (tune_nop_fillers)
5350 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5351 else {
5352 /* Fill with NOPs --- makes debugging with gdb easier */
5353 while ((uintptr)target&(a-1))
5354 *target++=0x90;
5355 }
5356 }
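/* Worked example: with a == 16 and target ending in ...5,
 (uintptr)target & (a-1) == 5, so the gdb-friendly path emits
 16-5 == 11 single-byte NOPs (0x90) and the next code emitted starts
 on a 16-byte boundary. The nop-filler path hands the same 11-byte
 count to the backend so it can use wider filler instructions. */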
5357
5358 static __inline__ int isinrom(uintptr addr)
5359 {
5360 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5361 }
5362
5363 static void flush_all(void)
5364 {
5365 int i;
5366
5367 log_flush();
5368 for (i=0;i<VREGS;i++)
5369 if (live.state[i].status==DIRTY) {
5370 if (!call_saved[live.state[i].realreg]) {
5371 tomem(i);
5372 }
5373 }
5374 for (i=0;i<VFREGS;i++)
5375 if (f_isinreg(i))
5376 f_evict(i);
5377 raw_fp_cleanup_drop();
5378 }
5379
5380 /* Make sure all registers that will get clobbered by a call are
5381 	safe and sound in memory */
5382 static void prepare_for_call_1(void)
5383 {
5384 flush_all(); /* If there are registers that don't get clobbered,
5385 * we should be a bit more selective here */
5386 }
5387
5388 /* We will call a C routine in a moment. That will clobber all registers,
5389 so we need to disassociate everything */
5390 static void prepare_for_call_2(void)
5391 {
5392 int i;
5393 for (i=0;i<N_REGS;i++)
5394 if (!call_saved[i] && live.nat[i].nholds>0)
5395 free_nreg(i);
5396
5397 for (i=0;i<N_FREGS;i++)
5398 if (live.fat[i].nholds>0)
5399 f_free_nreg(i);
5400
5401 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5402 flags at the very start of the call_r
5403 functions! */
5404 }
5405
5406 /********************************************************************
5407 * Memory access and related functions, CREATE time *
5408 ********************************************************************/
5409
5410 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5411 {
5412 next_pc_p=not_taken;
5413 taken_pc_p=taken;
5414 branch_cc=cond;
5415 }
5416
5417
5418 static uae_u32 get_handler_address(uae_u32 addr)
5419 {
5420 uae_u32 cl=cacheline(addr);
5421 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5422 return (uintptr)&(bi->direct_handler_to_use);
5423 }
5424
5425 static uae_u32 get_handler(uae_u32 addr)
5426 {
5427 uae_u32 cl=cacheline(addr);
5428 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5429 return (uintptr)bi->direct_handler_to_use;
5430 }
5431
5432 static void load_handler(int reg, uae_u32 addr)
5433 {
5434 mov_l_rm(reg,get_handler_address(addr));
5435 }
5436
5437 /* This version assumes that it is writing *real* memory, and *will* fail
5438 * if that assumption is wrong! No branches, no second chances, just
5439 * straight go-for-it attitude */
5440
5441 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5442 {
5443 int f=tmp;
5444
5445 if (clobber)
5446 f=source;
5447
5448 #if SIZEOF_VOID_P == 8
5449 if (!ThirtyThreeBitAddressing)
5450 sign_extend_32_rr(address, address);
5451 #endif
5452
5453 switch(size) {
5454 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5455 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5456 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5457 }
5458 forget_about(tmp);
5459 forget_about(f);
5460 }
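/* What the three cases above amount to, written as plain host-side C
 (an illustrative sketch, not the emitted code; swap16/swap32 are
 hypothetical stand-ins for the emitted bswap_16()/bswap_32()
 register operations): */
#if 0
static void writemem_effect_sketch(uae_u32 address, uae_u32 value, int size)
{
	uae_u8 *host = (uae_u8 *)MEMBaseDiff + address;	/* guest -> host */
	switch (size) {
	case 1: *host = (uae_u8)value; break;	/* bytes need no swap */
	case 2: *(uae_u16 *)host = swap16((uae_u16)value); break;
	case 4: *(uae_u32 *)host = swap32(value); break;	/* 68k is big-endian */
	}
}
#endif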
5461
5462 void writebyte(int address, int source, int tmp)
5463 {
5464 writemem_real(address,source,1,tmp,0);
5465 }
5466
5467 static __inline__ void writeword_general(int address, int source, int tmp,
5468 int clobber)
5469 {
5470 writemem_real(address,source,2,tmp,clobber);
5471 }
5472
5473 void writeword_clobber(int address, int source, int tmp)
5474 {
5475 writeword_general(address,source,tmp,1);
5476 }
5477
5478 void writeword(int address, int source, int tmp)
5479 {
5480 writeword_general(address,source,tmp,0);
5481 }
5482
5483 static __inline__ void writelong_general(int address, int source, int tmp,
5484 int clobber)
5485 {
5486 writemem_real(address,source,4,tmp,clobber);
5487 }
5488
5489 void writelong_clobber(int address, int source, int tmp)
5490 {
5491 writelong_general(address,source,tmp,1);
5492 }
5493
5494 void writelong(int address, int source, int tmp)
5495 {
5496 writelong_general(address,source,tmp,0);
5497 }
5498
5499
5500
5501 /* This version assumes that it is reading *real* memory, and *will* fail
5502 * if that assumption is wrong! No branches, no second chances, just
5503 * straight go-for-it attitude */
5504
5505 static void readmem_real(int address, int dest, int size, int tmp)
5506 {
5507 int f=tmp;
5508
5509 if (size==4 && address!=dest)
5510 f=dest;
5511
5512 #if SIZEOF_VOID_P == 8
5513 if (!ThirtyThreeBitAddressing)
5514 sign_extend_32_rr(address, address);
5515 #endif
5516
5517 switch(size) {
5518 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5519 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5520 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5521 }
5522 forget_about(tmp);
5523 }
5524
5525 void readbyte(int address, int dest, int tmp)
5526 {
5527 readmem_real(address,dest,1,tmp);
5528 }
5529
5530 void readword(int address, int dest, int tmp)
5531 {
5532 readmem_real(address,dest,2,tmp);
5533 }
5534
5535 void readlong(int address, int dest, int tmp)
5536 {
5537 readmem_real(address,dest,4,tmp);
5538 }
5539
5540 void get_n_addr(int address, int dest, int tmp)
5541 {
5542 // a is the register containing the virtual address
5543 // after the offset had been fetched
5544 int a=tmp;
5545
5546 // f is the register that will contain the offset
5547 int f=tmp;
5548
5549 // a == f == tmp if (address == dest)
5550 if (address!=dest) {
5551 a=address;
5552 f=dest;
5553 }
5554
5555 #if REAL_ADDRESSING
5556 mov_l_rr(dest, address);
5557 #elif DIRECT_ADDRESSING
5558 lea_l_brr(dest,address,MEMBaseDiff);
5559 #endif
5560 forget_about(tmp);
5561 }
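/* In other words: under REAL_ADDRESSING the 68k address already is the
 host address, so a plain register move suffices; under
 DIRECT_ADDRESSING the host address is address + MEMBaseDiff, which
 the single lea computes without disturbing the host flags. */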
5562
5563 void get_n_addr_jmp(int address, int dest, int tmp)
5564 {
5565 /* For this, we need to get the same address as the rest of UAE
5566 would --- otherwise we end up translating everything twice */
5567 get_n_addr(address,dest,tmp);
5568 }
5569
5570
5571 /* base is a register, but dp is an actual value.
5572 target is a register, as is tmp */
5573 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5574 {
5575 int reg = (dp >> 12) & 15;
5576 int regd_shift=(dp >> 9) & 3;
5577
5578 if (dp & 0x100) {
5579 int ignorebase=(dp&0x80);
5580 int ignorereg=(dp&0x40);
5581 int addbase=0;
5582 int outer=0;
5583
5584 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5585 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5586
5587 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5588 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5589
5590 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5591 if (!ignorereg) {
5592 if ((dp & 0x800) == 0)
5593 sign_extend_16_rr(target,reg);
5594 else
5595 mov_l_rr(target,reg);
5596 shll_l_ri(target,regd_shift);
5597 }
5598 else
5599 mov_l_ri(target,0);
5600
5601 /* target is now regd */
5602 if (!ignorebase)
5603 add_l(target,base);
5604 add_l_ri(target,addbase);
5605 if (dp&0x03) readlong(target,target,tmp);
5606 } else { /* do the getlong first, then add regd */
5607 if (!ignorebase) {
5608 mov_l_rr(target,base);
5609 add_l_ri(target,addbase);
5610 }
5611 else
5612 mov_l_ri(target,addbase);
5613 if (dp&0x03) readlong(target,target,tmp);
5614
5615 if (!ignorereg) {
5616 if ((dp & 0x800) == 0)
5617 sign_extend_16_rr(tmp,reg);
5618 else
5619 mov_l_rr(tmp,reg);
5620 shll_l_ri(tmp,regd_shift);
5621 /* tmp is now regd */
5622 add_l(target,tmp);
5623 }
5624 }
5625 add_l_ri(target,outer);
5626 }
5627 else { /* 68000 version */
5628 if ((dp & 0x800) == 0) { /* Sign extend */
5629 sign_extend_16_rr(target,reg);
5630 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5631 }
5632 else {
5633 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5634 }
5635 }
5636 forget_about(tmp);
5637 }
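/* Layout of the extension word dp as decoded above:

 bits 15-12  index register number (D0-D7/A0-A7 as vregs 0-15)
 bit  11     index size: 0 = sign-extended word, 1 = long
 bits 10-9   index scale, shift count 0..3 (regd_shift)
 bit  8      1 = full 68020 format, 0 = brief format
 bit  7      base suppress (full format only)
 bit  6      index suppress (full format only)
 bits 5-4    base displacement: 0x20 = word, 0x30 = long
 bit  2      0 = add scaled index before the memory indirection
 bits 1-0    outer displacement: 2 = word, 3 = long, nonzero = indirect
 bits 7-0    8-bit signed displacement (brief format only)

 Brief-format example: dp == 0x7422 selects index D7.W (sign-extended),
 scale 1<<2, plus displacement +0x22 added to the base register. */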
5638
5639
5640
5641
5642
5643 void set_cache_state(int enabled)
5644 {
5645 if (enabled!=letit)
5646 flush_icache_hard(77);
5647 letit=enabled;
5648 }
5649
5650 int get_cache_state(void)
5651 {
5652 return letit;
5653 }
5654
5655 uae_u32 get_jitted_size(void)
5656 {
5657 if (compiled_code)
5658 return current_compile_p-compiled_code;
5659 return 0;
5660 }
5661
5662 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5663 const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5664
5665 static uint8 *do_alloc_code(uint32 size, int depth)
5666 {
5667 #if defined(__linux__) && 0
5668 /*
5669 This is a really awful hack that is known to work on Linux at
5670 least.
5671
5672 	   The trick here is to make sure the allocated cache is near the
5673 	   code segment, and more precisely in the positive half of a
5674 	   32-bit address space, i.e. addr < 0x80000000. Actually, it
5675 turned out that a 32-bit binary run on AMD64 yields a cache
5676 allocated around 0xa0000000, thus causing some troubles when
5677 translating addresses from m68k to x86.
5678 */
5679 static uint8 * code_base = NULL;
5680 if (code_base == NULL) {
5681 uintptr page_size = getpagesize();
5682 uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5683 if (boundaries < page_size)
5684 boundaries = page_size;
5685 code_base = (uint8 *)sbrk(0);
5686 for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5687 if (vm_acquire_fixed(code_base, size) == 0) {
5688 uint8 *code = code_base;
5689 code_base += size;
5690 return code;
5691 }
5692 code_base += boundaries;
5693 }
5694 return NULL;
5695 }
5696
5697 if (vm_acquire_fixed(code_base, size) == 0) {
5698 uint8 *code = code_base;
5699 code_base += size;
5700 return code;
5701 }
5702
5703 if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5704 return NULL;
5705
5706 return do_alloc_code(size, depth + 1);
5707 #else
5708 uint8 *code = (uint8 *)vm_acquire(size);
5709 return code == VM_MAP_FAILED ? NULL : code;
5710 #endif
5711 }
5712
5713 static inline uint8 *alloc_code(uint32 size)
5714 {
5715 uint8 *ptr = do_alloc_code(size, 0);
5716 	/* allocated code must fit within the 32-bit address space */
5717 assert((uintptr)ptr <= 0xffffffff);
5718 return ptr;
5719 }
5720
5721 void alloc_cache(void)
5722 {
5723 if (compiled_code) {
5724 flush_icache_hard(6);
5725 vm_release(compiled_code, cache_size * 1024);
5726 compiled_code = 0;
5727 }
5728
5729 if (cache_size == 0)
5730 return;
5731
5732 while (!compiled_code && cache_size) {
5733 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5734 compiled_code = 0;
5735 cache_size /= 2;
5736 }
5737 }
5738 	if (compiled_code) vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5739
5740 if (compiled_code) {
5741 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5742 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5743 current_compile_p = compiled_code;
5744 current_cache_size = 0;
5745 }
5746 }
5747
5748
5749
5750 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5751
5752 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5753 {
5754 uae_u32 k1 = 0;
5755 uae_u32 k2 = 0;
5756
5757 #if USE_CHECKSUM_INFO
5758 checksum_info *csi = bi->csi;
5759 Dif(!csi) abort();
5760 while (csi) {
5761 uae_s32 len = csi->length;
5762 uintptr tmp = (uintptr)csi->start_p;
5763 #else
5764 uae_s32 len = bi->len;
5765 uintptr tmp = (uintptr)bi->min_pcp;
5766 #endif
5767 uae_u32*pos;
5768
5769 len += (tmp & 3);
5770 tmp &= ~((uintptr)3);
5771 pos = (uae_u32 *)tmp;
5772
5773 if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5774 while (len > 0) {
5775 k1 += *pos;
5776 k2 ^= *pos;
5777 pos++;
5778 len -= 4;
5779 }
5780 }
5781
5782 #if USE_CHECKSUM_INFO
5783 csi = csi->next;
5784 }
5785 #endif
5786
5787 *c1 = k1;
5788 *c2 = k2;
5789 }
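/* The (c1,c2) pair is a cheap dual signature of the 68k code a block
 was translated from: a 32-bit-wide sum and a 32-bit-wide XOR over the
 same words, so an edit has to cancel out under both mixes to go
 unnoticed. The same scheme on a standalone buffer (hypothetical
 helper, word-aligned length assumed): */
#if 0
static void checksum_pair_sketch(const uae_u32 *p, int nwords,
				 uae_u32 *c1, uae_u32 *c2)
{
	uae_u32 k1 = 0, k2 = 0;
	while (nwords-- > 0) {
		k1 += *p;	/* catches changes that XOR cancels */
		k2 ^= *p;	/* catches changes that the sum cancels */
		p++;
	}
	*c1 = k1;
	*c2 = k2;
}
#endif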
5790
5791 #if 0
5792 static void show_checksum(CSI_TYPE* csi)
5793 {
5794 uae_u32 k1=0;
5795 uae_u32 k2=0;
5796 uae_s32 len=CSI_LENGTH(csi);
5797 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5798 uae_u32* pos;
5799
5800 len+=(tmp&3);
5801 tmp&=(~3);
5802 pos=(uae_u32*)tmp;
5803
5804 if (len<0 || len>MAX_CHECKSUM_LEN) {
5805 return;
5806 }
5807 else {
5808 while (len>0) {
5809 write_log("%08x ",*pos);
5810 pos++;
5811 len-=4;
5812 }
5813 write_log(" bla\n");
5814 }
5815 }
5816 #endif
5817
5818
5819 int check_for_cache_miss(void)
5820 {
5821 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5822
5823 if (bi) {
5824 int cl=cacheline(regs.pc_p);
5825 if (bi!=cache_tags[cl+1].bi) {
5826 raise_in_cl_list(bi);
5827 return 1;
5828 }
5829 }
5830 return 0;
5831 }
5832
5833
5834 static void recompile_block(void)
5835 {
5836 /* An existing block's countdown code has expired. We need to make
5837 sure that execute_normal doesn't refuse to recompile due to a
5838 perceived cache miss... */
5839 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5840
5841 Dif (!bi)
5842 abort();
5843 raise_in_cl_list(bi);
5844 execute_normal();
5845 return;
5846 }
5847 static void cache_miss(void)
5848 {
5849 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5850 uae_u32 cl=cacheline(regs.pc_p);
5851 blockinfo* bi2=get_blockinfo(cl);
5852
5853 if (!bi) {
5854 execute_normal(); /* Compile this block now */
5855 return;
5856 }
5857 Dif (!bi2 || bi==bi2) {
5858 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5859 abort();
5860 }
5861 raise_in_cl_list(bi);
5862 return;
5863 }
5864
5865 static int called_check_checksum(blockinfo* bi);
5866
5867 static inline int block_check_checksum(blockinfo* bi)
5868 {
5869 uae_u32 c1,c2;
5870 bool isgood;
5871
5872 if (bi->status!=BI_NEED_CHECK)
5873 return 1; /* This block is in a checked state */
5874
5875 checksum_count++;
5876
5877 if (bi->c1 || bi->c2)
5878 calc_checksum(bi,&c1,&c2);
5879 else {
5880 c1=c2=1; /* Make sure it doesn't match */
5881 }
5882
5883 isgood=(c1==bi->c1 && c2==bi->c2);
5884
5885 if (isgood) {
5886 /* This block is still OK. So we reactivate. Of course, that
5887 means we have to move it into the needs-to-be-flushed list */
5888 bi->handler_to_use=bi->handler;
5889 set_dhtu(bi,bi->direct_handler);
5890 bi->status=BI_CHECKING;
5891 isgood=called_check_checksum(bi);
5892 }
5893 if (isgood) {
5894 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5895 c1,c2,bi->c1,bi->c2);*/
5896 remove_from_list(bi);
5897 add_to_active(bi);
5898 raise_in_cl_list(bi);
5899 bi->status=BI_ACTIVE;
5900 }
5901 else {
5902 /* This block actually changed. We need to invalidate it,
5903 and set it up to be recompiled */
5904 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5905 c1,c2,bi->c1,bi->c2); */
5906 invalidate_block(bi);
5907 raise_in_cl_list(bi);
5908 }
5909 return isgood;
5910 }
5911
5912 static int called_check_checksum(blockinfo* bi)
5913 {
5914 dependency* x=bi->deplist;
5915 int isgood=1;
5916 int i;
5917
5918 for (i=0;i<2 && isgood;i++) {
5919 if (bi->dep[i].jmp_off) {
5920 isgood=block_check_checksum(bi->dep[i].target);
5921 }
5922 }
5923 return isgood;
5924 }
5925
5926 static void check_checksum(void)
5927 {
5928 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5929 uae_u32 cl=cacheline(regs.pc_p);
5930 blockinfo* bi2=get_blockinfo(cl);
5931
5932 /* These are not the droids you are looking for... */
5933 if (!bi) {
5934 /* Whoever is the primary target is in a dormant state, but
5935 calling it was accidental, and we should just compile this
5936 new block */
5937 execute_normal();
5938 return;
5939 }
5940 if (bi!=bi2) {
5941 /* The block was hit accidentally, but it does exist. Cache miss */
5942 cache_miss();
5943 return;
5944 }
5945
5946 if (!block_check_checksum(bi))
5947 execute_normal();
5948 }
5949
5950 static __inline__ void match_states(blockinfo* bi)
5951 {
5952 int i;
5953 smallstate* s=&(bi->env);
5954
5955 if (bi->status==BI_NEED_CHECK) {
5956 block_check_checksum(bi);
5957 }
5958 if (bi->status==BI_ACTIVE ||
5959 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5960 block makes (about not using
5961 certain vregs) */
5962 for (i=0;i<16;i++) {
5963 if (s->virt[i]==L_UNNEEDED) {
5964 // write_log("unneeded reg %d at %p\n",i,target);
5965 COMPCALL(forget_about)(i); // FIXME
5966 }
5967 }
5968 }
5969 flush(1);
5970
5971 /* And now deal with the *demands* the block makes */
5972 for (i=0;i<N_REGS;i++) {
5973 int v=s->nat[i];
5974 if (v>=0) {
5975 // printf("Loading reg %d into %d at %p\n",v,i,target);
5976 readreg_specific(v,4,i);
5977 // do_load_reg(i,v);
5978 // setlock(i);
5979 }
5980 }
5981 for (i=0;i<N_REGS;i++) {
5982 int v=s->nat[i];
5983 if (v>=0) {
5984 unlock2(i);
5985 }
5986 }
5987 }
5988
5989 static __inline__ void create_popalls(void)
5990 {
5991 int i,r;
5992
5993 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5994 write_log("FATAL: Could not allocate popallspace!\n");
5995 abort();
5996 }
5997 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
5998
5999 int stack_space = STACK_OFFSET;
6000 for (i=0;i<N_REGS;i++) {
6001 if (need_to_preserve[i])
6002 stack_space += sizeof(void *);
6003 }
6004 stack_space %= STACK_ALIGN;
6005 if (stack_space)
6006 stack_space = STACK_ALIGN - stack_space;
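/* Worked example (illustrative numbers): with STACK_OFFSET == 0,
 STACK_ALIGN == 16 and three call-saved registers on a 32-bit host,
 the pushes account for 3*4 == 12 bytes; 12 % 16 == 12, so stack_space
 becomes 16-12 == 4, and the extra raw_dec_sp(4) below keeps every JIT
 frame 16-byte aligned. */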
6007
6008 current_compile_p=popallspace;
6009 set_target(current_compile_p);
6010
6011 /* We need to guarantee 16-byte stack alignment on x86 at any point
6012 within the JIT generated code. We have multiple exit points
6013 possible but a single entry. A "jmp" is used so that we don't
6014 have to generate stack alignment in generated code that has to
6015 call external functions (e.g. a generic instruction handler).
6016
6017 	   In summary, JIT generated code is not leaf code, so we have to deal
6018 with it here to maintain correct stack alignment. */
6019 align_target(align_jumps);
6020 current_compile_p=get_target();
6021 pushall_call_handler=get_target();
6022 for (i=N_REGS;i--;) {
6023 if (need_to_preserve[i])
6024 raw_push_l_r(i);
6025 }
6026 raw_dec_sp(stack_space);
6027 r=REG_PC_TMP;
6028 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6029 raw_and_l_ri(r,TAGMASK);
6030 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6031
6032 /* now the exit points */
6033 align_target(align_jumps);
6034 popall_do_nothing=get_target();
6035 raw_inc_sp(stack_space);
6036 for (i=0;i<N_REGS;i++) {
6037 if (need_to_preserve[i])
6038 raw_pop_l_r(i);
6039 }
6040 raw_jmp((uintptr)do_nothing);
6041
6042 align_target(align_jumps);
6043 popall_execute_normal=get_target();
6044 raw_inc_sp(stack_space);
6045 for (i=0;i<N_REGS;i++) {
6046 if (need_to_preserve[i])
6047 raw_pop_l_r(i);
6048 }
6049 raw_jmp((uintptr)execute_normal);
6050
6051 align_target(align_jumps);
6052 popall_cache_miss=get_target();
6053 raw_inc_sp(stack_space);
6054 for (i=0;i<N_REGS;i++) {
6055 if (need_to_preserve[i])
6056 raw_pop_l_r(i);
6057 }
6058 raw_jmp((uintptr)cache_miss);
6059
6060 align_target(align_jumps);
6061 popall_recompile_block=get_target();
6062 raw_inc_sp(stack_space);
6063 for (i=0;i<N_REGS;i++) {
6064 if (need_to_preserve[i])
6065 raw_pop_l_r(i);
6066 }
6067 raw_jmp((uintptr)recompile_block);
6068
6069 align_target(align_jumps);
6070 popall_exec_nostats=get_target();
6071 raw_inc_sp(stack_space);
6072 for (i=0;i<N_REGS;i++) {
6073 if (need_to_preserve[i])
6074 raw_pop_l_r(i);
6075 }
6076 raw_jmp((uintptr)exec_nostats);
6077
6078 align_target(align_jumps);
6079 popall_check_checksum=get_target();
6080 raw_inc_sp(stack_space);
6081 for (i=0;i<N_REGS;i++) {
6082 if (need_to_preserve[i])
6083 raw_pop_l_r(i);
6084 }
6085 raw_jmp((uintptr)check_checksum);
6086
6087 // no need to further write into popallspace
6088 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6089 }
6090
6091 static __inline__ void reset_lists(void)
6092 {
6093 int i;
6094
6095 for (i=0;i<MAX_HOLD_BI;i++)
6096 hold_bi[i]=NULL;
6097 active=NULL;
6098 dormant=NULL;
6099 }
6100
6101 static void prepare_block(blockinfo* bi)
6102 {
6103 int i;
6104
6105 set_target(current_compile_p);
6106 align_target(align_jumps);
6107 bi->direct_pen=(cpuop_func *)get_target();
6108 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6109 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6110 raw_jmp((uintptr)popall_execute_normal);
6111
6112 align_target(align_jumps);
6113 bi->direct_pcc=(cpuop_func *)get_target();
6114 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6115 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6116 raw_jmp((uintptr)popall_check_checksum);
6117 current_compile_p=get_target();
6118
6119 bi->deplist=NULL;
6120 for (i=0;i<2;i++) {
6121 bi->dep[i].prev_p=NULL;
6122 bi->dep[i].next=NULL;
6123 }
6124 bi->env=default_ss;
6125 bi->status=BI_INVALID;
6126 bi->havestate=0;
6127 //bi->env=empty_ss;
6128 }
6129
6130 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6131 static inline void reset_compop(int opcode)
6132 {
6133 compfunctbl[opcode] = NULL;
6134 nfcompfunctbl[opcode] = NULL;
6135 }
6136
6137 static int read_opcode(const char *p)
6138 {
6139 int opcode = 0;
6140 for (int i = 0; i < 4; i++) {
6141 int op = p[i];
6142 switch (op) {
6143 case '0': case '1': case '2': case '3': case '4':
6144 case '5': case '6': case '7': case '8': case '9':
6145 opcode = (opcode << 4) | (op - '0');
6146 break;
6147 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6148 opcode = (opcode << 4) | ((op - 'a') + 10);
6149 break;
6150 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6151 opcode = (opcode << 4) | ((op - 'A') + 10);
6152 break;
6153 default:
6154 return -1;
6155 }
6156 }
6157 return opcode;
6158 }
6159
6160 static bool merge_blacklist()
6161 {
6162 const char *blacklist = PrefsFindString("jitblacklist");
6163 if (blacklist) {
6164 const char *p = blacklist;
6165 for (;;) {
6166 if (*p == 0)
6167 return true;
6168
6169 int opcode1 = read_opcode(p);
6170 if (opcode1 < 0)
6171 return false;
6172 p += 4;
6173
6174 int opcode2 = opcode1;
6175 if (*p == '-') {
6176 p++;
6177 opcode2 = read_opcode(p);
6178 if (opcode2 < 0)
6179 return false;
6180 p += 4;
6181 }
6182
6183 if (*p == 0 || *p == ';') {
6184 write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6185 for (int opcode = opcode1; opcode <= opcode2; opcode++)
6186 reset_compop(cft_map(opcode));
6187
6188 if (*p++ == ';')
6189 continue;
6190
6191 return true;
6192 }
6193
6194 return false;
6195 }
6196 }
6197 return true;
6198 }
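/* The jitblacklist prefs string parsed above is a ';'-separated list
 of 4-hex-digit opcodes or "from-to" ranges, in big-endian 68k
 encoding. An illustrative value:

 jitblacklist "f200-f3ff;4afc"

 would clear the compile handlers for that F-line range plus the
 ILLEGAL opcode (0x4afc), forcing them through the untranslated
 handlers instead. */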
6199
6200 void build_comp(void)
6201 {
6202 int i;
6203 int jumpcount=0;
6204 unsigned long opcode;
6205 struct comptbl* tbl=op_smalltbl_0_comp_ff;
6206 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6207 int count;
6208 int cpu_level = 0; // 68000 (default)
6209 if (CPUType == 4)
6210 cpu_level = 4; // 68040 with FPU
6211 else {
6212 if (FPUType)
6213 cpu_level = 3; // 68020 with FPU
6214 else if (CPUType >= 2)
6215 cpu_level = 2; // 68020
6216 else if (CPUType == 1)
6217 cpu_level = 1;
6218 }
6219 struct cputbl *nfctbl = (
6220 cpu_level == 4 ? op_smalltbl_0_nf
6221 : cpu_level == 3 ? op_smalltbl_1_nf
6222 : cpu_level == 2 ? op_smalltbl_2_nf
6223 : cpu_level == 1 ? op_smalltbl_3_nf
6224 : op_smalltbl_4_nf);
6225
6226 write_log ("<JIT compiler> : building compiler function tables\n");
6227
6228 for (opcode = 0; opcode < 65536; opcode++) {
6229 reset_compop(opcode);
6230 nfcpufunctbl[opcode] = op_illg_1;
6231 prop[opcode].use_flags = 0x1f;
6232 prop[opcode].set_flags = 0x1f;
6233 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6234 }
6235
6236 for (i = 0; tbl[i].opcode < 65536; i++) {
6237 int cflow = table68k[tbl[i].opcode].cflow;
6238 if (follow_const_jumps && (tbl[i].specific & 16))
6239 cflow = fl_const_jump;
6240 else
6241 cflow &= ~fl_const_jump;
6242 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6243
6244 int uses_fpu = tbl[i].specific & 32;
6245 if (uses_fpu && avoid_fpu)
6246 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6247 else
6248 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6249 }
6250
6251 for (i = 0; nftbl[i].opcode < 65536; i++) {
6252 		int uses_fpu = nftbl[i].specific & 32;
6253 if (uses_fpu && avoid_fpu)
6254 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6255 else
6256 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6257
6258 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6259 }
6260
6261 for (i = 0; nfctbl[i].handler; i++) {
6262 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6263 }
6264
6265 for (opcode = 0; opcode < 65536; opcode++) {
6266 compop_func *f;
6267 compop_func *nff;
6268 cpuop_func *nfcf;
6269 int isaddx,cflow;
6270
6271 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6272 continue;
6273
6274 if (table68k[opcode].handler != -1) {
6275 f = compfunctbl[cft_map(table68k[opcode].handler)];
6276 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6277 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6278 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6279 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6280 prop[cft_map(opcode)].cflow = cflow;
6281 prop[cft_map(opcode)].is_addx = isaddx;
6282 compfunctbl[cft_map(opcode)] = f;
6283 nfcompfunctbl[cft_map(opcode)] = nff;
6284 Dif (nfcf == op_illg_1)
6285 abort();
6286 nfcpufunctbl[cft_map(opcode)] = nfcf;
6287 }
6288 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6289 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6290 /* Unconditional jumps don't evaluate condition codes, so they
6291 * don't actually use any flags themselves */
6292 if (prop[cft_map(opcode)].cflow & fl_const_jump)
6293 prop[cft_map(opcode)].use_flags = 0;
6294 }
6295 for (i = 0; nfctbl[i].handler != NULL; i++) {
6296 if (nfctbl[i].specific)
6297 			nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6298 }
6299
6300 /* Merge in blacklist */
6301 if (!merge_blacklist())
6302 write_log("<JIT compiler> : blacklist merge failure!\n");
6303
6304 count=0;
6305 for (opcode = 0; opcode < 65536; opcode++) {
6306 if (compfunctbl[cft_map(opcode)])
6307 count++;
6308 }
6309 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6310
6311 /* Initialise state */
6312 create_popalls();
6313 alloc_cache();
6314 reset_lists();
6315
6316 for (i=0;i<TAGSIZE;i+=2) {
6317 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6318 cache_tags[i+1].bi=NULL;
6319 }
6320
6321 #if 0
6322 for (i=0;i<N_REGS;i++) {
6323 empty_ss.nat[i].holds=-1;
6324 empty_ss.nat[i].validsize=0;
6325 empty_ss.nat[i].dirtysize=0;
6326 }
6327 #endif
6328 for (i=0;i<VREGS;i++) {
6329 empty_ss.virt[i]=L_NEEDED;
6330 }
6331 for (i=0;i<N_REGS;i++) {
6332 empty_ss.nat[i]=L_UNKNOWN;
6333 }
6334 default_ss=empty_ss;
6335 }
6336
6337
6338 static void flush_icache_none(int n)
6339 {
6340 /* Nothing to do. */
6341 }
6342
6343 static void flush_icache_hard(int n)
6344 {
6345 uae_u32 i;
6346 blockinfo* bi, *dbi;
6347
6348 hard_flush_count++;
6349 #if 0
6350 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6351 n,regs.pc,regs.pc_p,current_cache_size/1024);
6352 current_cache_size = 0;
6353 #endif
6354 bi=active;
6355 while(bi) {
6356 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6357 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6358 dbi=bi; bi=bi->next;
6359 free_blockinfo(dbi);
6360 }
6361 bi=dormant;
6362 while(bi) {
6363 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6364 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6365 dbi=bi; bi=bi->next;
6366 free_blockinfo(dbi);
6367 }
6368
6369 reset_lists();
6370 if (!compiled_code)
6371 return;
6372 current_compile_p=compiled_code;
6373 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6374 }
6375
6376
6377 /* "Soft flushing" --- instead of actually throwing everything away,
6378 we simply mark everything as "needs to be checked".
6379 */
6380
6381 static inline void flush_icache_lazy(int n)
6382 {
6383 uae_u32 i;
6384 blockinfo* bi;
6385 blockinfo* bi2;
6386
6387 soft_flush_count++;
6388 if (!active)
6389 return;
6390
6391 bi=active;
6392 while (bi) {
6393 uae_u32 cl=cacheline(bi->pc_p);
6394 if (bi->status==BI_INVALID ||
6395 bi->status==BI_NEED_RECOMP) {
6396 if (bi==cache_tags[cl+1].bi)
6397 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6398 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6399 set_dhtu(bi,bi->direct_pen);
6400 bi->status=BI_INVALID;
6401 }
6402 else {
6403 if (bi==cache_tags[cl+1].bi)
6404 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6405 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6406 set_dhtu(bi,bi->direct_pcc);
6407 bi->status=BI_NEED_CHECK;
6408 }
6409 bi2=bi;
6410 bi=bi->next;
6411 }
6412 /* bi2 is now the last entry in the active list */
6413 bi2->next=dormant;
6414 if (dormant)
6415 dormant->prev_p=&(bi2->next);
6416
6417 dormant=active;
6418 active->prev_p=&dormant;
6419 active=NULL;
6420 }
6421
6422 void flush_icache_range(uae_u32 start, uae_u32 length)
6423 {
6424 if (!active)
6425 return;
6426
6427 #if LAZY_FLUSH_ICACHE_RANGE
6428 uae_u8 *start_p = get_real_address(start);
6429 blockinfo *bi = active;
6430 while (bi) {
6431 #if USE_CHECKSUM_INFO
6432 bool invalidate = false;
6433 for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
6434 invalidate = (((start_p - csi->start_p) < csi->length) ||
6435 ((csi->start_p - start_p) < length));
6436 #else
6437 // Assume system is consistent and would invalidate the right range
6438 const bool invalidate = (bi->pc_p - start_p) < length;
6439 #endif
6440 if (invalidate) {
6441 uae_u32 cl = cacheline(bi->pc_p);
6442 if (bi == cache_tags[cl + 1].bi)
6443 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6444 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
6445 set_dhtu(bi, bi->direct_pen);
6446 bi->status = BI_NEED_RECOMP;
6447 }
6448 bi = bi->next;
6449 }
6450 return;
6451 #endif
6452 flush_icache(-1);
6453 }
6454
6455 static void catastrophe(void)
6456 {
6457 abort();
6458 }
6459
6460 int failure;
6461
6462 #define TARGET_M68K 0
6463 #define TARGET_POWERPC 1
6464 #define TARGET_X86 2
6465 #define TARGET_X86_64 3
6466 #if defined(i386) || defined(__i386__)
6467 #define TARGET_NATIVE TARGET_X86
6468 #endif
6469 #if defined(powerpc) || defined(__powerpc__)
6470 #define TARGET_NATIVE TARGET_POWERPC
6471 #endif
6472 #if defined(x86_64) || defined(__x86_64__)
6473 #define TARGET_NATIVE TARGET_X86_64
6474 #endif
6475
6476 #ifdef ENABLE_MON
6477 static uae_u32 mon_read_byte_jit(uintptr addr)
6478 {
6479 uae_u8 *m = (uae_u8 *)addr;
6480 return (uintptr)(*m);
6481 }
6482
6483 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6484 {
6485 uae_u8 *m = (uae_u8 *)addr;
6486 *m = b;
6487 }
6488 #endif
6489
6490 void disasm_block(int target, uint8 * start, size_t length)
6491 {
6492 if (!JITDebug)
6493 return;
6494
6495 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6496 char disasm_str[200];
6497 sprintf(disasm_str, "%s $%x $%x",
6498 target == TARGET_M68K ? "d68" :
6499 target == TARGET_X86 ? "d86" :
6500 target == TARGET_X86_64 ? "d8664" :
6501 target == TARGET_POWERPC ? "d" : "x",
6502 start, start + length - 1);
6503
6504 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6505 void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6506
6507 mon_read_byte = mon_read_byte_jit;
6508 mon_write_byte = mon_write_byte_jit;
6509
6510 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6511 mon(4, arg);
6512
6513 mon_read_byte = old_mon_read_byte;
6514 mon_write_byte = old_mon_write_byte;
6515 #endif
6516 }
6517
6518 static void disasm_native_block(uint8 *start, size_t length)
6519 {
6520 disasm_block(TARGET_NATIVE, start, length);
6521 }
6522
6523 static void disasm_m68k_block(uint8 *start, size_t length)
6524 {
6525 disasm_block(TARGET_M68K, start, length);
6526 }
6527
6528 #ifdef HAVE_GET_WORD_UNSWAPPED
6529 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6530 #else
6531 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6532 #endif
6533
6534 #if JIT_DEBUG
6535 static uae_u8 *last_regs_pc_p = 0;
6536 static uae_u8 *last_compiled_block_addr = 0;
6537
6538 void compiler_dumpstate(void)
6539 {
6540 if (!JITDebug)
6541 return;
6542
6543 write_log("### Host addresses\n");
6544 write_log("MEM_BASE : %x\n", MEMBaseDiff);
6545 write_log("PC_P : %p\n", &regs.pc_p);
6546 write_log("SPCFLAGS : %p\n", &regs.spcflags);
6547 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6548 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6549 write_log("\n");
6550
6551 write_log("### M68k processor state\n");
6552 m68k_dumpstate(0);
6553 write_log("\n");
6554
6555 write_log("### Block in Mac address space\n");
6556 write_log("M68K block : %p\n",
6557 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6558 write_log("Native block : %p (%d bytes)\n",
6559 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6560 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6561 write_log("\n");
6562 }
6563 #endif
6564
6565 static void compile_block(cpu_history* pc_hist, int blocklen)
6566 {
6567 if (letit && compiled_code) {
6568 #if PROFILE_COMPILE_TIME
6569 compile_count++;
6570 clock_t start_time = clock();
6571 #endif
6572 #if JIT_DEBUG
6573 bool disasm_block = false;
6574 #endif
6575
6576 /* OK, here we need to 'compile' a block */
6577 int i;
6578 int r;
6579 int was_comp=0;
6580 uae_u8 liveflags[MAXRUN+1];
6581 #if USE_CHECKSUM_INFO
6582 bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6583 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6584 uintptr min_pcp=max_pcp;
6585 #else
6586 uintptr max_pcp=(uintptr)pc_hist[0].location;
6587 uintptr min_pcp=max_pcp;
6588 #endif
6589 uae_u32 cl=cacheline(pc_hist[0].location);
6590 void* specflags=(void*)&regs.spcflags;
6591 blockinfo* bi=NULL;
6592 blockinfo* bi2;
6593 int extra_len=0;
6594
6595 redo_current_block=0;
6596 if (current_compile_p>=max_compile_start)
6597 flush_icache_hard(7);
6598
6599 alloc_blockinfos();
6600
6601 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6602 bi2=get_blockinfo(cl);
6603
6604 optlev=bi->optlevel;
6605 if (bi->status!=BI_INVALID) {
6606 Dif (bi!=bi2) {
6607 /* I don't think it can happen anymore. Shouldn't, in
6608 any case. So let's make sure... */
6609 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6610 bi->count,bi->optlevel,bi->handler_to_use,
6611 cache_tags[cl].handler);
6612 abort();
6613 }
6614
6615 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6616 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6617 /* What the heck? We are not supposed to be here! */
6618 abort();
6619 }
6620 }
6621 if (bi->count==-1) {
6622 optlev++;
6623 while (!optcount[optlev])
6624 optlev++;
6625 bi->count=optcount[optlev]-1;
6626 }
6627 current_block_pc_p=(uintptr)pc_hist[0].location;
6628
6629 remove_deps(bi); /* We are about to create new code */
6630 bi->optlevel=optlev;
6631 bi->pc_p=(uae_u8*)pc_hist[0].location;
6632 #if USE_CHECKSUM_INFO
6633 free_checksum_info_chain(bi->csi);
6634 bi->csi = NULL;
6635 #endif
6636
6637 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6638 i=blocklen;
6639 while (i--) {
6640 uae_u16* currpcp=pc_hist[i].location;
6641 uae_u32 op=DO_GET_OPCODE(currpcp);
6642
6643 #if USE_CHECKSUM_INFO
6644 trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6645 if (follow_const_jumps && is_const_jump(op)) {
6646 checksum_info *csi = alloc_checksum_info();
6647 csi->start_p = (uae_u8 *)min_pcp;
6648 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6649 csi->next = bi->csi;
6650 bi->csi = csi;
6651 max_pcp = (uintptr)currpcp;
6652 }
6653 min_pcp = (uintptr)currpcp;
6654 #else
6655 if ((uintptr)currpcp<min_pcp)
6656 min_pcp=(uintptr)currpcp;
6657 if ((uintptr)currpcp>max_pcp)
6658 max_pcp=(uintptr)currpcp;
6659 #endif
6660
6661 liveflags[i]=((liveflags[i+1]&
6662 (~prop[op].set_flags))|
6663 prop[op].use_flags);
6664 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6665 liveflags[i]&= ~FLAG_Z;
6666 }
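/* Worked example of the backward step above: if only Z is live after
 instruction i (liveflags[i+1] == FLAG_Z) and instruction i sets all
 of CZNV while using none, then liveflags[i] == 0 and the nf
 (no-flags) handler can be picked for whatever precedes it. The ADDX
 special case works because ADDX reads Z only to compute the new Z
 (it can clear Z but never set it), so a dead Z stays dead across
 it. */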
6667
6668 #if USE_CHECKSUM_INFO
6669 checksum_info *csi = alloc_checksum_info();
6670 csi->start_p = (uae_u8 *)min_pcp;
6671 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6672 csi->next = bi->csi;
6673 bi->csi = csi;
6674 #endif
6675
6676 bi->needed_flags=liveflags[0];
6677
6678 align_target(align_loops);
6679 was_comp=0;
6680
6681 bi->direct_handler=(cpuop_func *)get_target();
6682 set_dhtu(bi,bi->direct_handler);
6683 bi->status=BI_COMPILING;
6684 current_block_start_target=(uintptr)get_target();
6685
6686 log_startblock();
6687
6688 if (bi->count>=0) { /* Need to generate countdown code */
6689 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6690 raw_sub_l_mi((uintptr)&(bi->count),1);
6691 raw_jl((uintptr)popall_recompile_block);
6692 }
6693 if (optlev==0) { /* No need to actually translate */
6694 /* Execute normally without keeping stats */
6695 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6696 raw_jmp((uintptr)popall_exec_nostats);
6697 }
6698 else {
6699 reg_alloc_run=0;
6700 next_pc_p=0;
6701 taken_pc_p=0;
6702 branch_cc=0;
6703
6704 comp_pc_p=(uae_u8*)pc_hist[0].location;
6705 init_comp();
6706 was_comp=1;
6707
6708 #ifdef USE_CPU_EMUL_SERVICES
6709 raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
6710 raw_jcc_b_oponly(NATIVE_CC_GT);
6711 uae_s8 *branchadd=(uae_s8*)get_target();
6712 emit_byte(0);
6713 raw_call((uintptr)cpu_do_check_ticks);
6714 *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
6715 #endif
6716
6717 #if JIT_DEBUG
6718 if (JITDebug) {
6719 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6720 raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6721 }
6722 #endif
6723
6724 for (i=0;i<blocklen &&
6725 get_target_noopt()<max_compile_start;i++) {
6726 cpuop_func **cputbl;
6727 compop_func **comptbl;
6728 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6729 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6730 if (!needed_flags) {
6731 cputbl=nfcpufunctbl;
6732 comptbl=nfcompfunctbl;
6733 }
6734 else {
6735 cputbl=cpufunctbl;
6736 comptbl=compfunctbl;
6737 }
6738
6739 #if FLIGHT_RECORDER
6740 {
6741 mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
6742 clobber_flags();
6743 remove_all_offsets();
6744 int arg = readreg_specific(S1,4,REG_PAR1);
6745 prepare_for_call_1();
6746 unlock2(arg);
6747 prepare_for_call_2();
6748 raw_call((uintptr)m68k_record_step);
6749 }
6750 #endif
6751
6752 failure = 1; // gb-- defaults to failure state
6753 if (comptbl[opcode] && optlev>1) {
6754 failure=0;
6755 if (!was_comp) {
6756 comp_pc_p=(uae_u8*)pc_hist[i].location;
6757 init_comp();
6758 }
6759 was_comp=1;
6760
6761 comptbl[opcode](opcode);
6762 freescratch();
6763 if (!(liveflags[i+1] & FLAG_CZNV)) {
6764 /* We can forget about flags */
6765 dont_care_flags();
6766 }
6767 #if INDIVIDUAL_INST
6768 flush(1);
6769 nop();
6770 flush(1);
6771 was_comp=0;
6772 #endif
6773 }
6774
6775 if (failure) {
6776 if (was_comp) {
6777 flush(1);
6778 was_comp=0;
6779 }
6780 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6781 #if USE_NORMAL_CALLING_CONVENTION
6782 raw_push_l_r(REG_PAR1);
6783 #endif
6784 raw_mov_l_mi((uintptr)&regs.pc_p,
6785 (uintptr)pc_hist[i].location);
6786 raw_call((uintptr)cputbl[opcode]);
6787 #if PROFILE_UNTRANSLATED_INSNS
6788 // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6789 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6790 #endif
6791 #if USE_NORMAL_CALLING_CONVENTION
6792 raw_inc_sp(4);
6793 #endif
6794
6795 if (i < blocklen - 1) {
6796 uae_s8* branchadd;
6797
6798 raw_mov_l_rm(0,(uintptr)specflags);
6799 raw_test_l_rr(0,0);
6800 raw_jz_b_oponly();
6801 branchadd=(uae_s8 *)get_target();
6802 emit_byte(0);
6803 raw_jmp((uintptr)popall_do_nothing);
6804 *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6805 }
6806 }
6807 }
6808 #if 1 /* This isn't completely kosher yet; it really needs to
6809 	be integrated into a general inter-block-dependency scheme */
6810 if (next_pc_p && taken_pc_p &&
6811 was_comp && taken_pc_p==current_block_pc_p) {
6812 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6813 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6814 uae_u8 x=bi1->needed_flags;
6815
6816 if (x==0xff || 1) { /* To be on the safe side */
6817 uae_u16* next=(uae_u16*)next_pc_p;
6818 uae_u32 op=DO_GET_OPCODE(next);
6819
6820 x=0x1f;
6821 x&=(~prop[op].set_flags);
6822 x|=prop[op].use_flags;
6823 }
6824
6825 x|=bi2->needed_flags;
6826 if (!(x & FLAG_CZNV)) {
6827 /* We can forget about flags */
6828 dont_care_flags();
6829 extra_len+=2; /* The next instruction now is part of this
6830 block */
6831 }
6832
6833 }
6834 #endif
6835 log_flush();
6836
6837 if (next_pc_p) { /* A branch was registered */
6838 uintptr t1=next_pc_p;
6839 uintptr t2=taken_pc_p;
6840 int cc=branch_cc;
6841
6842 uae_u32* branchadd;
6843 uae_u32* tba;
6844 bigstate tmp;
6845 blockinfo* tbi;
6846
6847 if (taken_pc_p<next_pc_p) {
6848 /* backward branch. Optimize for the "taken" case ---
6849 which means the raw_jcc should fall through when
6850 the 68k branch is taken. */
6851 t1=taken_pc_p;
6852 t2=next_pc_p;
6853 cc=branch_cc^1;
6854 }
6855
6856 tmp=live; /* ouch! This is big... */
6857 raw_jcc_l_oponly(cc);
6858 branchadd=(uae_u32*)get_target();
6859 emit_long(0);
6860
6861 /* predicted outcome */
6862 tbi=get_blockinfo_addr_new((void*)t1,1);
6863 match_states(tbi);
6864 raw_cmp_l_mi((uintptr)specflags,0);
6865 raw_jcc_l_oponly(4);
6866 tba=(uae_u32*)get_target();
6867 emit_long(get_handler(t1)-((uintptr)tba+4));
6868 raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6869 flush_reg_count();
6870 raw_jmp((uintptr)popall_do_nothing);
6871 create_jmpdep(bi,0,tba,t1);
6872
6873 align_target(align_jumps);
6874 /* not-predicted outcome */
6875 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6876 live=tmp; /* Ouch again */
6877 tbi=get_blockinfo_addr_new((void*)t2,1);
6878 match_states(tbi);
6879
6880 //flush(1); /* Can only get here if was_comp==1 */
6881 raw_cmp_l_mi((uintptr)specflags,0);
6882 raw_jcc_l_oponly(4);
6883 tba=(uae_u32*)get_target();
6884 emit_long(get_handler(t2)-((uintptr)tba+4));
6885 raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6886 flush_reg_count();
6887 raw_jmp((uintptr)popall_do_nothing);
6888 create_jmpdep(bi,1,tba,t2);
6889 }
6890 else
6891 {
6892 if (was_comp) {
6893 flush(1);
6894 }
6895 flush_reg_count();
6896
6897 /* Let's find out where next_handler is... */
6898 if (was_comp && isinreg(PC_P)) {
6899 r=live.state[PC_P].realreg;
6900 raw_and_l_ri(r,TAGMASK);
6901 int r2 = (r==0) ? 1 : 0;
6902 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6903 raw_cmp_l_mi((uintptr)specflags,0);
6904 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6905 raw_jmp_r(r2);
6906 }
6907 else if (was_comp && isconst(PC_P)) {
6908 uae_u32 v=live.state[PC_P].val;
6909 uae_u32* tba;
6910 blockinfo* tbi;
6911
6912 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6913 match_states(tbi);
6914
6915 raw_cmp_l_mi((uintptr)specflags,0);
6916 raw_jcc_l_oponly(4);
6917 tba=(uae_u32*)get_target();
6918 emit_long(get_handler(v)-((uintptr)tba+4));
6919 raw_mov_l_mi((uintptr)&regs.pc_p,v);
6920 raw_jmp((uintptr)popall_do_nothing);
6921 create_jmpdep(bi,0,tba,v);
6922 }
6923 else {
6924 r=REG_PC_TMP;
6925 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6926 raw_and_l_ri(r,TAGMASK);
6927 int r2 = (r==0) ? 1 : 0;
6928 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6929 raw_cmp_l_mi((uintptr)specflags,0);
6930 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6931 raw_jmp_r(r2);
6932 }
6933 }
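/* Annotation: the fallback dispatch above is branch-free.  In rough C
   terms (mirroring the emitted AND/CMP/CMOV/JMP sequence):

     uintptr target = (uintptr)popall_do_nothing;
     if (specflags == 0)                        // selected via CMOVcc
         target = (uintptr)cache_tags[pc & TAGMASK];
     goto *target;

   cache_tags[] entries are either compiled handlers or stubs that fall
   back to the interpreter, so jumping through the table is safe even
   for untranslated code. */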
6934 }
6935
6936 #if USE_MATCH
6937 if (callers_need_recompile(&live,&(bi->env))) {
6938 mark_callers_recompile(bi);
6939 }
6940
6941 big_to_small_state(&live,&(bi->env));
6942 #endif
6943
6944 #if USE_CHECKSUM_INFO
6945 remove_from_list(bi);
6946 if (trace_in_rom) {
6947 // No need to checksum that block trace on cache invalidation
6948 free_checksum_info_chain(bi->csi);
6949 bi->csi = NULL;
6950 add_to_dormant(bi);
6951 }
6952 else {
6953 calc_checksum(bi,&(bi->c1),&(bi->c2));
6954 add_to_active(bi);
6955 }
6956 #else
6957 if (next_pc_p+extra_len>=max_pcp &&
6958 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6959 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6960 else
6961 max_pcp+=LONGEST_68K_INST;
6962
6963 bi->len=max_pcp-min_pcp;
6964 bi->min_pcp=min_pcp;
6965
6966 remove_from_list(bi);
6967 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6968 add_to_dormant(bi); /* No need to checksum it on cache flush.
6969 Please don't start changing ROMs in
6970 flight! */
6971 }
6972 else {
6973 calc_checksum(bi,&(bi->c1),&(bi->c2));
6974 add_to_active(bi);
6975 }
6976 #endif
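/* Annotation: two block lists are maintained here.  Blocks whose 68k
   source lies entirely in ROM are "dormant": they cannot change, so no
   checksum is kept and cache invalidation skips them.  RAM-backed
   blocks are "active": c1/c2 record a checksum of the source bytes so
   self-modifying code can be detected on an icache flush. */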
6977
6978 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6979
6980 #if JIT_DEBUG
6981 if (JITDebug)
6982 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6983
6984 if (JITDebug && disasm_block) {
6985 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6986 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6987 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6988 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6989 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6990 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6991 getchar();
6992 }
6993 #endif
6994
6995 log_dump();
6996 align_target(align_jumps);
6997
6998 /* This is the non-direct handler */
6999 bi->handler=
7000 bi->handler_to_use=(cpuop_func *)get_target();
7001 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
7002 raw_jnz((uintptr)popall_cache_miss);
7003 comp_pc_p=(uae_u8*)pc_hist[0].location;
7004
7005 bi->status=BI_FINALIZING;
7006 init_comp();
7007 match_states(bi);
7008 flush(1);
7009
7010 raw_jmp((uintptr)bi->direct_handler);
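/* Annotation: the code just emitted is the non-direct handler, the
   entry point reached through the cache_tags table.  Several 68k
   addresses can share a tag, so it first checks that regs.pc_p really
   is this block's start (a mismatch escapes via popall_cache_miss),
   then re-establishes the block's expected register state and jumps
   into the direct handler. */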
7011
7012 current_compile_p=get_target();
7013 raise_in_cl_list(bi);
7014
7015 /* We will flush soon, anyway, so let's do it now */
7016 if (current_compile_p>=max_compile_start)
7017 flush_icache_hard(7);
7018
7019 bi->status=BI_ACTIVE;
7020 if (redo_current_block)
7021 block_need_recompile(bi);
7022
7023 #if PROFILE_COMPILE_TIME
7024 compile_time += (clock() - start_time);
7025 #endif
7026 }
7027
7028 /* Account for compilation time */
7029 cpu_do_check_ticks();
7030 }
7031
7032 void do_nothing(void)
7033 {
7034 /* What did you expect this to do? */
7035 }
7036
7037 void exec_nostats(void)
7038 {
7039 for (;;) {
7040 uae_u32 opcode = GET_OPCODE;
7041 #if FLIGHT_RECORDER
7042 m68k_record_step(m68k_getpc());
7043 #endif
7044 (*cpufunctbl[opcode])(opcode);
7045 cpu_check_ticks();
7046 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7047 return; /* We will deal with the spcflags in the caller */
7048 }
7049 }
7050 }
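/* Annotation: exec_nostats() interprets one extended basic block
   without recording a trace, so nothing run through it gets compiled;
   it appears to be the counterpart of execute_normal() below for code
   that should stay interpreted. */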
7051
7052 void execute_normal(void)
7053 {
7054 if (!check_for_cache_miss()) {
7055 cpu_history pc_hist[MAXRUN];
7056 int blocklen = 0;
7057 #if REAL_ADDRESSING || DIRECT_ADDRESSING
7058 start_pc_p = regs.pc_p;
7059 start_pc = get_virtual_address(regs.pc_p);
7060 #else
7061 start_pc_p = regs.pc_oldp;
7062 start_pc = regs.pc;
7063 #endif
7064 for (;;) { /* Take note: This is the do-it-normal loop */
7065 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7066 uae_u32 opcode = GET_OPCODE;
7067 #if FLIGHT_RECORDER
7068 m68k_record_step(m68k_getpc());
7069 #endif
7070 (*cpufunctbl[opcode])(opcode);
7071 cpu_check_ticks();
7072 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7073 compile_block(pc_hist, blocklen);
7074 return; /* We will deal with the spcflags in the caller */
7075 }
7076 /* No need to check regs.spcflags, because if they were set,
7077 we'd have ended up inside that "if" */
7078 }
7079 }
7080 }
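/* Annotation: execute_normal() is the profile-and-compile path.  It
   interprets while recording each instruction's address into pc_hist[],
   then hands the trace to compile_block() once the block ends, a
   special flag is raised, or MAXRUN instructions have been recorded. */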
7081
7082 typedef void (*compiled_handler)(void);
7083
7084 static void m68k_do_compile_execute(void)
7085 {
7086 for (;;) {
7087 ((compiled_handler)(pushall_call_handler))();
7088 /* Whenever we return from that, we should check spcflags */
7089 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7090 if (m68k_do_specialties ())
7091 return;
7092 }
7093 }
7094 }
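/* Annotation on the loop above: pushall_call_handler is a generated
   trampoline that sets up the host stack and registers and then
   dispatches through cache_tags for the current regs.pc_p.  Every
   generated block eventually returns here, so special flags are
   serviced at block granularity rather than per instruction. */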
7095
7096 void m68k_compile_execute (void)
7097 {
7098 for (;;) {
7099 if (quit_program)
7100 break;
7101 m68k_do_compile_execute();
7102 }
7103 }