root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.30
Committed: 2005-04-21T09:08:57Z (19 years, 2 months ago) by gbeauche
Branch: MAIN
Changes since 1.29: +1 -0 lines
Log Message:
Recognize lahf_lm from Dual Core Opterons. This enables use of LAHF/SETO
instructions in long mode (64-bit). However, there seems to be another bug
in the JIT preventing it from being fully supported. m68k.h & codegen_x86.h
are easily fixed but another patch is still needed.

File Contents

# Content
1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2005 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 /* NOTE: support for AMD64 assumes translation cache and other code
35 * buffers are allocated into a 32-bit address space because (i) B2/JIT
36 * code is not 64-bit clean and (ii) it's faster to resolve branches
37 * that way.
38 */
39 #if !defined(__i386__) && !defined(__x86_64__)
40 #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41 #endif
42
43 #define USE_MATCH 0
44
45 /* kludge for Brian, so he can compile under MSVC++ */
46 #define USE_NORMAL_CALLING_CONVENTION 0
47
48 #ifndef WIN32
49 #include <unistd.h>
50 #include <sys/types.h>
51 #include <sys/mman.h>
52 #endif
53
54 #include <stdlib.h>
55 #include <fcntl.h>
56 #include <errno.h>
57
58 #include "sysdeps.h"
59 #include "cpu_emulation.h"
60 #include "main.h"
61 #include "prefs.h"
62 #include "user_strings.h"
63 #include "vm_alloc.h"
64
65 #include "m68k.h"
66 #include "memory.h"
67 #include "readcpu.h"
68 #include "newcpu.h"
69 #include "comptbl.h"
70 #include "compiler/compemu.h"
71 #include "fpu/fpu.h"
72 #include "fpu/flags.h"
73
74 #define DEBUG 1
75 #include "debug.h"
76
77 #ifdef ENABLE_MON
78 #include "mon.h"
79 #endif
80
81 #ifndef WIN32
82 #define PROFILE_COMPILE_TIME 1
83 #define PROFILE_UNTRANSLATED_INSNS 1
84 #endif
85
86 #if defined(__x86_64__) && 0
87 #define RECORD_REGISTER_USAGE 1
88 #endif
89
90 #ifdef WIN32
91 #undef write_log
92 #define write_log dummy_write_log
93 static void dummy_write_log(const char *, ...) { }
94 #endif
95
96 #if JIT_DEBUG
97 #undef abort
98 #define abort() do { \
99 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100 exit(EXIT_FAILURE); \
101 } while (0)
102 #endif
103
104 #if RECORD_REGISTER_USAGE
105 static uint64 reg_count[16];
106 static int reg_count_local[16];
107
108 static int reg_count_compare(const void *ap, const void *bp)
109 {
110 const int a = *((int *)ap);
111 const int b = *((int *)bp);
112 return (reg_count[a] < reg_count[b]) - (reg_count[a] > reg_count[b]); /* descending; avoids truncating a 64-bit difference to int */
113 }
114 #endif
115
116 #if PROFILE_COMPILE_TIME
117 #include <time.h>
118 static uae_u32 compile_count = 0;
119 static clock_t compile_time = 0;
120 static clock_t emul_start_time = 0;
121 static clock_t emul_end_time = 0;
122 #endif
123
124 #if PROFILE_UNTRANSLATED_INSNS
125 const int untranslated_top_ten = 20; /* number of entries to report, despite the name */
126 static uae_u32 raw_cputbl_count[65536] = { 0, };
127 static uae_u16 opcode_nums[65536];
128
129 static int untranslated_compfn(const void *e1, const void *e2)
130 {
131 return (raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]) - (raw_cputbl_count[*(const uae_u16 *)e1] > raw_cputbl_count[*(const uae_u16 *)e2]); /* descending; a 0/1 result is not a valid qsort comparator */
132 }
133 #endif
134
135 static compop_func *compfunctbl[65536];
136 static compop_func *nfcompfunctbl[65536];
137 static cpuop_func *nfcpufunctbl[65536];
138 uae_u8* comp_pc_p;
139
140 // From main_unix.cpp
141 extern bool ThirtyThreeBitAddressing;
142
143 // From newcpu.cpp
144 extern bool quit_program;
145
146 // gb-- Extra data for Basilisk II/JIT
147 #if JIT_DEBUG
148 static bool JITDebug = false; // Enable runtime disassemblers through mon?
149 #else
150 const bool JITDebug = false; // Don't use JIT debug mode at all
151 #endif
152
153 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size, in KB (i.e. 1 MB)
154 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
155 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
156 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
157 static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions ?
158 static bool have_cmov = false; // target has CMOV instructions ?
159 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
160 static bool have_rat_stall = true; // target has partial register stalls ?
161 const bool tune_alignment = true; // Tune code alignments for running CPU ?
162 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
163 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
164 static int align_loops = 32; // Align the start of loops
165 static int align_jumps = 32; // Align the start of jumps
166 static int optcount[10] = {
167 10, // How often a block has to be executed before it is translated
168 0, // How often to use naive translation
169 0, 0, 0, 0,
170 -1, -1, -1, -1
171 };
172
173 struct op_properties {
174 uae_u8 use_flags;
175 uae_u8 set_flags;
176 uae_u8 is_addx;
177 uae_u8 cflow;
178 };
179 static op_properties prop[65536];
180
181 static inline int end_block(uae_u32 opcode)
182 {
183 return (prop[opcode].cflow & fl_end_block);
184 }
185
186 static inline bool is_const_jump(uae_u32 opcode)
187 {
188 return (prop[opcode].cflow == fl_const_jump);
189 }
190
191 static inline bool may_trap(uae_u32 opcode)
192 {
193 return (prop[opcode].cflow & fl_trap);
194 }
195
196 static inline unsigned int cft_map (unsigned int f)
197 {
198 #ifndef HAVE_GET_WORD_UNSWAPPED
199 return f;
200 #else
201 return ((f >> 8) & 255) | ((f & 255) << 8);
202 #endif
203 }
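/* Example (added, illustrative): on hosts where HAVE_GET_WORD_UNSWAPPED is
 * defined, opcode words are fetched byte-swapped, so cft_map() swaps the two
 * bytes back, e.g. cft_map(0x754E) == 0x4E75 (RTS); on other hosts it is the
 * identity. */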
204
205 uae_u8* start_pc_p;
206 uae_u32 start_pc;
207 uae_u32 current_block_pc_p;
208 static uintptr current_block_start_target;
209 uae_u32 needed_flags;
210 static uintptr next_pc_p;
211 static uintptr taken_pc_p;
212 static int branch_cc;
213 static int redo_current_block;
214
215 int segvcount=0;
216 int soft_flush_count=0;
217 int hard_flush_count=0;
218 int checksum_count=0;
219 static uae_u8* current_compile_p=NULL;
220 static uae_u8* max_compile_start;
221 static uae_u8* compiled_code=NULL;
222 static uae_s32 reg_alloc_run;
223 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
224 static uae_u8* popallspace=NULL;
225
226 void* pushall_call_handler=NULL;
227 static void* popall_do_nothing=NULL;
228 static void* popall_exec_nostats=NULL;
229 static void* popall_execute_normal=NULL;
230 static void* popall_cache_miss=NULL;
231 static void* popall_recompile_block=NULL;
232 static void* popall_check_checksum=NULL;
233
234 /* The 68k only ever executes from even addresses. So right now, we
235 * waste half the entries in this array.
236 * UPDATE: We now use those entries to store the start of the linked
237 * lists that we maintain for each hash result.
238 */
239 cacheline cache_tags[TAGSIZE];
240 int letit=0;
241 blockinfo* hold_bi[MAX_HOLD_BI];
242 blockinfo* active;
243 blockinfo* dormant;
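/* Illustrative sketch (added; not part of the original source): how one
 * even/odd pair of cache_tags is consumed. The even entry holds the handler
 * the dispatcher jumps through; the odd entry heads the blockinfo chain that
 * get_blockinfo_addr() below walks on a cache-tag collision. */
static __inline__ cpuop_func *sketch_bucket_handler(void *addr)
{
    uae_u32 cl = cacheline(addr);   /* even index, as per the comment above */
    return cache_tags[cl].handler;  /* cache_tags[cl+1].bi is the chain head */
}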
244
245 /* 68040 */
246 extern struct cputbl op_smalltbl_0_nf[];
247 extern struct comptbl op_smalltbl_0_comp_nf[];
248 extern struct comptbl op_smalltbl_0_comp_ff[];
249
250 /* 68020 + 68881 */
251 extern struct cputbl op_smalltbl_1_nf[];
252
253 /* 68020 */
254 extern struct cputbl op_smalltbl_2_nf[];
255
256 /* 68010 */
257 extern struct cputbl op_smalltbl_3_nf[];
258
259 /* 68000 */
260 extern struct cputbl op_smalltbl_4_nf[];
261
262 /* 68000 slow but compatible. */
263 extern struct cputbl op_smalltbl_5_nf[];
264
265 static void flush_icache_hard(int n);
266 static void flush_icache_lazy(int n);
267 static void flush_icache_none(int n);
268 void (*flush_icache)(int n) = flush_icache_none;
269
270
271
272 bigstate live;
273 smallstate empty_ss;
274 smallstate default_ss;
275 static int optlev;
276
277 static int writereg(int r, int size);
278 static void unlock2(int r);
279 static void setlock(int r);
280 static int readreg_specific(int r, int size, int spec);
281 static int writereg_specific(int r, int size, int spec);
282 static void prepare_for_call_1(void);
283 static void prepare_for_call_2(void);
284 static void align_target(uae_u32 a);
285
286 static uae_s32 nextused[VREGS];
287
288 uae_u32 m68k_pc_offset;
289
290 /* Some arithmetic operations can be optimized away if the operands
291 * are known to be constant. But that's only a good idea when the
292 * side effects they would have on the flags are not important. This
293 * variable indicates whether we need the side effects or not.
294 */
295 uae_u32 needflags=0;
296
297 /* Flag handling is complicated.
298 *
299 * x86 instructions create flags, which quite often are exactly what we
300 * want. So at times, the "68k" flags are actually in the x86 flags.
301 *
302 * Then again, sometimes we do x86 instructions that clobber the x86
303 * flags, but don't represent a corresponding m68k instruction. In that
304 * case, we have to save them.
305 *
306 * We used to save them to the stack, but now store them back directly
307 * into the regflags.cznv of the traditional emulation. Thus some odd
308 * names.
309 *
310 * So flags can be in either of two places (used to be three; boy were
311 * things complicated back then!). Either place can contain either
312 * valid flags or invalid trash (and on the stack, there was also the
313 * option of "nothing at all", now gone). A couple of variables keep
314 * track of the respective states.
315 *
316 * To make things worse, we might or might not be interested in the flags.
317 * By default, we are, but a call to dont_care_flags can change that
318 * until the next call to live_flags. If we are not, pretty much whatever
319 * is in the register and/or the native flags is seen as valid.
320 */
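/* Added summary (illustrative; it restates the comment above and adds no
 * new rules): the legal combinations of the two state variables are
 *
 *   flags_in_flags   flags_on_stack   meaning
 *   VALID            (anything)       68k flags currently live in the x86 flags
 *   TRASH            VALID            flags were saved into regflags.cznv
 *   TRASH            TRASH            no valid copy anywhere -- only OK while
 *                                     flags_are_important is 0
 *
 * A hypothetical sanity check (never called; for illustration only):
 */
static __inline__ void sketch_check_flags_invariant(void)
{
    Dif (live.flags_are_important
         && live.flags_in_flags != VALID && live.flags_on_stack != VALID) {
        write_log("Flags are needed, but no valid copy exists\n");
        abort();
    }
}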
321
322 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
323 {
324 return cache_tags[cl+1].bi;
325 }
326
327 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
328 {
329 blockinfo* bi=get_blockinfo(cacheline(addr));
330
331 while (bi) {
332 if (bi->pc_p==addr)
333 return bi;
334 bi=bi->next_same_cl;
335 }
336 return NULL;
337 }
338
339
340 /*******************************************************************
341 * All sorts of list related functions for all of the lists *
342 *******************************************************************/
343
344 static __inline__ void remove_from_cl_list(blockinfo* bi)
345 {
346 uae_u32 cl=cacheline(bi->pc_p);
347
348 if (bi->prev_same_cl_p)
349 *(bi->prev_same_cl_p)=bi->next_same_cl;
350 if (bi->next_same_cl)
351 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
352 if (cache_tags[cl+1].bi)
353 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
354 else
355 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
356 }
357
358 static __inline__ void remove_from_list(blockinfo* bi)
359 {
360 if (bi->prev_p)
361 *(bi->prev_p)=bi->next;
362 if (bi->next)
363 bi->next->prev_p=bi->prev_p;
364 }
365
366 static __inline__ void remove_from_lists(blockinfo* bi)
367 {
368 remove_from_list(bi);
369 remove_from_cl_list(bi);
370 }
371
372 static __inline__ void add_to_cl_list(blockinfo* bi)
373 {
374 uae_u32 cl=cacheline(bi->pc_p);
375
376 if (cache_tags[cl+1].bi)
377 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
378 bi->next_same_cl=cache_tags[cl+1].bi;
379
380 cache_tags[cl+1].bi=bi;
381 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
382
383 cache_tags[cl].handler=bi->handler_to_use;
384 }
385
386 static __inline__ void raise_in_cl_list(blockinfo* bi)
387 {
388 remove_from_cl_list(bi);
389 add_to_cl_list(bi);
390 }
391
392 static __inline__ void add_to_active(blockinfo* bi)
393 {
394 if (active)
395 active->prev_p=&(bi->next);
396 bi->next=active;
397
398 active=bi;
399 bi->prev_p=&active;
400 }
401
402 static __inline__ void add_to_dormant(blockinfo* bi)
403 {
404 if (dormant)
405 dormant->prev_p=&(bi->next);
406 bi->next=dormant;
407
408 dormant=bi;
409 bi->prev_p=&dormant;
410 }
411
412 static __inline__ void remove_dep(dependency* d)
413 {
414 if (d->prev_p)
415 *(d->prev_p)=d->next;
416 if (d->next)
417 d->next->prev_p=d->prev_p;
418 d->prev_p=NULL;
419 d->next=NULL;
420 }
421
422 /* This block's code is about to be thrown away, so it no longer
423 depends on anything else */
424 static __inline__ void remove_deps(blockinfo* bi)
425 {
426 remove_dep(&(bi->dep[0]));
427 remove_dep(&(bi->dep[1]));
428 }
429
430 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
431 {
432 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
433 }
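/* Note (added): d->jmp_off points at the 32-bit displacement field of an
 * already-emitted near jump. x86 rel32 displacements are relative to the end
 * of the instruction, i.e. to the byte after the 4-byte field, hence the +4.
 * For example, a jump whose displacement field sits at 0x1000 and that must
 * reach a handler at 0x2000 gets the displacement 0x2000 - (0x1000 + 4). */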
434
435 /********************************************************************
436 * Soft flush handling support functions *
437 ********************************************************************/
438
439 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
440 {
441 //write_log("bi is %p\n",bi);
442 if (dh!=bi->direct_handler_to_use) {
443 dependency* x=bi->deplist;
444 //write_log("bi->deplist=%p\n",bi->deplist);
445 while (x) {
446 //write_log("x is %p\n",x);
447 //write_log("x->next is %p\n",x->next);
448 //write_log("x->prev_p is %p\n",x->prev_p);
449
450 if (x->jmp_off) {
451 adjust_jmpdep(x,dh);
452 }
453 x=x->next;
454 }
455 bi->direct_handler_to_use=dh;
456 }
457 }
458
459 static __inline__ void invalidate_block(blockinfo* bi)
460 {
461 int i;
462
463 bi->optlevel=0;
464 bi->count=optcount[0]-1;
465 bi->handler=NULL;
466 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
467 bi->direct_handler=NULL;
468 set_dhtu(bi,bi->direct_pen);
469 bi->needed_flags=0xff;
470 bi->status=BI_INVALID;
471 for (i=0;i<2;i++) {
472 bi->dep[i].jmp_off=NULL;
473 bi->dep[i].target=NULL;
474 }
475 remove_deps(bi);
476 }
477
478 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
479 {
480 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
481
482 Dif(!tbi) {
483 write_log("Could not create jmpdep!\n");
484 abort();
485 }
486 bi->dep[i].jmp_off=jmpaddr;
487 bi->dep[i].source=bi;
488 bi->dep[i].target=tbi;
489 bi->dep[i].next=tbi->deplist;
490 if (bi->dep[i].next)
491 bi->dep[i].next->prev_p=&(bi->dep[i].next);
492 bi->dep[i].prev_p=&(tbi->deplist);
493 tbi->deplist=&(bi->dep[i]);
494 }
495
496 static __inline__ void block_need_recompile(blockinfo * bi)
497 {
498 uae_u32 cl = cacheline(bi->pc_p);
499
500 set_dhtu(bi, bi->direct_pen);
501 bi->direct_handler = bi->direct_pen;
502
503 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
504 bi->handler = (cpuop_func *)popall_execute_normal;
505 if (bi == cache_tags[cl + 1].bi)
506 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
507 bi->status = BI_NEED_RECOMP;
508 }
509
510 static __inline__ void mark_callers_recompile(blockinfo * bi)
511 {
512 dependency *x = bi->deplist;
513
514 while (x) {
515 dependency *next = x->next; /* This disappears when we mark for
516 * recompilation and thus remove the
517 * blocks from the lists */
518 if (x->jmp_off) {
519 blockinfo *cbi = x->source;
520
521 Dif(cbi->status == BI_INVALID) {
522 // write_log("invalid block in dependency list\n"); // FIXME?
523 // abort();
524 }
525 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
526 block_need_recompile(cbi);
527 mark_callers_recompile(cbi);
528 }
529 else if (cbi->status == BI_COMPILING) {
530 redo_current_block = 1;
531 }
532 else if (cbi->status == BI_NEED_RECOMP) {
533 /* nothing */
534 }
535 else {
536 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
537 }
538 }
539 x = next;
540 }
541 }
542
543 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
544 {
545 blockinfo* bi=get_blockinfo_addr(addr);
546 int i;
547
548 if (!bi) {
549 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
550 if (hold_bi[i]) {
551 uae_u32 cl=cacheline(addr);
552
553 bi=hold_bi[i];
554 hold_bi[i]=NULL;
555 bi->pc_p=(uae_u8 *)addr;
556 invalidate_block(bi);
557 add_to_active(bi);
558 add_to_cl_list(bi);
559
560 }
561 }
562 }
563 if (!bi) {
564 write_log("Looking for blockinfo, can't find free one\n");
565 abort();
566 }
567 return bi;
568 }
569
570 static void prepare_block(blockinfo* bi);
571
572 /* Management of blockinfos.
573 
574 A blockinfo struct is allocated whenever a new block has to be
575 compiled. If the list of free blockinfos is empty, we allocate a new
576 pool of blockinfos and link the newly created blockinfos together
577 into the list of free blockinfos. Otherwise, we simply pop a structure
578 off the free list.
579 
580 Blockinfos are lazily deallocated, i.e. chained together in the
581 list of free blockinfos whenever a translation cache flush (hard or
582 soft) request occurs.
583 */
584
585 template< class T >
586 class LazyBlockAllocator
587 {
588 enum {
589 kPoolSize = 1 + 4096 / sizeof(T)
590 };
591 struct Pool {
592 T chunk[kPoolSize];
593 Pool * next;
594 };
595 Pool * mPools;
596 T * mChunks;
597 public:
598 LazyBlockAllocator() : mPools(0), mChunks(0) { }
599 ~LazyBlockAllocator();
600 T * acquire();
601 void release(T * const);
602 };
603
604 template< class T >
605 LazyBlockAllocator<T>::~LazyBlockAllocator()
606 {
607 Pool * currentPool = mPools;
608 while (currentPool) {
609 Pool * deadPool = currentPool;
610 currentPool = currentPool->next;
611 free(deadPool);
612 }
613 }
614
615 template< class T >
616 T * LazyBlockAllocator<T>::acquire()
617 {
618 if (!mChunks) {
619 // There is no chunk left, allocate a new pool and link the
620 // chunks into the free list
621 Pool * newPool = (Pool *)malloc(sizeof(Pool));
622 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
623 chunk->next = mChunks;
624 mChunks = chunk;
625 }
626 newPool->next = mPools;
627 mPools = newPool;
628 }
629 T * chunk = mChunks;
630 mChunks = chunk->next;
631 return chunk;
632 }
633
634 template< class T >
635 void LazyBlockAllocator<T>::release(T * const chunk)
636 {
637 chunk->next = mChunks;
638 mChunks = chunk;
639 }
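/* Usage sketch (added, illustrative): an acquire/release round-trip. Note
 * that release() only pushes the chunk back on the free list; memory is
 * returned to the OS when the allocator itself is destroyed. Assumes T has
 * a 'next' pointer, which blockinfo and checksum_info both provide. */
static __inline__ void sketch_allocator_roundtrip(LazyBlockAllocator<blockinfo> &a)
{
    blockinfo *bi = a.acquire();  /* pops a chunk; refills from a new pool if empty */
    a.release(bi);                /* pushes it straight back on the free list */
}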
640
641 template< class T >
642 class HardBlockAllocator
643 {
644 public:
645 T * acquire() {
646 T * data = (T *)current_compile_p;
647 current_compile_p += sizeof(T);
648 return data;
649 }
650
651 void release(T * const chunk) {
652 // Deallocated on invalidation
653 }
654 };
655
656 #if USE_SEPARATE_BIA
657 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
658 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
659 #else
660 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
661 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
662 #endif
663
664 static __inline__ checksum_info *alloc_checksum_info(void)
665 {
666 checksum_info *csi = ChecksumInfoAllocator.acquire();
667 csi->next = NULL;
668 return csi;
669 }
670
671 static __inline__ void free_checksum_info(checksum_info *csi)
672 {
673 csi->next = NULL;
674 ChecksumInfoAllocator.release(csi);
675 }
676
677 static __inline__ void free_checksum_info_chain(checksum_info *csi)
678 {
679 while (csi != NULL) {
680 checksum_info *csi2 = csi->next;
681 free_checksum_info(csi);
682 csi = csi2;
683 }
684 }
685
686 static __inline__ blockinfo *alloc_blockinfo(void)
687 {
688 blockinfo *bi = BlockInfoAllocator.acquire();
689 #if USE_CHECKSUM_INFO
690 bi->csi = NULL;
691 #endif
692 return bi;
693 }
694
695 static __inline__ void free_blockinfo(blockinfo *bi)
696 {
697 #if USE_CHECKSUM_INFO
698 free_checksum_info_chain(bi->csi);
699 bi->csi = NULL;
700 #endif
701 BlockInfoAllocator.release(bi);
702 }
703
704 static __inline__ void alloc_blockinfos(void)
705 {
706 int i;
707 blockinfo* bi;
708
709 for (i=0;i<MAX_HOLD_BI;i++) {
710 if (hold_bi[i])
711 return;
712 bi=hold_bi[i]=alloc_blockinfo();
713 prepare_block(bi);
714 }
715 }
716
717 /********************************************************************
718 * Functions to emit data into memory, and other general support *
719 ********************************************************************/
720
721 static uae_u8* target;
722
723 static void emit_init(void)
724 {
725 }
726
727 static __inline__ void emit_byte(uae_u8 x)
728 {
729 *target++=x;
730 }
731
732 static __inline__ void emit_word(uae_u16 x)
733 {
734 *((uae_u16*)target)=x;
735 target+=2;
736 }
737
738 static __inline__ void emit_long(uae_u32 x)
739 {
740 *((uae_u32*)target)=x;
741 target+=4;
742 }
743
744 static __inline__ void emit_quad(uae_u64 x)
745 {
746 *((uae_u64*)target)=x;
747 target+=8;
748 }
749
750 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
751 {
752 memcpy((uae_u8 *)target,block,blocklen);
753 target+=blocklen;
754 }
755
756 static __inline__ uae_u32 reverse32(uae_u32 v)
757 {
758 #if 1
759 // gb-- We have specialized byteswapping functions, just use them
760 return do_byteswap_32(v);
761 #else
762 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
763 #endif
764 }
765
766 /********************************************************************
767 * Getting the information about the target CPU *
768 ********************************************************************/
769
770 #include "codegen_x86.cpp"
771
772 void set_target(uae_u8* t)
773 {
774 target=t;
775 }
776
777 static __inline__ uae_u8* get_target_noopt(void)
778 {
779 return target;
780 }
781
782 __inline__ uae_u8* get_target(void)
783 {
784 return get_target_noopt();
785 }
786
787
788 /********************************************************************
789 * Flags status handling. EMIT TIME! *
790 ********************************************************************/
791
792 static void bt_l_ri_noclobber(R4 r, IMM i);
793
794 static void make_flags_live_internal(void)
795 {
796 if (live.flags_in_flags==VALID)
797 return;
798 Dif (live.flags_on_stack==TRASH) {
799 write_log("Want flags, got something on stack, but it is TRASH\n");
800 abort();
801 }
802 if (live.flags_on_stack==VALID) {
803 int tmp;
804 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
805 raw_reg_to_flags(tmp);
806 unlock2(tmp);
807
808 live.flags_in_flags=VALID;
809 return;
810 }
811 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
812 live.flags_in_flags,live.flags_on_stack);
813 abort();
814 }
815
816 static void flags_to_stack(void)
817 {
818 if (live.flags_on_stack==VALID)
819 return;
820 if (!live.flags_are_important) {
821 live.flags_on_stack=VALID;
822 return;
823 }
824 Dif (live.flags_in_flags!=VALID)
825 abort();
826 else {
827 int tmp;
828 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
829 raw_flags_to_reg(tmp);
830 unlock2(tmp);
831 }
832 live.flags_on_stack=VALID;
833 }
834
835 static __inline__ void clobber_flags(void)
836 {
837 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
838 flags_to_stack();
839 live.flags_in_flags=TRASH;
840 }
841
842 /* Prepare for leaving the compiled stuff */
843 static __inline__ void flush_flags(void)
844 {
845 flags_to_stack();
846 return;
847 }
848
849 int touchcnt;
850
851 /********************************************************************
852 * Partial register flushing for optimized calls *
853 ********************************************************************/
854
855 struct regusage {
856 uae_u16 rmask;
857 uae_u16 wmask;
858 };
859
860 static inline void ru_set(uae_u16 *mask, int reg)
861 {
862 #if USE_OPTIMIZED_CALLS
863 *mask |= 1 << reg;
864 #endif
865 }
866
867 static inline bool ru_get(const uae_u16 *mask, int reg)
868 {
869 #if USE_OPTIMIZED_CALLS
870 return (*mask & (1 << reg));
871 #else
872 /* Default: instruction reads & writes the register */
873 return true;
874 #endif
875 }
876
877 static inline void ru_set_read(regusage *ru, int reg)
878 {
879 ru_set(&ru->rmask, reg);
880 }
881
882 static inline void ru_set_write(regusage *ru, int reg)
883 {
884 ru_set(&ru->wmask, reg);
885 }
886
887 static inline bool ru_read_p(const regusage *ru, int reg)
888 {
889 return ru_get(&ru->rmask, reg);
890 }
891
892 static inline bool ru_write_p(const regusage *ru, int reg)
893 {
894 return ru_get(&ru->wmask, reg);
895 }
896
897 static void ru_fill_ea(regusage *ru, int reg, amodes mode,
898 wordsizes size, int write_mode)
899 {
900 switch (mode) {
901 case Areg:
902 reg += 8;
903 /* fall through */
904 case Dreg:
905 ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
906 break;
907 case Ad16:
908 /* skip displacement */
909 m68k_pc_offset += 2;
910 case Aind:
911 case Aipi:
912 case Apdi:
913 ru_set_read(ru, reg+8);
914 break;
915 case Ad8r:
916 ru_set_read(ru, reg+8);
917 /* fall through */
918 case PC8r: {
919 uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
920 reg = (dp >> 12) & 15;
921 ru_set_read(ru, reg);
922 if (dp & 0x100)
923 m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
924 break;
925 }
926 case PC16:
927 case absw:
928 case imm0:
929 case imm1:
930 m68k_pc_offset += 2;
931 break;
932 case absl:
933 case imm2:
934 m68k_pc_offset += 4;
935 break;
936 case immi:
937 m68k_pc_offset += (size == sz_long) ? 4 : 2;
938 break;
939 }
940 }
941
942 /* TODO: split into a static initialization part and a dynamic one
943 (instructions depending on extension words) */
944 static void ru_fill(regusage *ru, uae_u32 opcode)
945 {
946 m68k_pc_offset += 2;
947
948 /* Default: no register is used or written to */
949 ru->rmask = 0;
950 ru->wmask = 0;
951
952 uae_u32 real_opcode = cft_map(opcode);
953 struct instr *dp = &table68k[real_opcode];
954
955 bool rw_dest = true;
956 bool handled = false;
957
958 /* Handle some instructions specifically */
959 uae_u16 reg, ext;
960 switch (dp->mnemo) {
961 case i_BFCHG:
962 case i_BFCLR:
963 case i_BFEXTS:
964 case i_BFEXTU:
965 case i_BFFFO:
966 case i_BFINS:
967 case i_BFSET:
968 case i_BFTST:
969 ext = comp_get_iword((m68k_pc_offset+=2)-2);
970 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
971 if (ext & 0x020) ru_set_read(ru, ext & 7);
972 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
973 if (dp->dmode == Dreg)
974 ru_set_read(ru, dp->dreg);
975 switch (dp->mnemo) {
976 case i_BFEXTS:
977 case i_BFEXTU:
978 case i_BFFFO:
979 ru_set_write(ru, (ext >> 12) & 7);
980 break;
981 case i_BFINS:
982 ru_set_read(ru, (ext >> 12) & 7);
983 /* fall through */
984 case i_BFCHG:
985 case i_BFCLR:
986 case i_BFSET:
987 if (dp->dmode == Dreg)
988 ru_set_write(ru, dp->dreg);
989 break;
990 }
991 handled = true;
992 rw_dest = false;
993 break;
994
995 case i_BTST:
996 rw_dest = false;
997 break;
998
999 case i_CAS:
1000 {
1001 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1002 int Du = ext & 7;
1003 ru_set_read(ru, Du);
1004 int Dc = (ext >> 6) & 7;
1005 ru_set_read(ru, Dc);
1006 ru_set_write(ru, Dc);
1007 break;
1008 }
1009 case i_CAS2:
1010 {
1011 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1012 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1013 Rn1 = (ext >> 12) & 15;
1014 Du1 = (ext >> 6) & 7;
1015 Dc1 = ext & 7;
1016 ru_set_read(ru, Rn1);
1017 ru_set_read(ru, Du1);
1018 ru_set_read(ru, Dc1);
1019 ru_set_write(ru, Dc1);
1020 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1021 Rn2 = (ext >> 12) & 15;
1022 Du2 = (ext >> 6) & 7;
1023 Dc2 = ext & 7;
1024 ru_set_read(ru, Rn2);
1025 ru_set_read(ru, Du2);
1026 ru_set_write(ru, Dc2);
1027 break;
1028 }
1029 case i_DIVL: case i_MULL:
1030 m68k_pc_offset += 2;
1031 break;
1032 case i_LEA:
1033 case i_MOVE: case i_MOVEA: case i_MOVE16:
1034 rw_dest = false;
1035 break;
1036 case i_PACK: case i_UNPK:
1037 rw_dest = false;
1038 m68k_pc_offset += 2;
1039 break;
1040 case i_TRAPcc:
1041 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1042 break;
1043 case i_RTR:
1044 /* do nothing, just for coverage debugging */
1045 break;
1046 /* TODO: handle EXG instruction */
1047 }
1048
1049 /* Handle A-Traps better */
1050 if ((real_opcode & 0xf000) == 0xa000) {
1051 handled = true;
1052 }
1053
1054 /* Handle EmulOps better */
1055 if ((real_opcode & 0xff00) == 0x7100) {
1056 handled = true;
1057 ru->rmask = 0xffff;
1058 ru->wmask = 0;
1059 }
1060
1061 if (dp->suse && !handled)
1062 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1063
1064 if (dp->duse && !handled)
1065 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1066
1067 if (rw_dest)
1068 ru->rmask |= ru->wmask;
1069
1070 handled = handled || dp->suse || dp->duse;
1071
1072 /* Mark all registers as used/written if the instruction may trap */
1073 if (may_trap(opcode)) {
1074 handled = true;
1075 ru->rmask = 0xffff;
1076 ru->wmask = 0xffff;
1077 }
1078
1079 if (!handled) {
1080 write_log("ru_fill: %04x = { %04x, %04x }\n",
1081 real_opcode, ru->rmask, ru->wmask);
1082 abort();
1083 }
1084 }
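/* Worked example (added, illustrative): for a plain MOVE.W D1,D2 (encoding
 * 0x3401, assuming cft_map() is the identity, i.e. no byte-swapped opcode
 * words, and that table68k has been initialized), dp->mnemo is i_MOVE so
 * rw_dest is cleared: the source EA sets rmask bit 1 and the destination EA
 * sets wmask bit 2. The masks are only actually recorded when
 * USE_OPTIMIZED_CALLS is enabled, since ru_set() is a no-op otherwise; the
 * instruction must also not be flagged fl_trap, which would force both
 * masks to 0xffff. */
static void sketch_ru_fill_example(void)
{
    regusage ru;
    m68k_pc_offset = 0;
    ru_fill(&ru, 0x3401);
    /* expected (with USE_OPTIMIZED_CALLS): ru.rmask == 0x0002 (D1 read),
       ru.wmask == 0x0004 (D2 written) */
}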
1085
1086 /********************************************************************
1087 * register allocation per block logging *
1088 ********************************************************************/
1089
1090 static uae_s8 vstate[VREGS];
1091 static uae_s8 vwritten[VREGS];
1092 static uae_s8 nstate[N_REGS];
1093
1094 #define L_UNKNOWN -127
1095 #define L_UNAVAIL -1
1096 #define L_NEEDED -2
1097 #define L_UNNEEDED -3
1098
1099 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1100 {
1101 int i;
1102
1103 for (i = 0; i < VREGS; i++)
1104 s->virt[i] = vstate[i];
1105 for (i = 0; i < N_REGS; i++)
1106 s->nat[i] = nstate[i];
1107 }
1108
1109 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1110 {
1111 int i;
1112 int reverse = 0;
1113
1114 for (i = 0; i < VREGS; i++) {
1115 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1116 return 1;
1117 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1118 reverse++;
1119 }
1120 for (i = 0; i < N_REGS; i++) {
1121 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1122 return 1;
1123 if (nstate[i] < 0 && s->nat[i] >= 0)
1124 reverse++;
1125 }
1126 if (reverse >= 2 && USE_MATCH)
1127 return 1; /* In this case, it might be worth recompiling the
1128 * callers */
1129 return 0;
1130 }
1131
1132 static __inline__ void log_startblock(void)
1133 {
1134 int i;
1135
1136 for (i = 0; i < VREGS; i++) {
1137 vstate[i] = L_UNKNOWN;
1138 vwritten[i] = 0;
1139 }
1140 for (i = 0; i < N_REGS; i++)
1141 nstate[i] = L_UNKNOWN;
1142 }
1143
1144 /* Using an n-reg for a temp variable */
1145 static __inline__ void log_isused(int n)
1146 {
1147 if (nstate[n] == L_UNKNOWN)
1148 nstate[n] = L_UNAVAIL;
1149 }
1150
1151 static __inline__ void log_visused(int r)
1152 {
1153 if (vstate[r] == L_UNKNOWN)
1154 vstate[r] = L_NEEDED;
1155 }
1156
1157 static __inline__ void do_load_reg(int n, int r)
1158 {
1159 if (r == FLAGTMP)
1160 raw_load_flagreg(n, r);
1161 else if (r == FLAGX)
1162 raw_load_flagx(n, r);
1163 else
1164 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1165 }
1166
1167 static __inline__ void check_load_reg(int n, int r)
1168 {
1169 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1170 }
1171
1172 static __inline__ void log_vwrite(int r)
1173 {
1174 vwritten[r] = 1;
1175 }
1176
1177 /* Using an n-reg to hold a v-reg */
1178 static __inline__ void log_isreg(int n, int r)
1179 {
1180 static int count = 0;
1181
1182 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1183 nstate[n] = r;
1184 else {
1185 do_load_reg(n, r);
1186 if (nstate[n] == L_UNKNOWN)
1187 nstate[n] = L_UNAVAIL;
1188 }
1189 if (vstate[r] == L_UNKNOWN)
1190 vstate[r] = L_NEEDED;
1191 }
1192
1193 static __inline__ void log_clobberreg(int r)
1194 {
1195 if (vstate[r] == L_UNKNOWN)
1196 vstate[r] = L_UNNEEDED;
1197 }
1198
1199 /* This ends all possibility of clever register allocation */
1200
1201 static __inline__ void log_flush(void)
1202 {
1203 int i;
1204
1205 for (i = 0; i < VREGS; i++)
1206 if (vstate[i] == L_UNKNOWN)
1207 vstate[i] = L_NEEDED;
1208 for (i = 0; i < N_REGS; i++)
1209 if (nstate[i] == L_UNKNOWN)
1210 nstate[i] = L_UNAVAIL;
1211 }
1212
1213 static __inline__ void log_dump(void)
1214 {
1215 int i;
1216
1217 return; /* dump disabled; remove this early return to re-enable the logging below */
1218
1219 write_log("----------------------\n");
1220 for (i = 0; i < N_REGS; i++) {
1221 switch (nstate[i]) {
1222 case L_UNKNOWN:
1223 write_log("Nat %d : UNKNOWN\n", i);
1224 break;
1225 case L_UNAVAIL:
1226 write_log("Nat %d : UNAVAIL\n", i);
1227 break;
1228 default:
1229 write_log("Nat %d : %d\n", i, nstate[i]);
1230 break;
1231 }
1232 }
1233 for (i = 0; i < VREGS; i++) {
1234 if (vstate[i] == L_UNNEEDED)
1235 write_log("Virt %d: UNNEEDED\n", i);
1236 }
1237 }
1238
1239 /********************************************************************
1240 * register status handling. EMIT TIME! *
1241 ********************************************************************/
1242
1243 static __inline__ void set_status(int r, int status)
1244 {
1245 if (status == ISCONST)
1246 log_clobberreg(r);
1247 live.state[r].status=status;
1248 }
1249
1250 static __inline__ int isinreg(int r)
1251 {
1252 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1253 }
1254
1255 static __inline__ void adjust_nreg(int r, uae_u32 val)
1256 {
1257 if (!val)
1258 return;
1259 raw_lea_l_brr(r,r,val);
1260 }
1261
1262 static void tomem(int r)
1263 {
1264 int rr=live.state[r].realreg;
1265
1266 if (isinreg(r)) {
1267 if (live.state[r].val && live.nat[rr].nholds==1
1268 && !live.nat[rr].locked) {
1269 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1270 // live.state[r].val,r,rr,target);
1271 adjust_nreg(rr,live.state[r].val);
1272 live.state[r].val=0;
1273 live.state[r].dirtysize=4;
1274 set_status(r,DIRTY);
1275 }
1276 }
1277
1278 if (live.state[r].status==DIRTY) {
1279 switch (live.state[r].dirtysize) {
1280 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1281 case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1282 case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1283 default: abort();
1284 }
1285 log_vwrite(r);
1286 set_status(r,CLEAN);
1287 live.state[r].dirtysize=0;
1288 }
1289 }
1290
1291 static __inline__ int isconst(int r)
1292 {
1293 return live.state[r].status==ISCONST;
1294 }
1295
1296 int is_const(int r)
1297 {
1298 return isconst(r);
1299 }
1300
1301 static __inline__ void writeback_const(int r)
1302 {
1303 if (!isconst(r))
1304 return;
1305 Dif (live.state[r].needflush==NF_HANDLER) {
1306 write_log("Trying to write back constant NF_HANDLER!\n");
1307 abort();
1308 }
1309
1310 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1311 log_vwrite(r);
1312 live.state[r].val=0;
1313 set_status(r,INMEM);
1314 }
1315
1316 static __inline__ void tomem_c(int r)
1317 {
1318 if (isconst(r)) {
1319 writeback_const(r);
1320 }
1321 else
1322 tomem(r);
1323 }
1324
1325 static void evict(int r)
1326 {
1327 int rr;
1328
1329 if (!isinreg(r))
1330 return;
1331 tomem(r);
1332 rr=live.state[r].realreg;
1333
1334 Dif (live.nat[rr].locked &&
1335 live.nat[rr].nholds==1) {
1336 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1337 abort();
1338 }
1339
1340 live.nat[rr].nholds--;
1341 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1342 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1343 int thisind=live.state[r].realind;
1344
1345 live.nat[rr].holds[thisind]=topreg;
1346 live.state[topreg].realind=thisind;
1347 }
1348 live.state[r].realreg=-1;
1349 set_status(r,INMEM);
1350 }
1351
1352 static __inline__ void free_nreg(int r)
1353 {
1354 int i=live.nat[r].nholds;
1355
1356 while (i) {
1357 int vr;
1358
1359 --i;
1360 vr=live.nat[r].holds[i];
1361 evict(vr);
1362 }
1363 Dif (live.nat[r].nholds!=0) {
1364 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1365 abort();
1366 }
1367 }
1368
1369 /* Use with care! */
1370 static __inline__ void isclean(int r)
1371 {
1372 if (!isinreg(r))
1373 return;
1374 live.state[r].validsize=4;
1375 live.state[r].dirtysize=0;
1376 live.state[r].val=0;
1377 set_status(r,CLEAN);
1378 }
1379
1380 static __inline__ void disassociate(int r)
1381 {
1382 isclean(r);
1383 evict(r);
1384 }
1385
1386 static __inline__ void set_const(int r, uae_u32 val)
1387 {
1388 disassociate(r);
1389 live.state[r].val=val;
1390 set_status(r,ISCONST);
1391 }
1392
1393 static __inline__ uae_u32 get_offset(int r)
1394 {
1395 return live.state[r].val;
1396 }
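/* Note (added): live.state[r].val does double duty. While a register's
 * status is ISCONST it holds the constant value itself; otherwise it is a
 * pending offset that add_offset() accumulates and adjust_nreg()/tomem()
 * later fold into the native register. get_offset() therefore only makes
 * sense right after readreg_offset(), per the rules further below. */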
1397
1398 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1399 {
1400 int bestreg;
1401 uae_s32 when;
1402 int i;
1403 uae_s32 badness=0; /* to shut up gcc */
1404 bestreg=-1;
1405 when=2000000000;
1406
1407 for (i=N_REGS;i--;) {
1408 badness=live.nat[i].touched;
1409 if (live.nat[i].nholds==0)
1410 badness=0;
1411 if (i==hint)
1412 badness-=200000000;
1413 if (!live.nat[i].locked && badness<when) {
1414 if ((size==1 && live.nat[i].canbyte) ||
1415 (size==2 && live.nat[i].canword) ||
1416 (size==4)) {
1417 bestreg=i;
1418 when=badness;
1419 if (live.nat[i].nholds==0 && hint<0)
1420 break;
1421 if (i==hint)
1422 break;
1423 }
1424 }
1425 }
1426 Dif (bestreg==-1)
1427 abort();
1428
1429 if (live.nat[bestreg].nholds>0) {
1430 free_nreg(bestreg);
1431 }
1432 if (isinreg(r)) {
1433 int rr=live.state[r].realreg;
1434 /* This will happen if we read a partially dirty register at a
1435 bigger size */
1436 Dif (willclobber || live.state[r].validsize>=size)
1437 abort();
1438 Dif (live.nat[rr].nholds!=1)
1439 abort();
1440 if (size==4 && live.state[r].validsize==2) {
1441 log_isused(bestreg);
1442 log_visused(r);
1443 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
1444 raw_bswap_32(bestreg);
1445 raw_zero_extend_16_rr(rr,rr);
1446 raw_zero_extend_16_rr(bestreg,bestreg);
1447 raw_bswap_32(bestreg);
1448 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1449 live.state[r].validsize=4;
1450 live.nat[rr].touched=touchcnt++;
1451 return rr;
1452 }
1453 if (live.state[r].validsize==1) {
1454 /* Nothing yet */
1455 }
1456 evict(r);
1457 }
1458
1459 if (!willclobber) {
1460 if (live.state[r].status!=UNDEF) {
1461 if (isconst(r)) {
1462 raw_mov_l_ri(bestreg,live.state[r].val);
1463 live.state[r].val=0;
1464 live.state[r].dirtysize=4;
1465 set_status(r,DIRTY);
1466 log_isused(bestreg);
1467 }
1468 else {
1469 log_isreg(bestreg, r); /* This will also load it! */
1470 live.state[r].dirtysize=0;
1471 set_status(r,CLEAN);
1472 }
1473 }
1474 else {
1475 live.state[r].val=0;
1476 live.state[r].dirtysize=0;
1477 set_status(r,CLEAN);
1478 log_isused(bestreg);
1479 }
1480 live.state[r].validsize=4;
1481 }
1482 else { /* this is the easiest way, but not optimal. FIXME! */
1483 /* Now it's trickier, but hopefully still OK */
1484 if (!isconst(r) || size==4) {
1485 live.state[r].validsize=size;
1486 live.state[r].dirtysize=size;
1487 live.state[r].val=0;
1488 set_status(r,DIRTY);
1489 if (size == 4) {
1490 log_clobberreg(r);
1491 log_isused(bestreg);
1492 }
1493 else {
1494 log_visused(r);
1495 log_isused(bestreg);
1496 }
1497 }
1498 else {
1499 if (live.state[r].status!=UNDEF)
1500 raw_mov_l_ri(bestreg,live.state[r].val);
1501 live.state[r].val=0;
1502 live.state[r].validsize=4;
1503 live.state[r].dirtysize=4;
1504 set_status(r,DIRTY);
1505 log_isused(bestreg);
1506 }
1507 }
1508 live.state[r].realreg=bestreg;
1509 live.state[r].realind=live.nat[bestreg].nholds;
1510 live.nat[bestreg].touched=touchcnt++;
1511 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1512 live.nat[bestreg].nholds++;
1513
1514 return bestreg;
1515 }
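/* Note (added): the bswap dance in the validsize==2 path above merges the
 * up-to-date low word held in the old native register with the high word
 * that is still valid in memory: the memory copy is byte-swapped so its high
 * word can be isolated with a 16-bit zero-extend, swapped back into place,
 * and added to the zero-extended low word via LEA. */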
1516
1517 static int alloc_reg(int r, int size, int willclobber)
1518 {
1519 return alloc_reg_hinted(r,size,willclobber,-1);
1520 }
1521
1522 static void unlock2(int r)
1523 {
1524 Dif (!live.nat[r].locked)
1525 abort();
1526 live.nat[r].locked--;
1527 }
1528
1529 static void setlock(int r)
1530 {
1531 live.nat[r].locked++;
1532 }
1533
1534
1535 static void mov_nregs(int d, int s)
1536 {
1537 int ns=live.nat[s].nholds;
1538 int nd=live.nat[d].nholds;
1539 int i;
1540
1541 if (s==d)
1542 return;
1543
1544 if (nd>0)
1545 free_nreg(d);
1546
1547 log_isused(d);
1548 raw_mov_l_rr(d,s);
1549
1550 for (i=0;i<live.nat[s].nholds;i++) {
1551 int vs=live.nat[s].holds[i];
1552
1553 live.state[vs].realreg=d;
1554 live.state[vs].realind=i;
1555 live.nat[d].holds[i]=vs;
1556 }
1557 live.nat[d].nholds=live.nat[s].nholds;
1558
1559 live.nat[s].nholds=0;
1560 }
1561
1562
1563 static __inline__ void make_exclusive(int r, int size, int spec)
1564 {
1565 int clobber;
1566 reg_status oldstate;
1567 int rr=live.state[r].realreg;
1568 int nr;
1569 int nind;
1570 int ndirt=0;
1571 int i;
1572
1573 if (!isinreg(r))
1574 return;
1575 if (live.nat[rr].nholds==1)
1576 return;
1577 for (i=0;i<live.nat[rr].nholds;i++) {
1578 int vr=live.nat[rr].holds[i];
1579 if (vr!=r &&
1580 (live.state[vr].status==DIRTY || live.state[vr].val))
1581 ndirt++;
1582 }
1583 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1584 /* Everything else is clean, so let's keep this register */
1585 for (i=0;i<live.nat[rr].nholds;i++) {
1586 int vr=live.nat[rr].holds[i];
1587 if (vr!=r) {
1588 evict(vr);
1589 i--; /* Try that index again! */
1590 }
1591 }
1592 Dif (live.nat[rr].nholds!=1) {
1593 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1594 rr,live.nat[rr].nholds,r);
1595 abort();
1596 }
1597 return;
1598 }
1599
1600 /* We have to split the register */
1601 oldstate=live.state[r];
1602
1603 setlock(rr); /* Make sure this doesn't go away */
1604 /* Forget about r being in the register rr */
1605 disassociate(r);
1606 /* Get a new register, that we will clobber completely */
1607 if (oldstate.status==DIRTY) {
1608 /* If dirtysize is <4, we need a register that can handle the
1609 eventual smaller memory store! Thanks to Quake68k for exposing
1610 this detail ;-) */
1611 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1612 }
1613 else {
1614 nr=alloc_reg_hinted(r,4,1,spec);
1615 }
1616 nind=live.state[r].realind;
1617 live.state[r]=oldstate; /* Keep all the old state info */
1618 live.state[r].realreg=nr;
1619 live.state[r].realind=nind;
1620
1621 if (size<live.state[r].validsize) {
1622 if (live.state[r].val) {
1623 /* Might as well compensate for the offset now */
1624 raw_lea_l_brr(nr,rr,oldstate.val);
1625 live.state[r].val=0;
1626 live.state[r].dirtysize=4;
1627 set_status(r,DIRTY);
1628 }
1629 else
1630 raw_mov_l_rr(nr,rr); /* Make another copy */
1631 }
1632 unlock2(rr);
1633 }
1634
1635 static __inline__ void add_offset(int r, uae_u32 off)
1636 {
1637 live.state[r].val+=off;
1638 }
1639
1640 static __inline__ void remove_offset(int r, int spec)
1641 {
1642 reg_status oldstate;
1643 int rr;
1644
1645 if (isconst(r))
1646 return;
1647 if (live.state[r].val==0)
1648 return;
1649 if (isinreg(r) && live.state[r].validsize<4)
1650 evict(r);
1651
1652 if (!isinreg(r))
1653 alloc_reg_hinted(r,4,0,spec);
1654
1655 Dif (live.state[r].validsize!=4) {
1656 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1657 abort();
1658 }
1659 make_exclusive(r,0,-1);
1660 /* make_exclusive might have done the job already */
1661 if (live.state[r].val==0)
1662 return;
1663
1664 rr=live.state[r].realreg;
1665
1666 if (live.nat[rr].nholds==1) {
1667 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1668 // live.state[r].val,r,rr,target);
1669 adjust_nreg(rr,live.state[r].val);
1670 live.state[r].dirtysize=4;
1671 live.state[r].val=0;
1672 set_status(r,DIRTY);
1673 return;
1674 }
1675 write_log("Failed in remove_offset\n");
1676 abort();
1677 }
1678
1679 static __inline__ void remove_all_offsets(void)
1680 {
1681 int i;
1682
1683 for (i=0;i<VREGS;i++)
1684 remove_offset(i,-1);
1685 }
1686
1687 static inline void flush_reg_count(void)
1688 {
1689 #if RECORD_REGISTER_USAGE
1690 for (int r = 0; r < 16; r++)
1691 if (reg_count_local[r])
1692 ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1693 #endif
1694 }
1695
1696 static inline void record_register(int r)
1697 {
1698 #if RECORD_REGISTER_USAGE
1699 if (r < 16)
1700 reg_count_local[r]++;
1701 #endif
1702 }
1703
1704 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1705 {
1706 int n;
1707 int answer=-1;
1708
1709 record_register(r);
1710 if (live.state[r].status==UNDEF) {
1711 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1712 }
1713 if (!can_offset)
1714 remove_offset(r,spec);
1715
1716 if (isinreg(r) && live.state[r].validsize>=size) {
1717 n=live.state[r].realreg;
1718 switch(size) {
1719 case 1:
1720 if (live.nat[n].canbyte || spec>=0) {
1721 answer=n;
1722 }
1723 break;
1724 case 2:
1725 if (live.nat[n].canword || spec>=0) {
1726 answer=n;
1727 }
1728 break;
1729 case 4:
1730 answer=n;
1731 break;
1732 default: abort();
1733 }
1734 if (answer<0)
1735 evict(r);
1736 }
1737 /* either the value was in memory to start with, or it was evicted and
1738 is in memory now */
1739 if (answer<0) {
1740 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1741 }
1742
1743 if (spec>=0 && spec!=answer) {
1744 /* Too bad */
1745 mov_nregs(spec,answer);
1746 answer=spec;
1747 }
1748 live.nat[answer].locked++;
1749 live.nat[answer].touched=touchcnt++;
1750 return answer;
1751 }
1752
1753
1754
1755 static int readreg(int r, int size)
1756 {
1757 return readreg_general(r,size,-1,0);
1758 }
1759
1760 static int readreg_specific(int r, int size, int spec)
1761 {
1762 return readreg_general(r,size,spec,0);
1763 }
1764
1765 static int readreg_offset(int r, int size)
1766 {
1767 return readreg_general(r,size,-1,1);
1768 }
1769
1770 /* writereg_general(r, size, spec)
1771 *
1772 * INPUT
1773 * - r : mid-layer register
1774 * - size : requested size (1/2/4)
1775 * - spec : -1 if find or make a register free, otherwise specifies
1776 * the physical register to use in any case
1777 *
1778 * OUTPUT
1779 * - hard (physical, x86 here) register allocated to virtual register r
1780 */
1781 static __inline__ int writereg_general(int r, int size, int spec)
1782 {
1783 int n;
1784 int answer=-1;
1785
1786 record_register(r);
1787 if (size<4) {
1788 remove_offset(r,spec);
1789 }
1790
1791 make_exclusive(r,size,spec);
1792 if (isinreg(r)) {
1793 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1794 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1795 n=live.state[r].realreg;
1796
1797 Dif (live.nat[n].nholds!=1)
1798 abort();
1799 switch(size) {
1800 case 1:
1801 if (live.nat[n].canbyte || spec>=0) {
1802 live.state[r].dirtysize=ndsize;
1803 live.state[r].validsize=nvsize;
1804 answer=n;
1805 }
1806 break;
1807 case 2:
1808 if (live.nat[n].canword || spec>=0) {
1809 live.state[r].dirtysize=ndsize;
1810 live.state[r].validsize=nvsize;
1811 answer=n;
1812 }
1813 break;
1814 case 4:
1815 live.state[r].dirtysize=ndsize;
1816 live.state[r].validsize=nvsize;
1817 answer=n;
1818 break;
1819 default: abort();
1820 }
1821 if (answer<0)
1822 evict(r);
1823 }
1824 /* either the value was in memory to start with, or it was evicted and
1825 is in memory now */
1826 if (answer<0) {
1827 answer=alloc_reg_hinted(r,size,1,spec);
1828 }
1829 if (spec>=0 && spec!=answer) {
1830 mov_nregs(spec,answer);
1831 answer=spec;
1832 }
1833 if (live.state[r].status==UNDEF)
1834 live.state[r].validsize=4;
1835 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1836 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1837
1838 live.nat[answer].locked++;
1839 live.nat[answer].touched=touchcnt++;
1840 if (size==4) {
1841 live.state[r].val=0;
1842 }
1843 else {
1844 Dif (live.state[r].val) {
1845 write_log("Problem with val\n");
1846 abort();
1847 }
1848 }
1849 set_status(r,DIRTY);
1850 return answer;
1851 }
1852
1853 static int writereg(int r, int size)
1854 {
1855 return writereg_general(r,size,-1);
1856 }
1857
1858 static int writereg_specific(int r, int size, int spec)
1859 {
1860 return writereg_general(r,size,spec);
1861 }
1862
1863 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1864 {
1865 int n;
1866 int answer=-1;
1867
1868 record_register(r);
1869 if (live.state[r].status==UNDEF) {
1870 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1871 }
1872 remove_offset(r,spec);
1873 make_exclusive(r,0,spec);
1874
1875 Dif (wsize<rsize) {
1876 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1877 abort();
1878 }
1879 if (isinreg(r) && live.state[r].validsize>=rsize) {
1880 n=live.state[r].realreg;
1881 Dif (live.nat[n].nholds!=1)
1882 abort();
1883
1884 switch(rsize) {
1885 case 1:
1886 if (live.nat[n].canbyte || spec>=0) {
1887 answer=n;
1888 }
1889 break;
1890 case 2:
1891 if (live.nat[n].canword || spec>=0) {
1892 answer=n;
1893 }
1894 break;
1895 case 4:
1896 answer=n;
1897 break;
1898 default: abort();
1899 }
1900 if (answer<0)
1901 evict(r);
1902 }
1903 /* either the value was in memory to start with, or it was evicted and
1904 is in memory now */
1905 if (answer<0) {
1906 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1907 }
1908
1909 if (spec>=0 && spec!=answer) {
1910 /* Too bad */
1911 mov_nregs(spec,answer);
1912 answer=spec;
1913 }
1914 if (wsize>live.state[r].dirtysize)
1915 live.state[r].dirtysize=wsize;
1916 if (wsize>live.state[r].validsize)
1917 live.state[r].validsize=wsize;
1918 set_status(r,DIRTY);
1919
1920 live.nat[answer].locked++;
1921 live.nat[answer].touched=touchcnt++;
1922
1923 Dif (live.state[r].val) {
1924 write_log("Problem with val(rmw)\n");
1925 abort();
1926 }
1927 return answer;
1928 }
1929
1930 static int rmw(int r, int wsize, int rsize)
1931 {
1932 return rmw_general(r,wsize,rsize,-1);
1933 }
1934
1935 static int rmw_specific(int r, int wsize, int rsize, int spec)
1936 {
1937 return rmw_general(r,wsize,rsize,spec);
1938 }
1939
1940
1941 /* needed for restoring the carry flag on non-P6 cores */
1942 static void bt_l_ri_noclobber(R4 r, IMM i)
1943 {
1944 int size=4;
1945 if (i<16)
1946 size=2;
1947 r=readreg(r,size);
1948 raw_bt_l_ri(r,i);
1949 unlock2(r);
1950 }
1951
1952 /********************************************************************
1953 * FPU register status handling. EMIT TIME! *
1954 ********************************************************************/
1955
1956 static void f_tomem(int r)
1957 {
1958 if (live.fate[r].status==DIRTY) {
1959 #if USE_LONG_DOUBLE
1960 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1961 #else
1962 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1963 #endif
1964 live.fate[r].status=CLEAN;
1965 }
1966 }
1967
1968 static void f_tomem_drop(int r)
1969 {
1970 if (live.fate[r].status==DIRTY) {
1971 #if USE_LONG_DOUBLE
1972 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1973 #else
1974 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1975 #endif
1976 live.fate[r].status=INMEM;
1977 }
1978 }
1979
1980
1981 static __inline__ int f_isinreg(int r)
1982 {
1983 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1984 }
1985
1986 static void f_evict(int r)
1987 {
1988 int rr;
1989
1990 if (!f_isinreg(r))
1991 return;
1992 rr=live.fate[r].realreg;
1993 if (live.fat[rr].nholds==1)
1994 f_tomem_drop(r);
1995 else
1996 f_tomem(r);
1997
1998 Dif (live.fat[rr].locked &&
1999 live.fat[rr].nholds==1) {
2000 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2001 abort();
2002 }
2003
2004 live.fat[rr].nholds--;
2005 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2006 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2007 int thisind=live.fate[r].realind;
2008 live.fat[rr].holds[thisind]=topreg;
2009 live.fate[topreg].realind=thisind;
2010 }
2011 live.fate[r].status=INMEM;
2012 live.fate[r].realreg=-1;
2013 }
2014
2015 static __inline__ void f_free_nreg(int r)
2016 {
2017 int i=live.fat[r].nholds;
2018
2019 while (i) {
2020 int vr;
2021
2022 --i;
2023 vr=live.fat[r].holds[i];
2024 f_evict(vr);
2025 }
2026 Dif (live.fat[r].nholds!=0) {
2027 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2028 abort();
2029 }
2030 }
2031
2032
2033 /* Use with care! */
2034 static __inline__ void f_isclean(int r)
2035 {
2036 if (!f_isinreg(r))
2037 return;
2038 live.fate[r].status=CLEAN;
2039 }
2040
2041 static __inline__ void f_disassociate(int r)
2042 {
2043 f_isclean(r);
2044 f_evict(r);
2045 }
2046
2047
2048
2049 static int f_alloc_reg(int r, int willclobber)
2050 {
2051 int bestreg;
2052 uae_s32 when;
2053 int i;
2054 uae_s32 badness;
2055 bestreg=-1;
2056 when=2000000000;
2057 for (i=N_FREGS;i--;) {
2058 badness=live.fat[i].touched;
2059 if (live.fat[i].nholds==0)
2060 badness=0;
2061
2062 if (!live.fat[i].locked && badness<when) {
2063 bestreg=i;
2064 when=badness;
2065 if (live.fat[i].nholds==0)
2066 break;
2067 }
2068 }
2069 Dif (bestreg==-1)
2070 abort();
2071
2072 if (live.fat[bestreg].nholds>0) {
2073 f_free_nreg(bestreg);
2074 }
2075 if (f_isinreg(r)) {
2076 f_evict(r);
2077 }
2078
2079 if (!willclobber) {
2080 if (live.fate[r].status!=UNDEF) {
2081 #if USE_LONG_DOUBLE
2082 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2083 #else
2084 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2085 #endif
2086 }
2087 live.fate[r].status=CLEAN;
2088 }
2089 else {
2090 live.fate[r].status=DIRTY;
2091 }
2092 live.fate[r].realreg=bestreg;
2093 live.fate[r].realind=live.fat[bestreg].nholds;
2094 live.fat[bestreg].touched=touchcnt++;
2095 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2096 live.fat[bestreg].nholds++;
2097
2098 return bestreg;
2099 }
2100
2101 static void f_unlock(int r)
2102 {
2103 Dif (!live.fat[r].locked)
2104 abort();
2105 live.fat[r].locked--;
2106 }
2107
2108 static void f_setlock(int r)
2109 {
2110 live.fat[r].locked++;
2111 }
2112
2113 static __inline__ int f_readreg(int r)
2114 {
2115 int n;
2116 int answer=-1;
2117
2118 if (f_isinreg(r)) {
2119 n=live.fate[r].realreg;
2120 answer=n;
2121 }
2122 /* either the value was in memory to start with, or it was evicted and
2123 is in memory now */
2124 if (answer<0)
2125 answer=f_alloc_reg(r,0);
2126
2127 live.fat[answer].locked++;
2128 live.fat[answer].touched=touchcnt++;
2129 return answer;
2130 }
2131
2132 static __inline__ void f_make_exclusive(int r, int clobber)
2133 {
2134 freg_status oldstate;
2135 int rr=live.fate[r].realreg;
2136 int nr;
2137 int nind;
2138 int ndirt=0;
2139 int i;
2140
2141 if (!f_isinreg(r))
2142 return;
2143 if (live.fat[rr].nholds==1)
2144 return;
2145 for (i=0;i<live.fat[rr].nholds;i++) {
2146 int vr=live.fat[rr].holds[i];
2147 if (vr!=r && live.fate[vr].status==DIRTY)
2148 ndirt++;
2149 }
2150 if (!ndirt && !live.fat[rr].locked) {
2151 /* Everything else is clean, so let's keep this register */
2152 for (i=0;i<live.fat[rr].nholds;i++) {
2153 int vr=live.fat[rr].holds[i];
2154 if (vr!=r) {
2155 f_evict(vr);
2156 i--; /* Try that index again! */
2157 }
2158 }
2159 Dif (live.fat[rr].nholds!=1) {
2160 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2161 for (i=0;i<live.fat[rr].nholds;i++) {
2162 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2163 live.fate[live.fat[rr].holds[i]].realreg,
2164 live.fate[live.fat[rr].holds[i]].realind);
2165 }
2166 write_log("\n");
2167 abort();
2168 }
2169 return;
2170 }
2171
2172 /* We have to split the register */
2173 oldstate=live.fate[r];
2174
2175 f_setlock(rr); /* Make sure this doesn't go away */
2176 /* Forget about r being in the register rr */
2177 f_disassociate(r);
2178 /* Get a new register, that we will clobber completely */
2179 nr=f_alloc_reg(r,1);
2180 nind=live.fate[r].realind;
2181 if (!clobber)
2182 raw_fmov_rr(nr,rr); /* Make another copy */
2183 live.fate[r]=oldstate; /* Keep all the old state info */
2184 live.fate[r].realreg=nr;
2185 live.fate[r].realind=nind;
2186 f_unlock(rr);
2187 }
2188
2189
2190 static __inline__ int f_writereg(int r)
2191 {
2192 int n;
2193 int answer=-1;
2194
2195 f_make_exclusive(r,1);
2196 if (f_isinreg(r)) {
2197 n=live.fate[r].realreg;
2198 answer=n;
2199 }
2200 if (answer<0) {
2201 answer=f_alloc_reg(r,1);
2202 }
2203 live.fate[r].status=DIRTY;
2204 live.fat[answer].locked++;
2205 live.fat[answer].touched=touchcnt++;
2206 return answer;
2207 }
2208
2209 static int f_rmw(int r)
2210 {
2211 int n;
2212
2213 f_make_exclusive(r,0);
2214 if (f_isinreg(r)) {
2215 n=live.fate[r].realreg;
2216 }
2217 else
2218 n=f_alloc_reg(r,0);
2219 live.fate[r].status=DIRTY;
2220 live.fat[n].locked++;
2221 live.fat[n].touched=touchcnt++;
2222 return n;
2223 }
2224
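/* Transfer the FPU condition codes (left on FP_RESULT by the last FPU
   operation) into the native flags. On targets where raw_fflags_into_flags
   clobbers the FFLAG_NREG hard register, tmp is pinned there first so no
   live value is lost. */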
2225 static void fflags_into_flags_internal(uae_u32 tmp)
2226 {
2227 int r;
2228
2229 clobber_flags();
2230 r=f_readreg(FP_RESULT);
2231 if (FFLAG_NREG_CLOBBER_CONDITION) {
2232 int tmp2=tmp;
2233 tmp=writereg_specific(tmp,4,FFLAG_NREG);
2234 raw_fflags_into_flags(r);
2235 unlock2(tmp);
2236 forget_about(tmp2);
2237 }
2238 else
2239 raw_fflags_into_flags(r);
2240 f_unlock(r);
2241 live_flags();
2242 }
2243
2244
2245
2246
2247 /********************************************************************
2248 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2249 ********************************************************************/
2250
2251 /*
2252 * RULES FOR HANDLING REGISTERS:
2253 *
2254 * * In the function headers, order the parameters
2255 * - 1st registers written to
2256 * - 2nd read/modify/write registers
2257 * - 3rd registers read from
2258 * * Before calling raw_*, you must call readreg, writereg or rmw for
2259 * each register
2260 * * The order for this is
2261 * - 1st call remove_offset for all registers written to with size<4
2262 * - 2nd call readreg for all registers read without offset
2263 * - 3rd call rmw for all rmw registers
2264 * - 4th call readreg_offset for all registers that can handle offsets
2265 * - 5th call get_offset for all the registers from the previous step
2266 * - 6th call writereg for all written-to registers
2267 * - 7th call raw_*
2268 * - 8th unlock2 all registers that were locked
2269 */
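/* A minimal sketch of the ordering above, using a hypothetical mid-layer
   op (it is not part of the translator; CLOBBER_ADD, readreg, rmw,
   raw_add_l and unlock2 are the real helpers used throughout this file):

	MIDFUNC(2,example_add_l,(RW4 d, R4 s))
	{
	    CLOBBER_ADD;       // declare clobbered nregs (done first by convention here)
	    s=readreg(s,4);    // 2nd: lock registers that are only read
	    d=rmw(d,4,4);      // 3rd: lock read/modify/write registers
	    raw_add_l(d,s);    // 7th: emit the native instruction
	    unlock2(d);        // 8th: unlock everything locked above
	    unlock2(s);
	}
	MENDFUNC(2,example_add_l,(RW4 d, R4 s))
*/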
2270
2271 MIDFUNC(0,live_flags,(void))
2272 {
2273 live.flags_on_stack=TRASH;
2274 live.flags_in_flags=VALID;
2275 live.flags_are_important=1;
2276 }
2277 MENDFUNC(0,live_flags,(void))
2278
2279 MIDFUNC(0,dont_care_flags,(void))
2280 {
2281 live.flags_are_important=0;
2282 }
2283 MENDFUNC(0,dont_care_flags,(void))
2284
2285
2286 MIDFUNC(0,duplicate_carry,(void))
2287 {
2288 evict(FLAGX);
2289 make_flags_live_internal();
2290 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2291 log_vwrite(FLAGX);
2292 }
2293 MENDFUNC(0,duplicate_carry,(void))
2294
2295 MIDFUNC(0,restore_carry,(void))
2296 {
2297 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2298 bt_l_ri_noclobber(FLAGX,0);
2299 }
2300 else { /* Avoid the stall the above creates.
2301 This is slow on non-P6, though.
2302 */
2303 	COMPCALL(rol_b_ri)(FLAGX,8);
2304 isclean(FLAGX);
2305 }
2306 }
2307 MENDFUNC(0,restore_carry,(void))
2308
2309 MIDFUNC(0,start_needflags,(void))
2310 {
2311 needflags=1;
2312 }
2313 MENDFUNC(0,start_needflags,(void))
2314
2315 MIDFUNC(0,end_needflags,(void))
2316 {
2317 needflags=0;
2318 }
2319 MENDFUNC(0,end_needflags,(void))
2320
2321 MIDFUNC(0,make_flags_live,(void))
2322 {
2323 make_flags_live_internal();
2324 }
2325 MENDFUNC(0,make_flags_live,(void))
2326
2327 MIDFUNC(1,fflags_into_flags,(W2 tmp))
2328 {
2329 clobber_flags();
2330 fflags_into_flags_internal(tmp);
2331 }
2332 MENDFUNC(1,fflags_into_flags,(W2 tmp))
2333
2334
2335 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2336 {
2337 int size=4;
2338 if (i<16)
2339 size=2;
2340 CLOBBER_BT;
2341 r=readreg(r,size);
2342 raw_bt_l_ri(r,i);
2343 unlock2(r);
2344 }
2345 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2346
2347 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2348 {
2349 CLOBBER_BT;
2350 r=readreg(r,4);
2351 b=readreg(b,4);
2352 raw_bt_l_rr(r,b);
2353 unlock2(r);
2354 unlock2(b);
2355 }
2356 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2357
2358 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2359 {
2360 int size=4;
2361 if (i<16)
2362 size=2;
2363 CLOBBER_BT;
2364 r=rmw(r,size,size);
2365 raw_btc_l_ri(r,i);
2366 unlock2(r);
2367 }
2368 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2369
2370 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2371 {
2372 CLOBBER_BT;
2373 b=readreg(b,4);
2374 r=rmw(r,4,4);
2375 raw_btc_l_rr(r,b);
2376 unlock2(r);
2377 unlock2(b);
2378 }
2379 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2380
2381
2382 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2383 {
2384 int size=4;
2385 if (i<16)
2386 size=2;
2387 CLOBBER_BT;
2388 r=rmw(r,size,size);
2389 raw_btr_l_ri(r,i);
2390 unlock2(r);
2391 }
2392 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2393
2394 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2395 {
2396 CLOBBER_BT;
2397 b=readreg(b,4);
2398 r=rmw(r,4,4);
2399 raw_btr_l_rr(r,b);
2400 unlock2(r);
2401 unlock2(b);
2402 }
2403 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2404
2405
2406 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2407 {
2408 int size=4;
2409 if (i<16)
2410 size=2;
2411 CLOBBER_BT;
2412 r=rmw(r,size,size);
2413 raw_bts_l_ri(r,i);
2414 unlock2(r);
2415 }
2416 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2417
2418 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2419 {
2420 CLOBBER_BT;
2421 b=readreg(b,4);
2422 r=rmw(r,4,4);
2423 raw_bts_l_rr(r,b);
2424 unlock2(r);
2425 unlock2(b);
2426 }
2427 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2428
2429 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2430 {
2431 CLOBBER_MOV;
2432 d=writereg(d,4);
2433 raw_mov_l_rm(d,s);
2434 unlock2(d);
2435 }
2436 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2437
2438
2439 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2440 {
2441 r=readreg(r,4);
2442 raw_call_r(r);
2443 unlock2(r);
2444 }
2445 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2446
2447 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2448 {
2449 CLOBBER_SUB;
2450 	raw_sub_l_mi(d,s);
2451 }
2452 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2453
2454 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2455 {
2456 CLOBBER_MOV;
2457 	raw_mov_l_mi(d,s);
2458 }
2459 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2460
2461 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2462 {
2463 CLOBBER_MOV;
2464 	raw_mov_w_mi(d,s);
2465 }
2466 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2467
2468 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2469 {
2470 CLOBBER_MOV;
2471 	raw_mov_b_mi(d,s);
2472 }
2473 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2474
2475 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2476 {
2477 if (!i && !needflags)
2478 return;
2479 CLOBBER_ROL;
2480 r=rmw(r,1,1);
2481 raw_rol_b_ri(r,i);
2482 unlock2(r);
2483 }
2484 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2485
2486 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2487 {
2488 if (!i && !needflags)
2489 return;
2490 CLOBBER_ROL;
2491 r=rmw(r,2,2);
2492 raw_rol_w_ri(r,i);
2493 unlock2(r);
2494 }
2495 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2496
2497 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2498 {
2499 if (!i && !needflags)
2500 return;
2501 CLOBBER_ROL;
2502 r=rmw(r,4,4);
2503 raw_rol_l_ri(r,i);
2504 unlock2(r);
2505 }
2506 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2507
2508 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2509 {
2510 if (isconst(r)) {
2511 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2512 return;
2513 }
2514 CLOBBER_ROL;
2515 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2516 d=rmw(d,4,4);
2517 Dif (r!=1) {
2518 	write_log("Illegal register %d in raw_rol_l\n",r);
2519 	abort();
2520 	}
2521 	raw_rol_l_rr(d,r);
2522 unlock2(r);
2523 unlock2(d);
2524 }
2525 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2526
2527 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2528 { /* Can only do this with r==1, i.e. cl */
2529
2530 if (isconst(r)) {
2531 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2532 return;
2533 }
2534 CLOBBER_ROL;
2535 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2536 d=rmw(d,2,2);
2537 Dif (r!=1) {
2538 	write_log("Illegal register %d in raw_rol_w\n",r);
2539 	abort();
2540 	}
2541 	raw_rol_w_rr(d,r);
2542 unlock2(r);
2543 unlock2(d);
2544 }
2545 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2546
2547 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2548 { /* Can only do this with r==1, i.e. cl */
2549
2550 if (isconst(r)) {
2551 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2552 return;
2553 }
2554
2555 CLOBBER_ROL;
2556 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2557 d=rmw(d,1,1);
2558 Dif (r!=1) {
2559 write_log("Illegal register %d in raw_rol_b\n",r);
2560 abort();
2561 }
2562 	raw_rol_b_rr(d,r);
2563 unlock2(r);
2564 unlock2(d);
2565 }
2566 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2567
2568
2569 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2570 {
2571 if (isconst(r)) {
2572 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2573 return;
2574 }
2575 CLOBBER_SHLL;
2576 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2577 d=rmw(d,4,4);
2578 Dif (r!=1) {
2579 	write_log("Illegal register %d in raw_shll_l\n",r);
2580 	abort();
2581 	}
2582 	raw_shll_l_rr(d,r);
2583 unlock2(r);
2584 unlock2(d);
2585 }
2586 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2587
2588 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2589 { /* Can only do this with r==1, i.e. cl */
2590
2591 if (isconst(r)) {
2592 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2593 return;
2594 }
2595 CLOBBER_SHLL;
2596 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2597 d=rmw(d,2,2);
2598 Dif (r!=1) {
2599 	write_log("Illegal register %d in raw_shll_w\n",r);
2600 	abort();
2601 	}
2602 	raw_shll_w_rr(d,r);
2603 unlock2(r);
2604 unlock2(d);
2605 }
2606 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2607
2608 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2609 { /* Can only do this with r==1, i.e. cl */
2610
2611 if (isconst(r)) {
2612 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2613 return;
2614 }
2615
2616 CLOBBER_SHLL;
2617 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2618 d=rmw(d,1,1);
2619 Dif (r!=1) {
2620 write_log("Illegal register %d in raw_shll_b\n",r);
2621 abort();
2622 }
2623 	raw_shll_b_rr(d,r);
2624 unlock2(r);
2625 unlock2(d);
2626 }
2627 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2628
2629
2630 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2631 {
2632 if (!i && !needflags)
2633 return;
2634 CLOBBER_ROR;
2635 r=rmw(r,1,1);
2636 raw_ror_b_ri(r,i);
2637 unlock2(r);
2638 }
2639 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2640
2641 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2642 {
2643 if (!i && !needflags)
2644 return;
2645 CLOBBER_ROR;
2646 r=rmw(r,2,2);
2647 raw_ror_w_ri(r,i);
2648 unlock2(r);
2649 }
2650 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2651
2652 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2653 {
2654 if (!i && !needflags)
2655 return;
2656 CLOBBER_ROR;
2657 r=rmw(r,4,4);
2658 raw_ror_l_ri(r,i);
2659 unlock2(r);
2660 }
2661 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2662
2663 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2664 {
2665 if (isconst(r)) {
2666 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2667 return;
2668 }
2669 CLOBBER_ROR;
2670 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2671 d=rmw(d,4,4);
2672 	raw_ror_l_rr(d,r);
2673 unlock2(r);
2674 unlock2(d);
2675 }
2676 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2677
2678 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2679 {
2680 if (isconst(r)) {
2681 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2682 return;
2683 }
2684 CLOBBER_ROR;
2685 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2686 d=rmw(d,2,2);
2687 	raw_ror_w_rr(d,r);
2688 unlock2(r);
2689 unlock2(d);
2690 }
2691 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2692
2693 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2694 {
2695 if (isconst(r)) {
2696 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2697 return;
2698 }
2699
2700 CLOBBER_ROR;
2701 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2702 d=rmw(d,1,1);
2703 	raw_ror_b_rr(d,r);
2704 unlock2(r);
2705 unlock2(d);
2706 }
2707 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2708
2709 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2710 {
2711 if (isconst(r)) {
2712 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2713 return;
2714 }
2715 CLOBBER_SHRL;
2716 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2717 d=rmw(d,4,4);
2718 Dif (r!=1) {
2719 	write_log("Illegal register %d in raw_shrl_l\n",r);
2720 	abort();
2721 	}
2722 	raw_shrl_l_rr(d,r);
2723 unlock2(r);
2724 unlock2(d);
2725 }
2726 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2727
2728 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2729 { /* Can only do this with r==1, i.e. cl */
2730
2731 if (isconst(r)) {
2732 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2733 return;
2734 }
2735 CLOBBER_SHRL;
2736 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2737 d=rmw(d,2,2);
2738 Dif (r!=1) {
2739 	write_log("Illegal register %d in raw_shrl_w\n",r);
2740 	abort();
2741 	}
2742 	raw_shrl_w_rr(d,r);
2743 unlock2(r);
2744 unlock2(d);
2745 }
2746 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2747
2748 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2749 { /* Can only do this with r==1, i.e. cl */
2750
2751 if (isconst(r)) {
2752 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2753 return;
2754 }
2755
2756 CLOBBER_SHRL;
2757 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2758 d=rmw(d,1,1);
2759 Dif (r!=1) {
2760 write_log("Illegal register %d in raw_shrl_b\n",r);
2761 abort();
2762 }
2763 	raw_shrl_b_rr(d,r);
2764 unlock2(r);
2765 unlock2(d);
2766 }
2767 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2768
2769
2770
2771 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2772 {
2773 if (!i && !needflags)
2774 return;
2775 if (isconst(r) && !needflags) {
2776 live.state[r].val<<=i;
2777 return;
2778 }
2779 CLOBBER_SHLL;
2780 r=rmw(r,4,4);
2781 raw_shll_l_ri(r,i);
2782 unlock2(r);
2783 }
2784 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2785
2786 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2787 {
2788 if (!i && !needflags)
2789 return;
2790 CLOBBER_SHLL;
2791 r=rmw(r,2,2);
2792 raw_shll_w_ri(r,i);
2793 unlock2(r);
2794 }
2795 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2796
2797 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2798 {
2799 if (!i && !needflags)
2800 return;
2801 CLOBBER_SHLL;
2802 r=rmw(r,1,1);
2803 raw_shll_b_ri(r,i);
2804 unlock2(r);
2805 }
2806 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2807
2808 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2809 {
2810 if (!i && !needflags)
2811 return;
2812 if (isconst(r) && !needflags) {
2813 live.state[r].val>>=i;
2814 return;
2815 }
2816 CLOBBER_SHRL;
2817 r=rmw(r,4,4);
2818 raw_shrl_l_ri(r,i);
2819 unlock2(r);
2820 }
2821 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2822
2823 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2824 {
2825 if (!i && !needflags)
2826 return;
2827 CLOBBER_SHRL;
2828 r=rmw(r,2,2);
2829 raw_shrl_w_ri(r,i);
2830 unlock2(r);
2831 }
2832 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2833
2834 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2835 {
2836 if (!i && !needflags)
2837 return;
2838 CLOBBER_SHRL;
2839 r=rmw(r,1,1);
2840 raw_shrl_b_ri(r,i);
2841 unlock2(r);
2842 }
2843 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2844
2845 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2846 {
2847 if (!i && !needflags)
2848 return;
2849 CLOBBER_SHRA;
2850 r=rmw(r,4,4);
2851 raw_shra_l_ri(r,i);
2852 unlock2(r);
2853 }
2854 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2855
2856 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2857 {
2858 if (!i && !needflags)
2859 return;
2860 CLOBBER_SHRA;
2861 r=rmw(r,2,2);
2862 raw_shra_w_ri(r,i);
2863 unlock2(r);
2864 }
2865 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2866
2867 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2868 {
2869 if (!i && !needflags)
2870 return;
2871 CLOBBER_SHRA;
2872 r=rmw(r,1,1);
2873 raw_shra_b_ri(r,i);
2874 unlock2(r);
2875 }
2876 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2877
2878 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2879 {
2880 if (isconst(r)) {
2881 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2882 return;
2883 }
2884 CLOBBER_SHRA;
2885 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2886 d=rmw(d,4,4);
2887 Dif (r!=1) {
2888 	write_log("Illegal register %d in raw_shra_l\n",r);
2889 	abort();
2890 	}
2891 	raw_shra_l_rr(d,r);
2892 unlock2(r);
2893 unlock2(d);
2894 }
2895 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2896
2897 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2898 { /* Can only do this with r==1, i.e. cl */
2899
2900 if (isconst(r)) {
2901 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2902 return;
2903 }
2904 CLOBBER_SHRA;
2905 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2906 d=rmw(d,2,2);
2907 Dif (r!=1) {
2908 	write_log("Illegal register %d in raw_shra_w\n",r);
2909 	abort();
2910 	}
2911 	raw_shra_w_rr(d,r);
2912 unlock2(r);
2913 unlock2(d);
2914 }
2915 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2916
2917 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2918 { /* Can only do this with r==1, i.e. cl */
2919
2920 if (isconst(r)) {
2921 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2922 return;
2923 }
2924
2925 CLOBBER_SHRA;
2926 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2927 d=rmw(d,1,1);
2928 Dif (r!=1) {
2929 write_log("Illegal register %d in raw_shra_b\n",r);
2930 abort();
2931 }
2932 	raw_shra_b_rr(d,r);
2933 unlock2(r);
2934 unlock2(d);
2935 }
2936 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2937
2938
2939 MIDFUNC(2,setcc,(W1 d, IMM cc))
2940 {
2941 CLOBBER_SETCC;
2942 d=writereg(d,1);
2943 raw_setcc(d,cc);
2944 unlock2(d);
2945 }
2946 MENDFUNC(2,setcc,(W1 d, IMM cc))
2947
2948 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2949 {
2950 CLOBBER_SETCC;
2951 raw_setcc_m(d,cc);
2952 }
2953 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2954
2955 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2956 {
2957 if (d==s)
2958 return;
2959 CLOBBER_CMOV;
2960 s=readreg(s,4);
2961 d=rmw(d,4,4);
2962 raw_cmov_l_rr(d,s,cc);
2963 unlock2(s);
2964 unlock2(d);
2965 }
2966 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2967
2968 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2969 {
2970 CLOBBER_CMOV;
2971 d=rmw(d,4,4);
2972 raw_cmov_l_rm(d,s,cc);
2973 unlock2(d);
2974 }
2975 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2976
2977 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2978 {
2979 CLOBBER_BSF;
2980 s = readreg(s, 4);
2981 d = writereg(d, 4);
2982 raw_bsf_l_rr(d, s);
2983 unlock2(s);
2984 unlock2(d);
2985 }
2986 MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2987
2988 /* Set the Z flag depending on the value in s. Note that the
2989 value has to be 0 or -1 (or, more precisely, for non-zero
2990 values, bit 14 must be set)! */
2991 MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2992 {
2993 CLOBBER_BSF;
2994 s=rmw_specific(s,4,4,FLAG_NREG3);
2995 tmp=writereg(tmp,4);
2996 raw_flags_set_zero(s, tmp);
2997 unlock2(tmp);
2998 unlock2(s);
2999 }
3000 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
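/* Callers must honor the 0/-1 contract above; see set_zero() further
   down, which uses bsf_l_rr when the host BSF usefully sets Z and falls
   back to simulate_bsf otherwise. */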
3001
3002 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3003 {
3004 CLOBBER_MUL;
3005 s=readreg(s,4);
3006 d=rmw(d,4,4);
3007 raw_imul_32_32(d,s);
3008 unlock2(s);
3009 unlock2(d);
3010 }
3011 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3012
3013 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3014 {
3015 CLOBBER_MUL;
3016 s=rmw_specific(s,4,4,MUL_NREG2);
3017 d=rmw_specific(d,4,4,MUL_NREG1);
3018 raw_imul_64_32(d,s);
3019 unlock2(s);
3020 unlock2(d);
3021 }
3022 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3023
3024 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3025 {
3026 CLOBBER_MUL;
3027 s=rmw_specific(s,4,4,MUL_NREG2);
3028 d=rmw_specific(d,4,4,MUL_NREG1);
3029 raw_mul_64_32(d,s);
3030 unlock2(s);
3031 unlock2(d);
3032 }
3033 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3034
3035 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3036 {
3037 CLOBBER_MUL;
3038 s=readreg(s,4);
3039 d=rmw(d,4,4);
3040 raw_mul_32_32(d,s);
3041 unlock2(s);
3042 unlock2(d);
3043 }
3044 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3045
3046 #if SIZEOF_VOID_P == 8
3047 MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3048 {
3049 int isrmw;
3050
3051 if (isconst(s)) {
3052 set_const(d,(uae_s32)live.state[s].val);
3053 return;
3054 }
3055
3056 CLOBBER_SE32;
3057 isrmw=(s==d);
3058 if (!isrmw) {
3059 s=readreg(s,4);
3060 d=writereg(d,4);
3061 }
3062 else { /* If we try to lock this twice, with different sizes, we
3063 	are in trouble! */
3064 s=d=rmw(s,4,4);
3065 }
3066 raw_sign_extend_32_rr(d,s);
3067 if (!isrmw) {
3068 unlock2(d);
3069 unlock2(s);
3070 }
3071 else {
3072 unlock2(s);
3073 }
3074 }
3075 MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3076 #endif
3077
3078 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3079 {
3080 int isrmw;
3081
3082 if (isconst(s)) {
3083 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3084 return;
3085 }
3086
3087 CLOBBER_SE16;
3088 isrmw=(s==d);
3089 if (!isrmw) {
3090 s=readreg(s,2);
3091 d=writereg(d,4);
3092 }
3093 else { /* If we try to lock this twice, with different sizes, we
3094 	are in trouble! */
3095 s=d=rmw(s,4,2);
3096 }
3097 raw_sign_extend_16_rr(d,s);
3098 if (!isrmw) {
3099 unlock2(d);
3100 unlock2(s);
3101 }
3102 else {
3103 unlock2(s);
3104 }
3105 }
3106 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3107
3108 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3109 {
3110 int isrmw;
3111
3112 if (isconst(s)) {
3113 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3114 return;
3115 }
3116
3117 isrmw=(s==d);
3118 CLOBBER_SE8;
3119 if (!isrmw) {
3120 s=readreg(s,1);
3121 d=writereg(d,4);
3122 }
3123 else { /* If we try to lock this twice, with different sizes, we
3124 	are in trouble! */
3125 s=d=rmw(s,4,1);
3126 }
3127
3128 raw_sign_extend_8_rr(d,s);
3129
3130 if (!isrmw) {
3131 unlock2(d);
3132 unlock2(s);
3133 }
3134 else {
3135 unlock2(s);
3136 }
3137 }
3138 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3139
3140
3141 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3142 {
3143 int isrmw;
3144
3145 if (isconst(s)) {
3146 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3147 return;
3148 }
3149
3150 isrmw=(s==d);
3151 CLOBBER_ZE16;
3152 if (!isrmw) {
3153 s=readreg(s,2);
3154 d=writereg(d,4);
3155 }
3156 else { /* If we try to lock this twice, with different sizes, we
3157 	are in trouble! */
3158 s=d=rmw(s,4,2);
3159 }
3160 raw_zero_extend_16_rr(d,s);
3161 if (!isrmw) {
3162 unlock2(d);
3163 unlock2(s);
3164 }
3165 else {
3166 unlock2(s);
3167 }
3168 }
3169 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3170
3171 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3172 {
3173 int isrmw;
3174 if (isconst(s)) {
3175 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3176 return;
3177 }
3178
3179 isrmw=(s==d);
3180 CLOBBER_ZE8;
3181 if (!isrmw) {
3182 s=readreg(s,1);
3183 d=writereg(d,4);
3184 }
3185 else { /* If we try to lock this twice, with different sizes, we
3186 	are in trouble! */
3187 s=d=rmw(s,4,1);
3188 }
3189
3190 raw_zero_extend_8_rr(d,s);
3191
3192 if (!isrmw) {
3193 unlock2(d);
3194 unlock2(s);
3195 }
3196 else {
3197 unlock2(s);
3198 }
3199 }
3200 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3201
3202 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3203 {
3204 if (d==s)
3205 return;
3206 if (isconst(s)) {
3207 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3208 return;
3209 }
3210
3211 CLOBBER_MOV;
3212 s=readreg(s,1);
3213 d=writereg(d,1);
3214 raw_mov_b_rr(d,s);
3215 unlock2(d);
3216 unlock2(s);
3217 }
3218 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3219
3220 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3221 {
3222 if (d==s)
3223 return;
3224 if (isconst(s)) {
3225 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3226 return;
3227 }
3228
3229 CLOBBER_MOV;
3230 s=readreg(s,2);
3231 d=writereg(d,2);
3232 raw_mov_w_rr(d,s);
3233 unlock2(d);
3234 unlock2(s);
3235 }
3236 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3237
3238
3239 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3240 {
3241 CLOBBER_MOV;
3242 baser=readreg(baser,4);
3243 index=readreg(index,4);
3244 d=writereg(d,4);
3245
3246 raw_mov_l_rrm_indexed(d,baser,index,factor);
3247 unlock2(d);
3248 unlock2(baser);
3249 unlock2(index);
3250 }
3251 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3252
3253 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3254 {
3255 CLOBBER_MOV;
3256 baser=readreg(baser,4);
3257 index=readreg(index,4);
3258 d=writereg(d,2);
3259
3260 raw_mov_w_rrm_indexed(d,baser,index,factor);
3261 unlock2(d);
3262 unlock2(baser);
3263 unlock2(index);
3264 }
3265 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3266
3267 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3268 {
3269 CLOBBER_MOV;
3270 baser=readreg(baser,4);
3271 index=readreg(index,4);
3272 d=writereg(d,1);
3273
3274 raw_mov_b_rrm_indexed(d,baser,index,factor);
3275
3276 unlock2(d);
3277 unlock2(baser);
3278 unlock2(index);
3279 }
3280 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3281
3282
3283 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3284 {
3285 CLOBBER_MOV;
3286 baser=readreg(baser,4);
3287 index=readreg(index,4);
3288 s=readreg(s,4);
3289
3290 Dif (baser==s || index==s)
3291 abort();
3292
3293
3294 raw_mov_l_mrr_indexed(baser,index,factor,s);
3295 unlock2(s);
3296 unlock2(baser);
3297 unlock2(index);
3298 }
3299 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3300
3301 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3302 {
3303 CLOBBER_MOV;
3304 baser=readreg(baser,4);
3305 index=readreg(index,4);
3306 s=readreg(s,2);
3307
3308 raw_mov_w_mrr_indexed(baser,index,factor,s);
3309 unlock2(s);
3310 unlock2(baser);
3311 unlock2(index);
3312 }
3313 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3314
3315 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3316 {
3317 CLOBBER_MOV;
3318 s=readreg(s,1);
3319 baser=readreg(baser,4);
3320 index=readreg(index,4);
3321
3322 raw_mov_b_mrr_indexed(baser,index,factor,s);
3323 unlock2(s);
3324 unlock2(baser);
3325 unlock2(index);
3326 }
3327 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3328
3329
3330 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3331 {
3332 int basereg=baser;
3333 int indexreg=index;
3334
3335 CLOBBER_MOV;
3336 s=readreg(s,4);
3337 baser=readreg_offset(baser,4);
3338 index=readreg_offset(index,4);
3339
3340 base+=get_offset(basereg);
3341 base+=factor*get_offset(indexreg);
3342
3343 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3344 unlock2(s);
3345 unlock2(baser);
3346 unlock2(index);
3347 }
3348 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3349
3350 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3351 {
3352 int basereg=baser;
3353 int indexreg=index;
3354
3355 CLOBBER_MOV;
3356 s=readreg(s,2);
3357 baser=readreg_offset(baser,4);
3358 index=readreg_offset(index,4);
3359
3360 base+=get_offset(basereg);
3361 base+=factor*get_offset(indexreg);
3362
3363 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3364 unlock2(s);
3365 unlock2(baser);
3366 unlock2(index);
3367 }
3368 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3369
3370 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3371 {
3372 int basereg=baser;
3373 int indexreg=index;
3374
3375 CLOBBER_MOV;
3376 s=readreg(s,1);
3377 baser=readreg_offset(baser,4);
3378 index=readreg_offset(index,4);
3379
3380 base+=get_offset(basereg);
3381 base+=factor*get_offset(indexreg);
3382
3383 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3384 unlock2(s);
3385 unlock2(baser);
3386 unlock2(index);
3387 }
3388 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3389
3390
3391
3392 /* Read a long from base+baser+factor*index */
3393 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3394 {
3395 int basereg=baser;
3396 int indexreg=index;
3397
3398 CLOBBER_MOV;
3399 baser=readreg_offset(baser,4);
3400 index=readreg_offset(index,4);
3401 base+=get_offset(basereg);
3402 base+=factor*get_offset(indexreg);
3403 d=writereg(d,4);
3404 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3405 unlock2(d);
3406 unlock2(baser);
3407 unlock2(index);
3408 }
3409 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3410
3411
3412 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3413 {
3414 int basereg=baser;
3415 int indexreg=index;
3416
3417 CLOBBER_MOV;
3418 remove_offset(d,-1);
3419 baser=readreg_offset(baser,4);
3420 index=readreg_offset(index,4);
3421 base+=get_offset(basereg);
3422 base+=factor*get_offset(indexreg);
3423 d=writereg(d,2);
3424 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3425 unlock2(d);
3426 unlock2(baser);
3427 unlock2(index);
3428 }
3429 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3430
3431
3432 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3433 {
3434 int basereg=baser;
3435 int indexreg=index;
3436
3437 CLOBBER_MOV;
3438 remove_offset(d,-1);
3439 baser=readreg_offset(baser,4);
3440 index=readreg_offset(index,4);
3441 base+=get_offset(basereg);
3442 base+=factor*get_offset(indexreg);
3443 d=writereg(d,1);
3444 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3445 unlock2(d);
3446 unlock2(baser);
3447 unlock2(index);
3448 }
3449 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3450
3451 /* Read a long from base+factor*index */
3452 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3453 {
3454 int indexreg=index;
3455
3456 if (isconst(index)) {
3457 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3458 return;
3459 }
3460
3461 CLOBBER_MOV;
3462 index=readreg_offset(index,4);
3463 base+=get_offset(indexreg)*factor;
3464 d=writereg(d,4);
3465
3466 raw_mov_l_rm_indexed(d,base,index,factor);
3467 unlock2(index);
3468 unlock2(d);
3469 }
3470 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3471
3472
3473 /* read the long at the address contained in s+offset and store in d */
3474 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3475 {
3476 if (isconst(s)) {
3477 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3478 return;
3479 }
3480 CLOBBER_MOV;
3481 s=readreg(s,4);
3482 d=writereg(d,4);
3483
3484 raw_mov_l_rR(d,s,offset);
3485 unlock2(d);
3486 unlock2(s);
3487 }
3488 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3489
3490 /* read the word at the address contained in s+offset and store in d */
3491 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3492 {
3493 if (isconst(s)) {
3494 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3495 return;
3496 }
3497 CLOBBER_MOV;
3498 s=readreg(s,4);
3499 d=writereg(d,2);
3500
3501 raw_mov_w_rR(d,s,offset);
3502 unlock2(d);
3503 unlock2(s);
3504 }
3505 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3506
3507 /* read the byte at the address contained in s+offset and store in d */
3508 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3509 {
3510 if (isconst(s)) {
3511 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3512 return;
3513 }
3514 CLOBBER_MOV;
3515 s=readreg(s,4);
3516 d=writereg(d,1);
3517
3518 raw_mov_b_rR(d,s,offset);
3519 unlock2(d);
3520 unlock2(s);
3521 }
3522 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3523
3524 /* read the long at the address contained in s+offset and store in d */
3525 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3526 {
3527 int sreg=s;
3528 if (isconst(s)) {
3529 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3530 return;
3531 }
3532 CLOBBER_MOV;
3533 s=readreg_offset(s,4);
3534 offset+=get_offset(sreg);
3535 d=writereg(d,4);
3536
3537 raw_mov_l_brR(d,s,offset);
3538 unlock2(d);
3539 unlock2(s);
3540 }
3541 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3542
3543 /* read the word at the address contained in s+offset and store in d */
3544 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3545 {
3546 int sreg=s;
3547 if (isconst(s)) {
3548 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3549 return;
3550 }
3551 CLOBBER_MOV;
3552 remove_offset(d,-1);
3553 s=readreg_offset(s,4);
3554 offset+=get_offset(sreg);
3555 d=writereg(d,2);
3556
3557 raw_mov_w_brR(d,s,offset);
3558 unlock2(d);
3559 unlock2(s);
3560 }
3561 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3562
3563 /* read the byte at the address contained in s+offset and store in d */
3564 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3565 {
3566 int sreg=s;
3567 if (isconst(s)) {
3568 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3569 return;
3570 }
3571 CLOBBER_MOV;
3572 remove_offset(d,-1);
3573 s=readreg_offset(s,4);
3574 offset+=get_offset(sreg);
3575 d=writereg(d,1);
3576
3577 raw_mov_b_brR(d,s,offset);
3578 unlock2(d);
3579 unlock2(s);
3580 }
3581 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3582
3583 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3584 {
3585 int dreg=d;
3586 if (isconst(d)) {
3587 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3588 return;
3589 }
3590
3591 CLOBBER_MOV;
3592 d=readreg_offset(d,4);
3593 offset+=get_offset(dreg);
3594 raw_mov_l_Ri(d,i,offset);
3595 unlock2(d);
3596 }
3597 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3598
3599 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3600 {
3601 int dreg=d;
3602 if (isconst(d)) {
3603 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3604 return;
3605 }
3606
3607 CLOBBER_MOV;
3608 d=readreg_offset(d,4);
3609 offset+=get_offset(dreg);
3610 raw_mov_w_Ri(d,i,offset);
3611 unlock2(d);
3612 }
3613 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3614
3615 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3616 {
3617 int dreg=d;
3618 if (isconst(d)) {
3619 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3620 return;
3621 }
3622
3623 CLOBBER_MOV;
3624 d=readreg_offset(d,4);
3625 offset+=get_offset(dreg);
3626 raw_mov_b_Ri(d,i,offset);
3627 unlock2(d);
3628 }
3629 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3630
3631 /* Warning! OFFSET is byte-sized only! */
3632 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3633 {
3634 if (isconst(d)) {
3635 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3636 return;
3637 }
3638 if (isconst(s)) {
3639 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3640 return;
3641 }
3642
3643 CLOBBER_MOV;
3644 s=readreg(s,4);
3645 d=readreg(d,4);
3646
3647 raw_mov_l_Rr(d,s,offset);
3648 unlock2(d);
3649 unlock2(s);
3650 }
3651 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3652
3653 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3654 {
3655 if (isconst(d)) {
3656 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3657 return;
3658 }
3659 if (isconst(s)) {
3660 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3661 return;
3662 }
3663
3664 CLOBBER_MOV;
3665 s=readreg(s,2);
3666 d=readreg(d,4);
3667 raw_mov_w_Rr(d,s,offset);
3668 unlock2(d);
3669 unlock2(s);
3670 }
3671 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3672
3673 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3674 {
3675 if (isconst(d)) {
3676 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3677 return;
3678 }
3679 if (isconst(s)) {
3680 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3681 return;
3682 }
3683
3684 CLOBBER_MOV;
3685 s=readreg(s,1);
3686 d=readreg(d,4);
3687 raw_mov_b_Rr(d,s,offset);
3688 unlock2(d);
3689 unlock2(s);
3690 }
3691 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3692
3693 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3694 {
3695 if (isconst(s)) {
3696 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3697 return;
3698 }
3699 #if USE_OFFSET
3700 if (d==s) {
3701 add_offset(d,offset);
3702 return;
3703 }
3704 #endif
3705 CLOBBER_LEA;
3706 s=readreg(s,4);
3707 d=writereg(d,4);
3708 raw_lea_l_brr(d,s,offset);
3709 unlock2(d);
3710 unlock2(s);
3711 }
3712 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3713
3714 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3715 {
3716 if (!offset) {
3717 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3718 return;
3719 }
3720 CLOBBER_LEA;
3721 s=readreg(s,4);
3722 index=readreg(index,4);
3723 d=writereg(d,4);
3724
3725 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3726 unlock2(d);
3727 unlock2(index);
3728 unlock2(s);
3729 }
3730 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3731
3732 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3733 {
3734 CLOBBER_LEA;
3735 s=readreg(s,4);
3736 index=readreg(index,4);
3737 d=writereg(d,4);
3738
3739 raw_lea_l_rr_indexed(d,s,index,factor);
3740 unlock2(d);
3741 unlock2(index);
3742 unlock2(s);
3743 }
3744 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3745
3746 /* write s to the long at the address contained in d+offset */
3747 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3748 {
3749 int dreg=d;
3750 if (isconst(d)) {
3751 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3752 return;
3753 }
3754
3755 CLOBBER_MOV;
3756 s=readreg(s,4);
3757 d=readreg_offset(d,4);
3758 offset+=get_offset(dreg);
3759
3760 raw_mov_l_bRr(d,s,offset);
3761 unlock2(d);
3762 unlock2(s);
3763 }
3764 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3765
3766 /* write the word s to the address contained in d+offset */
3767 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3768 {
3769 int dreg=d;
3770
3771 if (isconst(d)) {
3772 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3773 return;
3774 }
3775
3776 CLOBBER_MOV;
3777 s=readreg(s,2);
3778 d=readreg_offset(d,4);
3779 offset+=get_offset(dreg);
3780 raw_mov_w_bRr(d,s,offset);
3781 unlock2(d);
3782 unlock2(s);
3783 }
3784 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3785
3786 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3787 {
3788 int dreg=d;
3789 if (isconst(d)) {
3790 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3791 return;
3792 }
3793
3794 CLOBBER_MOV;
3795 s=readreg(s,1);
3796 d=readreg_offset(d,4);
3797 offset+=get_offset(dreg);
3798 raw_mov_b_bRr(d,s,offset);
3799 unlock2(d);
3800 unlock2(s);
3801 }
3802 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3803
3804 MIDFUNC(1,bswap_32,(RW4 r))
3805 {
3806 int reg=r;
3807
3808 if (isconst(r)) {
3809 uae_u32 oldv=live.state[r].val;
3810 live.state[r].val=reverse32(oldv);
3811 return;
3812 }
3813
3814 CLOBBER_SW32;
3815 r=rmw(r,4,4);
3816 raw_bswap_32(r);
3817 unlock2(r);
3818 }
3819 MENDFUNC(1,bswap_32,(RW4 r))
3820
3821 MIDFUNC(1,bswap_16,(RW2 r))
3822 {
3823 if (isconst(r)) {
3824 uae_u32 oldv=live.state[r].val;
3825 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3826 (oldv&0xffff0000);
3827 return;
3828 }
3829
3830 CLOBBER_SW16;
3831 r=rmw(r,2,2);
3832
3833 raw_bswap_16(r);
3834 unlock2(r);
3835 }
3836 MENDFUNC(1,bswap_16,(RW2 r))
3837
3838
3839
3840 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3841 {
3842 int olds;
3843
3844 if (d==s) { /* How pointless! */
3845 return;
3846 }
3847 if (isconst(s)) {
3848 COMPCALL(mov_l_ri)(d,live.state[s].val);
3849 return;
3850 }
3851 olds=s;
3852 disassociate(d);
3853 s=readreg_offset(s,4);
3854 live.state[d].realreg=s;
3855 live.state[d].realind=live.nat[s].nholds;
3856 live.state[d].val=live.state[olds].val;
3857 live.state[d].validsize=4;
3858 live.state[d].dirtysize=4;
3859 set_status(d,DIRTY);
3860
3861 live.nat[s].holds[live.nat[s].nholds]=d;
3862 live.nat[s].nholds++;
3863 log_clobberreg(d);
3864 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3865 d,s,live.state[d].realind,live.nat[s].nholds); */
3866 unlock2(s);
3867 }
3868 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3869
3870 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3871 {
3872 if (isconst(s)) {
3873 COMPCALL(mov_l_mi)(d,live.state[s].val);
3874 return;
3875 }
3876 CLOBBER_MOV;
3877 s=readreg(s,4);
3878
3879 raw_mov_l_mr(d,s);
3880 unlock2(s);
3881 }
3882 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3883
3884
3885 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3886 {
3887 if (isconst(s)) {
3888 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3889 return;
3890 }
3891 CLOBBER_MOV;
3892 s=readreg(s,2);
3893
3894 raw_mov_w_mr(d,s);
3895 unlock2(s);
3896 }
3897 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3898
3899 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3900 {
3901 CLOBBER_MOV;
3902 d=writereg(d,2);
3903
3904 raw_mov_w_rm(d,s);
3905 unlock2(d);
3906 }
3907 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3908
3909 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3910 {
3911 if (isconst(s)) {
3912 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3913 return;
3914 }
3915
3916 CLOBBER_MOV;
3917 s=readreg(s,1);
3918
3919 raw_mov_b_mr(d,s);
3920 unlock2(s);
3921 }
3922 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3923
3924 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3925 {
3926 CLOBBER_MOV;
3927 d=writereg(d,1);
3928
3929 raw_mov_b_rm(d,s);
3930 unlock2(d);
3931 }
3932 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3933
3934 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3935 {
3936 set_const(d,s);
3937 return;
3938 }
3939 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3940
3941 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3942 {
3943 CLOBBER_MOV;
3944 d=writereg(d,2);
3945
3946 raw_mov_w_ri(d,s);
3947 unlock2(d);
3948 }
3949 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3950
3951 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3952 {
3953 CLOBBER_MOV;
3954 d=writereg(d,1);
3955
3956 raw_mov_b_ri(d,s);
3957 unlock2(d);
3958 }
3959 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3960
3961
3962 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3963 {
3964 CLOBBER_ADD;
3965 	raw_add_l_mi(d,s);
3966 }
3967 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3968
3969 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3970 {
3971 CLOBBER_ADD;
3972 	raw_add_w_mi(d,s);
3973 }
3974 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3975
3976 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3977 {
3978 CLOBBER_ADD;
3979 	raw_add_b_mi(d,s);
3980 }
3981 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3982
3983
3984 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3985 {
3986 CLOBBER_TEST;
3987 d=readreg(d,4);
3988
3989 raw_test_l_ri(d,i);
3990 unlock2(d);
3991 }
3992 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3993
3994 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3995 {
3996 CLOBBER_TEST;
3997 d=readreg(d,4);
3998 s=readreg(s,4);
3999
4000 	raw_test_l_rr(d,s);
4001 unlock2(d);
4002 unlock2(s);
4003 }
4004 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4005
4006 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4007 {
4008 CLOBBER_TEST;
4009 d=readreg(d,2);
4010 s=readreg(s,2);
4011
4012 raw_test_w_rr(d,s);
4013 unlock2(d);
4014 unlock2(s);
4015 }
4016 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4017
4018 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4019 {
4020 CLOBBER_TEST;
4021 d=readreg(d,1);
4022 s=readreg(s,1);
4023
4024 raw_test_b_rr(d,s);
4025 unlock2(d);
4026 unlock2(s);
4027 }
4028 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4029
4030
4031 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4032 {
4033 if (isconst(d) && !needflags) {
4034 live.state[d].val &= i;
4035 return;
4036 }
4037
4038 CLOBBER_AND;
4039 d=rmw(d,4,4);
4040
4041 raw_and_l_ri(d,i);
4042 unlock2(d);
4043 }
4044 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4045
4046 MIDFUNC(2,and_l,(RW4 d, R4 s))
4047 {
4048 CLOBBER_AND;
4049 s=readreg(s,4);
4050 d=rmw(d,4,4);
4051
4052 raw_and_l(d,s);
4053 unlock2(d);
4054 unlock2(s);
4055 }
4056 MENDFUNC(2,and_l,(RW4 d, R4 s))
4057
4058 MIDFUNC(2,and_w,(RW2 d, R2 s))
4059 {
4060 CLOBBER_AND;
4061 s=readreg(s,2);
4062 d=rmw(d,2,2);
4063
4064 raw_and_w(d,s);
4065 unlock2(d);
4066 unlock2(s);
4067 }
4068 MENDFUNC(2,and_w,(RW2 d, R2 s))
4069
4070 MIDFUNC(2,and_b,(RW1 d, R1 s))
4071 {
4072 CLOBBER_AND;
4073 s=readreg(s,1);
4074 d=rmw(d,1,1);
4075
4076 raw_and_b(d,s);
4077 unlock2(d);
4078 unlock2(s);
4079 }
4080 MENDFUNC(2,and_b,(RW1 d, R1 s))
4081
4082 // gb-- used for making an fpcr value in compemu_fpp.cpp
4083 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4084 {
4085 CLOBBER_OR;
4086 d=rmw(d,4,4);
4087
4088 raw_or_l_rm(d,s);
4089 unlock2(d);
4090 }
4091 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4092
4093 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4094 {
4095 if (isconst(d) && !needflags) {
4096 live.state[d].val|=i;
4097 return;
4098 }
4099 CLOBBER_OR;
4100 d=rmw(d,4,4);
4101
4102 raw_or_l_ri(d,i);
4103 unlock2(d);
4104 }
4105 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4106
4107 MIDFUNC(2,or_l,(RW4 d, R4 s))
4108 {
4109 if (isconst(d) && isconst(s) && !needflags) {
4110 live.state[d].val|=live.state[s].val;
4111 return;
4112 }
4113 CLOBBER_OR;
4114 s=readreg(s,4);
4115 d=rmw(d,4,4);
4116
4117 raw_or_l(d,s);
4118 unlock2(d);
4119 unlock2(s);
4120 }
4121 MENDFUNC(2,or_l,(RW4 d, R4 s))
4122
4123 MIDFUNC(2,or_w,(RW2 d, R2 s))
4124 {
4125 CLOBBER_OR;
4126 s=readreg(s,2);
4127 d=rmw(d,2,2);
4128
4129 raw_or_w(d,s);
4130 unlock2(d);
4131 unlock2(s);
4132 }
4133 MENDFUNC(2,or_w,(RW2 d, R2 s))
4134
4135 MIDFUNC(2,or_b,(RW1 d, R1 s))
4136 {
4137 CLOBBER_OR;
4138 s=readreg(s,1);
4139 d=rmw(d,1,1);
4140
4141 raw_or_b(d,s);
4142 unlock2(d);
4143 unlock2(s);
4144 }
4145 MENDFUNC(2,or_b,(RW1 d, R1 s))
4146
4147 MIDFUNC(2,adc_l,(RW4 d, R4 s))
4148 {
4149 CLOBBER_ADC;
4150 s=readreg(s,4);
4151 d=rmw(d,4,4);
4152
4153 raw_adc_l(d,s);
4154
4155 unlock2(d);
4156 unlock2(s);
4157 }
4158 MENDFUNC(2,adc_l,(RW4 d, R4 s))
4159
4160 MIDFUNC(2,adc_w,(RW2 d, R2 s))
4161 {
4162 CLOBBER_ADC;
4163 s=readreg(s,2);
4164 d=rmw(d,2,2);
4165
4166 raw_adc_w(d,s);
4167 unlock2(d);
4168 unlock2(s);
4169 }
4170 MENDFUNC(2,adc_w,(RW2 d, R2 s))
4171
4172 MIDFUNC(2,adc_b,(RW1 d, R1 s))
4173 {
4174 CLOBBER_ADC;
4175 s=readreg(s,1);
4176 d=rmw(d,1,1);
4177
4178 raw_adc_b(d,s);
4179 unlock2(d);
4180 unlock2(s);
4181 }
4182 MENDFUNC(2,adc_b,(RW1 d, R1 s))
4183
4184 MIDFUNC(2,add_l,(RW4 d, R4 s))
4185 {
4186 if (isconst(s)) {
4187 COMPCALL(add_l_ri)(d,live.state[s].val);
4188 return;
4189 }
4190
4191 CLOBBER_ADD;
4192 s=readreg(s,4);
4193 d=rmw(d,4,4);
4194
4195 raw_add_l(d,s);
4196
4197 unlock2(d);
4198 unlock2(s);
4199 }
4200 MENDFUNC(2,add_l,(RW4 d, R4 s))
4201
4202 MIDFUNC(2,add_w,(RW2 d, R2 s))
4203 {
4204 if (isconst(s)) {
4205 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4206 return;
4207 }
4208
4209 CLOBBER_ADD;
4210 s=readreg(s,2);
4211 d=rmw(d,2,2);
4212
4213 raw_add_w(d,s);
4214 unlock2(d);
4215 unlock2(s);
4216 }
4217 MENDFUNC(2,add_w,(RW2 d, R2 s))
4218
4219 MIDFUNC(2,add_b,(RW1 d, R1 s))
4220 {
4221 if (isconst(s)) {
4222 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4223 return;
4224 }
4225
4226 CLOBBER_ADD;
4227 s=readreg(s,1);
4228 d=rmw(d,1,1);
4229
4230 raw_add_b(d,s);
4231 unlock2(d);
4232 unlock2(s);
4233 }
4234 MENDFUNC(2,add_b,(RW1 d, R1 s))
4235
4236 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4237 {
4238 if (!i && !needflags)
4239 return;
4240 if (isconst(d) && !needflags) {
4241 live.state[d].val-=i;
4242 return;
4243 }
4244 #if USE_OFFSET
4245 if (!needflags) {
4246 add_offset(d,-i);
4247 return;
4248 }
4249 #endif
4250
4251 CLOBBER_SUB;
4252 d=rmw(d,4,4);
4253
4254 raw_sub_l_ri(d,i);
4255 unlock2(d);
4256 }
4257 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4258
4259 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4260 {
4261 if (!i && !needflags)
4262 return;
4263
4264 CLOBBER_SUB;
4265 d=rmw(d,2,2);
4266
4267 raw_sub_w_ri(d,i);
4268 unlock2(d);
4269 }
4270 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4271
4272 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4273 {
4274 if (!i && !needflags)
4275 return;
4276
4277 CLOBBER_SUB;
4278 d=rmw(d,1,1);
4279
4280 raw_sub_b_ri(d,i);
4281
4282 unlock2(d);
4283 }
4284 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4285
4286 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4287 {
4288 if (!i && !needflags)
4289 return;
4290 if (isconst(d) && !needflags) {
4291 live.state[d].val+=i;
4292 return;
4293 }
4294 #if USE_OFFSET
4295 if (!needflags) {
4296 add_offset(d,i);
4297 return;
4298 }
4299 #endif
4300 CLOBBER_ADD;
4301 d=rmw(d,4,4);
4302 raw_add_l_ri(d,i);
4303 unlock2(d);
4304 }
4305 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
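/* Note that with USE_OFFSET, add_l_ri/sub_l_ri on an address register
   often emit no code at all: the displacement is merely recorded via
   add_offset() and later folded into a memory operand through
   readreg_offset()/get_offset() (see e.g. mov_l_brR above). */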
4306
4307 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4308 {
4309 if (!i && !needflags)
4310 return;
4311
4312 CLOBBER_ADD;
4313 d=rmw(d,2,2);
4314
4315 raw_add_w_ri(d,i);
4316 unlock2(d);
4317 }
4318 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4319
4320 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4321 {
4322 if (!i && !needflags)
4323 return;
4324
4325 CLOBBER_ADD;
4326 d=rmw(d,1,1);
4327
4328 raw_add_b_ri(d,i);
4329
4330 unlock2(d);
4331 }
4332 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4333
4334 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4335 {
4336 CLOBBER_SBB;
4337 s=readreg(s,4);
4338 d=rmw(d,4,4);
4339
4340 raw_sbb_l(d,s);
4341 unlock2(d);
4342 unlock2(s);
4343 }
4344 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4345
4346 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4347 {
4348 CLOBBER_SBB;
4349 s=readreg(s,2);
4350 d=rmw(d,2,2);
4351
4352 raw_sbb_w(d,s);
4353 unlock2(d);
4354 unlock2(s);
4355 }
4356 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4357
4358 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4359 {
4360 CLOBBER_SBB;
4361 s=readreg(s,1);
4362 d=rmw(d,1,1);
4363
4364 raw_sbb_b(d,s);
4365 unlock2(d);
4366 unlock2(s);
4367 }
4368 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4369
4370 MIDFUNC(2,sub_l,(RW4 d, R4 s))
4371 {
4372 if (isconst(s)) {
4373 COMPCALL(sub_l_ri)(d,live.state[s].val);
4374 return;
4375 }
4376
4377 CLOBBER_SUB;
4378 s=readreg(s,4);
4379 d=rmw(d,4,4);
4380
4381 raw_sub_l(d,s);
4382 unlock2(d);
4383 unlock2(s);
4384 }
4385 MENDFUNC(2,sub_l,(RW4 d, R4 s))
4386
4387 MIDFUNC(2,sub_w,(RW2 d, R2 s))
4388 {
4389 if (isconst(s)) {
4390 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4391 return;
4392 }
4393
4394 CLOBBER_SUB;
4395 s=readreg(s,2);
4396 d=rmw(d,2,2);
4397
4398 raw_sub_w(d,s);
4399 unlock2(d);
4400 unlock2(s);
4401 }
4402 MENDFUNC(2,sub_w,(RW2 d, R2 s))
4403
4404 MIDFUNC(2,sub_b,(RW1 d, R1 s))
4405 {
4406 if (isconst(s)) {
4407 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4408 return;
4409 }
4410
4411 CLOBBER_SUB;
4412 s=readreg(s,1);
4413 d=rmw(d,1,1);
4414
4415 raw_sub_b(d,s);
4416 unlock2(d);
4417 unlock2(s);
4418 }
4419 MENDFUNC(2,sub_b,(RW1 d, R1 s))
4420
4421 MIDFUNC(2,cmp_l,(R4 d, R4 s))
4422 {
4423 CLOBBER_CMP;
4424 s=readreg(s,4);
4425 d=readreg(d,4);
4426
4427 raw_cmp_l(d,s);
4428 unlock2(d);
4429 unlock2(s);
4430 }
4431 MENDFUNC(2,cmp_l,(R4 d, R4 s))
4432
4433 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4434 {
4435 CLOBBER_CMP;
4436 r=readreg(r,4);
4437
4438 raw_cmp_l_ri(r,i);
4439 unlock2(r);
4440 }
4441 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4442
4443 MIDFUNC(2,cmp_w,(R2 d, R2 s))
4444 {
4445 CLOBBER_CMP;
4446 s=readreg(s,2);
4447 d=readreg(d,2);
4448
4449 raw_cmp_w(d,s);
4450 unlock2(d);
4451 unlock2(s);
4452 }
4453 MENDFUNC(2,cmp_w,(R2 d, R2 s))
4454
4455 MIDFUNC(2,cmp_b,(R1 d, R1 s))
4456 {
4457 CLOBBER_CMP;
4458 s=readreg(s,1);
4459 d=readreg(d,1);
4460
4461 raw_cmp_b(d,s);
4462 unlock2(d);
4463 unlock2(s);
4464 }
4465 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4466
4467
4468 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4469 {
4470 CLOBBER_XOR;
4471 s=readreg(s,4);
4472 d=rmw(d,4,4);
4473
4474 raw_xor_l(d,s);
4475 unlock2(d);
4476 unlock2(s);
4477 }
4478 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4479
4480 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4481 {
4482 CLOBBER_XOR;
4483 s=readreg(s,2);
4484 d=rmw(d,2,2);
4485
4486 raw_xor_w(d,s);
4487 unlock2(d);
4488 unlock2(s);
4489 }
4490 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4491
4492 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4493 {
4494 CLOBBER_XOR;
4495 s=readreg(s,1);
4496 d=rmw(d,1,1);
4497
4498 raw_xor_b(d,s);
4499 unlock2(d);
4500 unlock2(s);
4501 }
4502 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4503
4504 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4505 {
4506 clobber_flags();
4507 remove_all_offsets();
4508 if (osize==4) {
4509 if (out1!=in1 && out1!=r) {
4510 COMPCALL(forget_about)(out1);
4511 }
4512 }
4513 else {
4514 tomem_c(out1);
4515 }
4516
4517 in1=readreg_specific(in1,isize,REG_PAR1);
4518 r=readreg(r,4);
4519 prepare_for_call_1(); /* This should ensure that there won't be
4520 any need for swapping nregs in prepare_for_call_2
4521 */
4522 #if USE_NORMAL_CALLING_CONVENTION
4523 raw_push_l_r(in1);
4524 #endif
4525 unlock2(in1);
4526 unlock2(r);
4527
4528 prepare_for_call_2();
4529 raw_call_r(r);
4530
4531 #if USE_NORMAL_CALLING_CONVENTION
4532 raw_inc_sp(4);
4533 #endif
4534
4535
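	/* The callee's return value is already sitting in REG_RESULT, so
	   record that binding directly instead of emitting a move. */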
4536 live.nat[REG_RESULT].holds[0]=out1;
4537 live.nat[REG_RESULT].nholds=1;
4538 live.nat[REG_RESULT].touched=touchcnt++;
4539
4540 live.state[out1].realreg=REG_RESULT;
4541 live.state[out1].realind=0;
4542 live.state[out1].val=0;
4543 live.state[out1].validsize=osize;
4544 live.state[out1].dirtysize=osize;
4545 set_status(out1,DIRTY);
4546 }
4547 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4548
4549 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4550 {
4551 clobber_flags();
4552 remove_all_offsets();
4553 in1=readreg_specific(in1,isize1,REG_PAR1);
4554 in2=readreg_specific(in2,isize2,REG_PAR2);
4555 r=readreg(r,4);
4556 prepare_for_call_1(); /* This should ensure that there won't be
4557 any need for swapping nregs in prepare_for_call_2
4558 */
4559 #if USE_NORMAL_CALLING_CONVENTION
4560 raw_push_l_r(in2);
4561 raw_push_l_r(in1);
4562 #endif
4563 unlock2(r);
4564 unlock2(in1);
4565 unlock2(in2);
4566 prepare_for_call_2();
4567 raw_call_r(r);
4568 #if USE_NORMAL_CALLING_CONVENTION
4569 raw_inc_sp(8);
4570 #endif
4571 }
4572 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4573
4574 /* forget_about() takes a mid-layer register */
4575 MIDFUNC(1,forget_about,(W4 r))
4576 {
4577 if (isinreg(r))
4578 disassociate(r);
4579 live.state[r].val=0;
4580 set_status(r,UNDEF);
4581 }
4582 MENDFUNC(1,forget_about,(W4 r))
4583
4584 MIDFUNC(0,nop,(void))
4585 {
4586 raw_nop();
4587 }
4588 MENDFUNC(0,nop,(void))
4589
4590
4591 MIDFUNC(1,f_forget_about,(FW r))
4592 {
4593 if (f_isinreg(r))
4594 f_disassociate(r);
4595 live.fate[r].status=UNDEF;
4596 }
4597 MENDFUNC(1,f_forget_about,(FW r))
4598
4599 MIDFUNC(1,fmov_pi,(FW r))
4600 {
4601 r=f_writereg(r);
4602 raw_fmov_pi(r);
4603 f_unlock(r);
4604 }
4605 MENDFUNC(1,fmov_pi,(FW r))
4606
4607 MIDFUNC(1,fmov_log10_2,(FW r))
4608 {
4609 r=f_writereg(r);
4610 raw_fmov_log10_2(r);
4611 f_unlock(r);
4612 }
4613 MENDFUNC(1,fmov_log10_2,(FW r))
4614
4615 MIDFUNC(1,fmov_log2_e,(FW r))
4616 {
4617 r=f_writereg(r);
4618 raw_fmov_log2_e(r);
4619 f_unlock(r);
4620 }
4621 MENDFUNC(1,fmov_log2_e,(FW r))
4622
4623 MIDFUNC(1,fmov_loge_2,(FW r))
4624 {
4625 r=f_writereg(r);
4626 raw_fmov_loge_2(r);
4627 f_unlock(r);
4628 }
4629 MENDFUNC(1,fmov_loge_2,(FW r))
4630
4631 MIDFUNC(1,fmov_1,(FW r))
4632 {
4633 r=f_writereg(r);
4634 raw_fmov_1(r);
4635 f_unlock(r);
4636 }
4637 MENDFUNC(1,fmov_1,(FW r))
4638
4639 MIDFUNC(1,fmov_0,(FW r))
4640 {
4641 r=f_writereg(r);
4642 raw_fmov_0(r);
4643 f_unlock(r);
4644 }
4645 MENDFUNC(1,fmov_0,(FW r))
4646
4647 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4648 {
4649 r=f_writereg(r);
4650 raw_fmov_rm(r,m);
4651 f_unlock(r);
4652 }
4653 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4654
4655 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4656 {
4657 r=f_writereg(r);
4658 raw_fmovi_rm(r,m);
4659 f_unlock(r);
4660 }
4661 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4662
4663 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4664 {
4665 r=f_readreg(r);
4666 raw_fmovi_mr(m,r);
4667 f_unlock(r);
4668 }
4669 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4670
4671 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4672 {
4673 r=f_writereg(r);
4674 raw_fmovs_rm(r,m);
4675 f_unlock(r);
4676 }
4677 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4678
4679 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4680 {
4681 r=f_readreg(r);
4682 raw_fmovs_mr(m,r);
4683 f_unlock(r);
4684 }
4685 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4686
4687 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4688 {
4689 r=f_readreg(r);
4690 raw_fmov_ext_mr(m,r);
4691 f_unlock(r);
4692 }
4693 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4694
4695 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4696 {
4697 r=f_readreg(r);
4698 raw_fmov_mr(m,r);
4699 f_unlock(r);
4700 }
4701 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4702
4703 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4704 {
4705 r=f_writereg(r);
4706 raw_fmov_ext_rm(r,m);
4707 f_unlock(r);
4708 }
4709 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4710
4711 MIDFUNC(2,fmov_rr,(FW d, FR s))
4712 {
4713 if (d==s) { /* How pointless! */
4714 return;
4715 }
4716 #if USE_F_ALIAS
4717 f_disassociate(d);
4718 s=f_readreg(s);
4719 live.fate[d].realreg=s;
4720 live.fate[d].realind=live.fat[s].nholds;
4721 live.fate[d].status=DIRTY;
4722 live.fat[s].holds[live.fat[s].nholds]=d;
4723 live.fat[s].nholds++;
4724 f_unlock(s);
4725 #else
4726 s=f_readreg(s);
4727 d=f_writereg(d);
4728 raw_fmov_rr(d,s);
4729 f_unlock(s);
4730 f_unlock(d);
4731 #endif
4732 }
4733 MENDFUNC(2,fmov_rr,(FW d, FR s))
4734
4735 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4736 {
4737 index=readreg(index,4);
4738
4739 raw_fldcw_m_indexed(index,base);
4740 unlock2(index);
4741 }
4742 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4743
4744 MIDFUNC(1,ftst_r,(FR r))
4745 {
4746 r=f_readreg(r);
4747 raw_ftst_r(r);
4748 f_unlock(r);
4749 }
4750 MENDFUNC(1,ftst_r,(FR r))
4751
4752 MIDFUNC(0,dont_care_fflags,(void))
4753 {
4754 f_disassociate(FP_RESULT);
4755 }
4756 MENDFUNC(0,dont_care_fflags,(void))
4757
4758 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4759 {
4760 s=f_readreg(s);
4761 d=f_writereg(d);
4762 raw_fsqrt_rr(d,s);
4763 f_unlock(s);
4764 f_unlock(d);
4765 }
4766 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4767
4768 MIDFUNC(2,fabs_rr,(FW d, FR s))
4769 {
4770 s=f_readreg(s);
4771 d=f_writereg(d);
4772 raw_fabs_rr(d,s);
4773 f_unlock(s);
4774 f_unlock(d);
4775 }
4776 MENDFUNC(2,fabs_rr,(FW d, FR s))
4777
4778 MIDFUNC(2,fsin_rr,(FW d, FR s))
4779 {
4780 s=f_readreg(s);
4781 d=f_writereg(d);
4782 raw_fsin_rr(d,s);
4783 f_unlock(s);
4784 f_unlock(d);
4785 }
4786 MENDFUNC(2,fsin_rr,(FW d, FR s))
4787
4788 MIDFUNC(2,fcos_rr,(FW d, FR s))
4789 {
4790 s=f_readreg(s);
4791 d=f_writereg(d);
4792 raw_fcos_rr(d,s);
4793 f_unlock(s);
4794 f_unlock(d);
4795 }
4796 MENDFUNC(2,fcos_rr,(FW d, FR s))
4797
4798 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4799 {
4800 s=f_readreg(s);
4801 d=f_writereg(d);
4802 raw_ftwotox_rr(d,s);
4803 f_unlock(s);
4804 f_unlock(d);
4805 }
4806 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4807
4808 MIDFUNC(2,fetox_rr,(FW d, FR s))
4809 {
4810 s=f_readreg(s);
4811 d=f_writereg(d);
4812 raw_fetox_rr(d,s);
4813 f_unlock(s);
4814 f_unlock(d);
4815 }
4816 MENDFUNC(2,fetox_rr,(FW d, FR s))
4817
4818 MIDFUNC(2,frndint_rr,(FW d, FR s))
4819 {
4820 s=f_readreg(s);
4821 d=f_writereg(d);
4822 raw_frndint_rr(d,s);
4823 f_unlock(s);
4824 f_unlock(d);
4825 }
4826 MENDFUNC(2,frndint_rr,(FW d, FR s))
4827
4828 MIDFUNC(2,flog2_rr,(FW d, FR s))
4829 {
4830 s=f_readreg(s);
4831 d=f_writereg(d);
4832 raw_flog2_rr(d,s);
4833 f_unlock(s);
4834 f_unlock(d);
4835 }
4836 MENDFUNC(2,flog2_rr,(FW d, FR s))
4837
4838 MIDFUNC(2,fneg_rr,(FW d, FR s))
4839 {
4840 s=f_readreg(s);
4841 d=f_writereg(d);
4842 raw_fneg_rr(d,s);
4843 f_unlock(s);
4844 f_unlock(d);
4845 }
4846 MENDFUNC(2,fneg_rr,(FW d, FR s))
4847
4848 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4849 {
4850 s=f_readreg(s);
4851 d=f_rmw(d);
4852 raw_fadd_rr(d,s);
4853 f_unlock(s);
4854 f_unlock(d);
4855 }
4856 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4857
4858 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4859 {
4860 s=f_readreg(s);
4861 d=f_rmw(d);
4862 raw_fsub_rr(d,s);
4863 f_unlock(s);
4864 f_unlock(d);
4865 }
4866 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4867
4868 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4869 {
4870 d=f_readreg(d);
4871 s=f_readreg(s);
4872 raw_fcmp_rr(d,s);
4873 f_unlock(s);
4874 f_unlock(d);
4875 }
4876 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4877
4878 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4879 {
4880 s=f_readreg(s);
4881 d=f_rmw(d);
4882 raw_fdiv_rr(d,s);
4883 f_unlock(s);
4884 f_unlock(d);
4885 }
4886 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4887
4888 MIDFUNC(2,frem_rr,(FRW d, FR s))
4889 {
4890 s=f_readreg(s);
4891 d=f_rmw(d);
4892 raw_frem_rr(d,s);
4893 f_unlock(s);
4894 f_unlock(d);
4895 }
4896 MENDFUNC(2,frem_rr,(FRW d, FR s))
4897
4898 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4899 {
4900 s=f_readreg(s);
4901 d=f_rmw(d);
4902 raw_frem1_rr(d,s);
4903 f_unlock(s);
4904 f_unlock(d);
4905 }
4906 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4907
4908 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4909 {
4910 s=f_readreg(s);
4911 d=f_rmw(d);
4912 raw_fmul_rr(d,s);
4913 f_unlock(s);
4914 f_unlock(d);
4915 }
4916 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4917
4918 /********************************************************************
4919 * Support functions exposed to gencomp. CREATE time *
4920 ********************************************************************/
4921
4922 void set_zero(int r, int tmp)
4923 {
4924 if (setzflg_uses_bsf)
4925 bsf_l_rr(r,r);
4926 else
4927 simulate_bsf(tmp,r);
4928 }
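/* set_zero() materializes the 68k Z flag from register r: BSF sets the
 * host Z flag according to whether its source operand is zero.
 * setzflg_uses_bsf comes from target_check_bsf() (see compiler_init
 * below), which probes whether BSF leaves the remaining flags alone on
 * this processor; where it does not, the slower simulate_bsf() fallback
 * is used instead. */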
4929
4930 int kill_rodent(int r)
4931 {
4932 return KILLTHERAT &&
4933 have_rat_stall &&
4934 (live.state[r].status==INMEM ||
4935 live.state[r].status==CLEAN ||
4936 live.state[r].status==ISCONST ||
4937 live.state[r].dirtysize==4);
4938 }
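/* "Killing the rat": on processors with partial register stalls
 * (have_rat_stall), overwriting a register completely can be cheaper than
 * merging a narrow write into its existing contents. The tests above
 * identify registers whose cached value may simply be discarded: it is
 * already in memory, clean, a known constant, or dirty in all four bytes
 * anyway. */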
4939
4940 uae_u32 get_const(int r)
4941 {
4942 Dif (!isconst(r)) {
4943 write_log("Register %d should be constant, but isn't\n",r);
4944 abort();
4945 }
4946 return live.state[r].val;
4947 }
4948
4949 void sync_m68k_pc(void)
4950 {
4951 if (m68k_pc_offset) {
4952 add_l_ri(PC_P,m68k_pc_offset);
4953 comp_pc_p+=m68k_pc_offset;
4954 m68k_pc_offset=0;
4955 }
4956 }
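/* The emulated PC is tracked lazily: while translating, only
 * m68k_pc_offset is advanced. sync_m68k_pc() folds the accumulated offset
 * into the PC_P virtual register and into comp_pc_p (the translator's own
 * fetch pointer) whenever an exact PC is required. */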
4957
4958 /********************************************************************
4959 * Scratch registers management *
4960 ********************************************************************/
4961
4962 struct scratch_t {
4963 uae_u32 regs[VREGS];
4964 fpu_register fregs[VFREGS];
4965 };
4966
4967 static scratch_t scratch;
4968
4969 /********************************************************************
4970 * Support functions exposed to newcpu *
4971 ********************************************************************/
4972
4973 static inline const char *str_on_off(bool b)
4974 {
4975 return b ? "on" : "off";
4976 }
4977
4978 void compiler_init(void)
4979 {
4980 static bool initialized = false;
4981 if (initialized)
4982 return;
4983
4984 #if JIT_DEBUG
4985 // JIT debug mode ?
4986 JITDebug = PrefsFindBool("jitdebug");
4987 #endif
4988 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4989
4990 #ifdef USE_JIT_FPU
4991 // Use JIT compiler for FPU instructions ?
4992 avoid_fpu = !PrefsFindBool("jitfpu");
4993 #else
4994 // JIT FPU is always disabled
4995 avoid_fpu = true;
4996 #endif
4997 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4998
4999 // Get size of the translation cache (in KB)
5000 cache_size = PrefsFindInt32("jitcachesize");
5001 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5002
5003 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5004 raw_init_cpu();
5005 setzflg_uses_bsf = target_check_bsf();
5006 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5007 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5008 write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
5009
5010 // Translation cache flush mechanism
5011 lazy_flush = PrefsFindBool("jitlazyflush");
5012 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5013 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5014
5015 // Compiler features
5016 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5017 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5018 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5019 write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
5020 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5021
5022 // Build compiler tables
5023 build_comp();
5024
5025 initialized = true;
5026
5027 #if PROFILE_UNTRANSLATED_INSNS
5028 write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
5029 #endif
5030
5031 #if PROFILE_COMPILE_TIME
5032 write_log("<JIT compiler> : gather statistics on translation time\n");
5033 emul_start_time = clock();
5034 #endif
5035 }
5036
5037 void compiler_exit(void)
5038 {
5039 #if PROFILE_COMPILE_TIME
5040 emul_end_time = clock();
5041 #endif
5042
5043 // Deallocate translation cache
5044 if (compiled_code) {
5045 vm_release(compiled_code, cache_size * 1024);
5046 compiled_code = 0;
5047 }
5048
5049 // Deallocate popallspace
5050 if (popallspace) {
5051 vm_release(popallspace, POPALLSPACE_SIZE);
5052 popallspace = 0;
5053 }
5054
5055 #if PROFILE_COMPILE_TIME
5056 write_log("### Compile Block statistics\n");
5057 write_log("Number of calls to compile_block : %d\n", compile_count);
5058 uae_u32 emul_time = emul_end_time - emul_start_time;
5059 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5060 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5061 100.0*double(compile_time)/double(emul_time));
5062 write_log("\n");
5063 #endif
5064
5065 #if PROFILE_UNTRANSLATED_INSNS
5066 uae_u64 untranslated_count = 0;
5067 for (int i = 0; i < 65536; i++) {
5068 opcode_nums[i] = i;
5069 untranslated_count += raw_cputbl_count[i];
5070 }
5071 	write_log("Sorting untranslated instruction counts...\n");
5072 qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5073 write_log("\nRank Opc Count Name\n");
5074 for (int i = 0; i < untranslated_top_ten; i++) {
5075 uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5076 struct instr *dp;
5077 struct mnemolookup *lookup;
5078 if (!count)
5079 break;
5080 dp = table68k + opcode_nums[i];
5081 for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5082 ;
5083 write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
5084 }
5085 #endif
5086
5087 #if RECORD_REGISTER_USAGE
5088 int reg_count_ids[16];
5089 uint64 tot_reg_count = 0;
5090 for (int i = 0; i < 16; i++) {
5091 reg_count_ids[i] = i;
5092 tot_reg_count += reg_count[i];
5093 }
5094 qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5095 uint64 cum_reg_count = 0;
5096 for (int i = 0; i < 16; i++) {
5097 int r = reg_count_ids[i];
5098 cum_reg_count += reg_count[r];
5099 printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5100 reg_count[r],
5101 100.0*double(reg_count[r])/double(tot_reg_count),
5102 100.0*double(cum_reg_count)/double(tot_reg_count));
5103 }
5104 #endif
5105 }
5106
5107 bool compiler_use_jit(void)
5108 {
5109 // Check for the "jit" prefs item
5110 if (!PrefsFindBool("jit"))
5111 return false;
5112
5113 	// Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5114 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5115 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5116 return false;
5117 }
5118
5119 // FIXME: there are currently problems with JIT compilation and anything below a 68040
5120 if (CPUType < 4) {
5121 write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5122 return false;
5123 }
5124
5125 return true;
5126 }
5127
5128 void init_comp(void)
5129 {
5130 int i;
5131 uae_s8* cb=can_byte;
5132 uae_s8* cw=can_word;
5133 uae_s8* au=always_used;
5134
5135 #if RECORD_REGISTER_USAGE
5136 for (i=0;i<16;i++)
5137 reg_count_local[i] = 0;
5138 #endif
5139
5140 for (i=0;i<VREGS;i++) {
5141 live.state[i].realreg=-1;
5142 live.state[i].needflush=NF_SCRATCH;
5143 live.state[i].val=0;
5144 set_status(i,UNDEF);
5145 }
5146
5147 for (i=0;i<VFREGS;i++) {
5148 live.fate[i].status=UNDEF;
5149 live.fate[i].realreg=-1;
5150 live.fate[i].needflush=NF_SCRATCH;
5151 }
5152
5153 for (i=0;i<VREGS;i++) {
5154 if (i<16) { /* First 16 registers map to 68k registers */
5155 live.state[i].mem=((uae_u32*)&regs)+i;
5156 live.state[i].needflush=NF_TOMEM;
5157 set_status(i,INMEM);
5158 }
5159 else
5160 live.state[i].mem=scratch.regs+i;
5161 }
5162 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5163 live.state[PC_P].needflush=NF_TOMEM;
5164 set_const(PC_P,(uintptr)comp_pc_p);
5165
5166 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5167 live.state[FLAGX].needflush=NF_TOMEM;
5168 set_status(FLAGX,INMEM);
5169
5170 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5171 live.state[FLAGTMP].needflush=NF_TOMEM;
5172 set_status(FLAGTMP,INMEM);
5173
5174 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5175 set_status(NEXT_HANDLER,UNDEF);
5176
5177 for (i=0;i<VFREGS;i++) {
5178 if (i<8) { /* First 8 registers map to 68k FPU registers */
5179 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5180 live.fate[i].needflush=NF_TOMEM;
5181 live.fate[i].status=INMEM;
5182 }
5183 else if (i==FP_RESULT) {
5184 live.fate[i].mem=(uae_u32*)(&fpu.result);
5185 live.fate[i].needflush=NF_TOMEM;
5186 live.fate[i].status=INMEM;
5187 }
5188 else
5189 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5190 }
5191
5192
5193 for (i=0;i<N_REGS;i++) {
5194 live.nat[i].touched=0;
5195 live.nat[i].nholds=0;
5196 live.nat[i].locked=0;
5197 if (*cb==i) {
5198 live.nat[i].canbyte=1; cb++;
5199 } else live.nat[i].canbyte=0;
5200 if (*cw==i) {
5201 live.nat[i].canword=1; cw++;
5202 } else live.nat[i].canword=0;
5203 if (*au==i) {
5204 live.nat[i].locked=1; au++;
5205 }
5206 }
5207
5208 for (i=0;i<N_FREGS;i++) {
5209 live.fat[i].touched=0;
5210 live.fat[i].nholds=0;
5211 live.fat[i].locked=0;
5212 }
5213
5214 touchcnt=1;
5215 m68k_pc_offset=0;
5216 live.flags_in_flags=TRASH;
5217 live.flags_on_stack=VALID;
5218 live.flags_are_important=1;
5219
5220 raw_fp_init();
5221 }
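/* Virtual register layout established above:
 *   vregs 0..15     -> the 68k D0-D7/A0-A7, backed by the regs structure
 *   PC_P            -> regs.pc_p, held as a constant while compiling
 *   FLAGX, FLAGTMP  -> regflags.x and regflags.cznv
 *   NEXT_HANDLER    -> flushed via the special NF_HANDLER rule
 *   everything else -> scratch, backed by the static scratch area
 * FP vregs follow the same pattern: 0..7 are the 68k FPU registers,
 * FP_RESULT maps to fpu.result, and the rest are scratch. */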
5222
5223 /* Only do this if you really mean it! The next call should be to init! */
5224 void flush(int save_regs)
5225 {
5226 	int i;
5227
5228 log_flush();
5229 flush_flags(); /* low level */
5230 sync_m68k_pc(); /* mid level */
5231
5232 if (save_regs) {
5233 for (i=0;i<VFREGS;i++) {
5234 if (live.fate[i].needflush==NF_SCRATCH ||
5235 live.fate[i].status==CLEAN) {
5236 f_disassociate(i);
5237 }
5238 }
5239 for (i=0;i<VREGS;i++) {
5240 if (live.state[i].needflush==NF_TOMEM) {
5241 switch(live.state[i].status) {
5242 case INMEM:
5243 if (live.state[i].val) {
5244 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5245 log_vwrite(i);
5246 live.state[i].val=0;
5247 }
5248 break;
5249 case CLEAN:
5250 case DIRTY:
5251 remove_offset(i,-1); tomem(i); break;
5252 case ISCONST:
5253 if (i!=PC_P)
5254 writeback_const(i);
5255 break;
5256 default: break;
5257 }
5258 Dif (live.state[i].val && i!=PC_P) {
5259 write_log("Register %d still has val %x\n",
5260 i,live.state[i].val);
5261 }
5262 }
5263 }
5264 for (i=0;i<VFREGS;i++) {
5265 if (live.fate[i].needflush==NF_TOMEM &&
5266 live.fate[i].status==DIRTY) {
5267 f_evict(i);
5268 }
5269 }
5270 raw_fp_cleanup_drop();
5271 }
5272 if (needflags) {
5273 write_log("Warning! flush with needflags=1!\n");
5274 }
5275 }
5276
5277 static void flush_keepflags(void)
5278 {
5279 	int i;
5280
5281 for (i=0;i<VFREGS;i++) {
5282 if (live.fate[i].needflush==NF_SCRATCH ||
5283 live.fate[i].status==CLEAN) {
5284 f_disassociate(i);
5285 }
5286 }
5287 for (i=0;i<VREGS;i++) {
5288 if (live.state[i].needflush==NF_TOMEM) {
5289 switch(live.state[i].status) {
5290 case INMEM:
5291 /* Can't adjust the offset here --- that needs "add" */
5292 break;
5293 case CLEAN:
5294 case DIRTY:
5295 remove_offset(i,-1); tomem(i); break;
5296 case ISCONST:
5297 if (i!=PC_P)
5298 writeback_const(i);
5299 break;
5300 default: break;
5301 }
5302 }
5303 }
5304 for (i=0;i<VFREGS;i++) {
5305 if (live.fate[i].needflush==NF_TOMEM &&
5306 live.fate[i].status==DIRTY) {
5307 f_evict(i);
5308 }
5309 }
5310 raw_fp_cleanup_drop();
5311 }
5312
5313 void freescratch(void)
5314 {
5315 int i;
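	/* Native register 4 is the stack pointer (ESP/RSP) and stays locked
	   by design; any other register still locked at this point means an
	   unlock was forgotten somewhere. */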
5316 for (i=0;i<N_REGS;i++)
5317 if (live.nat[i].locked && i!=4)
5318 write_log("Warning! %d is locked\n",i);
5319
5320 for (i=0;i<VREGS;i++)
5321 if (live.state[i].needflush==NF_SCRATCH) {
5322 forget_about(i);
5323 }
5324
5325 for (i=0;i<VFREGS;i++)
5326 if (live.fate[i].needflush==NF_SCRATCH) {
5327 f_forget_about(i);
5328 }
5329 }
5330
5331 /********************************************************************
5332 * Support functions, internal *
5333 ********************************************************************/
5334
5335
5336 static void align_target(uae_u32 a)
5337 {
5338 if (!a)
5339 return;
5340
5341 if (tune_nop_fillers)
5342 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5343 else {
5344 /* Fill with NOPs --- makes debugging with gdb easier */
5345 while ((uintptr)target&(a-1))
5346 *target++=0x90;
5347 }
5348 }
5349
5350 static __inline__ int isinrom(uintptr addr)
5351 {
5352 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5353 }
5354
5355 static void flush_all(void)
5356 {
5357 int i;
5358
5359 log_flush();
5360 for (i=0;i<VREGS;i++)
5361 if (live.state[i].status==DIRTY) {
5362 if (!call_saved[live.state[i].realreg]) {
5363 tomem(i);
5364 }
5365 }
5366 for (i=0;i<VFREGS;i++)
5367 if (f_isinreg(i))
5368 f_evict(i);
5369 raw_fp_cleanup_drop();
5370 }
5371
5372 /* Make sure all registers that will get clobbered by a call are
5373    safe and sound in memory */
5374 static void prepare_for_call_1(void)
5375 {
5376 flush_all(); /* If there are registers that don't get clobbered,
5377 * we should be a bit more selective here */
5378 }
5379
5380 /* We will call a C routine in a moment. That will clobber all registers,
5381 so we need to disassociate everything */
5382 static void prepare_for_call_2(void)
5383 {
5384 int i;
5385 for (i=0;i<N_REGS;i++)
5386 if (!call_saved[i] && live.nat[i].nholds>0)
5387 free_nreg(i);
5388
5389 for (i=0;i<N_FREGS;i++)
5390 if (live.fat[i].nholds>0)
5391 f_free_nreg(i);
5392
5393 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5394 flags at the very start of the call_r
5395 functions! */
5396 }
5397
5398 /********************************************************************
5399 * Memory access and related functions, CREATE time *
5400 ********************************************************************/
5401
5402 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5403 {
5404 next_pc_p=not_taken;
5405 taken_pc_p=taken;
5406 branch_cc=cond;
5407 }
5408
5409
5410 static uae_u32 get_handler_address(uae_u32 addr)
5411 {
5412 uae_u32 cl=cacheline(addr);
5413 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5414 return (uintptr)&(bi->direct_handler_to_use);
5415 }
5416
5417 static uae_u32 get_handler(uae_u32 addr)
5418 {
5419 uae_u32 cl=cacheline(addr);
5420 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5421 return (uintptr)bi->direct_handler_to_use;
5422 }
5423
5424 static void load_handler(int reg, uae_u32 addr)
5425 {
5426 mov_l_rm(reg,get_handler_address(addr));
5427 }
5428
5429 /* This version assumes that it is writing *real* memory, and *will* fail
5430 * if that assumption is wrong! No branches, no second chances, just
5431 * straight go-for-it attitude */
5432
5433 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5434 {
5435 int f=tmp;
5436
5437 if (clobber)
5438 f=source;
5439
5440 #if SIZEOF_VOID_P == 8
5441 if (!ThirtyThreeBitAddressing)
5442 sign_extend_32_rr(address, address);
5443 #endif
5444
5445 switch(size) {
5446 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5447 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5448 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5449 }
5450 forget_about(tmp);
5451 forget_about(f);
5452 }
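/* The mov_*_bRr() helpers address host memory at MEMBaseDiff plus the
 * emulated address (sign-extended first on 64-bit hosts unless 33-bit
 * addressing is in effect). The bswap calls convert between the 68k's
 * big-endian layout and the little-endian host; byte accesses need no
 * swap. */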
5453
5454 void writebyte(int address, int source, int tmp)
5455 {
5456 writemem_real(address,source,1,tmp,0);
5457 }
5458
5459 static __inline__ void writeword_general(int address, int source, int tmp,
5460 int clobber)
5461 {
5462 writemem_real(address,source,2,tmp,clobber);
5463 }
5464
5465 void writeword_clobber(int address, int source, int tmp)
5466 {
5467 writeword_general(address,source,tmp,1);
5468 }
5469
5470 void writeword(int address, int source, int tmp)
5471 {
5472 writeword_general(address,source,tmp,0);
5473 }
5474
5475 static __inline__ void writelong_general(int address, int source, int tmp,
5476 int clobber)
5477 {
5478 writemem_real(address,source,4,tmp,clobber);
5479 }
5480
5481 void writelong_clobber(int address, int source, int tmp)
5482 {
5483 writelong_general(address,source,tmp,1);
5484 }
5485
5486 void writelong(int address, int source, int tmp)
5487 {
5488 writelong_general(address,source,tmp,0);
5489 }
5490
5491
5492
5493 /* This version assumes that it is reading *real* memory, and *will* fail
5494 * if that assumption is wrong! No branches, no second chances, just
5495 * straight go-for-it attitude */
5496
5497 static void readmem_real(int address, int dest, int size, int tmp)
5498 {
5499 int f=tmp;
5500
5501 if (size==4 && address!=dest)
5502 f=dest;
5503
5504 #if SIZEOF_VOID_P == 8
5505 if (!ThirtyThreeBitAddressing)
5506 sign_extend_32_rr(address, address);
5507 #endif
5508
5509 switch(size) {
5510 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5511 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5512 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5513 }
5514 forget_about(tmp);
5515 }
5516
5517 void readbyte(int address, int dest, int tmp)
5518 {
5519 readmem_real(address,dest,1,tmp);
5520 }
5521
5522 void readword(int address, int dest, int tmp)
5523 {
5524 readmem_real(address,dest,2,tmp);
5525 }
5526
5527 void readlong(int address, int dest, int tmp)
5528 {
5529 readmem_real(address,dest,4,tmp);
5530 }
5531
5532 void get_n_addr(int address, int dest, int tmp)
5533 {
5534 // a is the register containing the virtual address
5535 	// after the offset has been fetched
5536 int a=tmp;
5537
5538 // f is the register that will contain the offset
5539 int f=tmp;
5540
5541 // a == f == tmp if (address == dest)
5542 if (address!=dest) {
5543 a=address;
5544 f=dest;
5545 }
5546
5547 #if REAL_ADDRESSING
5548 mov_l_rr(dest, address);
5549 #elif DIRECT_ADDRESSING
5550 lea_l_brr(dest,address,MEMBaseDiff);
5551 #endif
5552 forget_about(tmp);
5553 }
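/* Under real addressing the emulated address already is the host address,
 * so a plain register move suffices; under direct addressing the constant
 * MEMBaseDiff displacement is folded in with a single lea. The a/f setup
 * above is currently unused by either path. */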
5554
5555 void get_n_addr_jmp(int address, int dest, int tmp)
5556 {
5557 /* For this, we need to get the same address as the rest of UAE
5558 would --- otherwise we end up translating everything twice */
5559 get_n_addr(address,dest,tmp);
5560 }
5561
5562
5563 /* base is a register, but dp is an actual value.
5564 target is a register, as is tmp */
5565 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5566 {
5567 int reg = (dp >> 12) & 15;
5568 int regd_shift=(dp >> 9) & 3;
5569
5570 if (dp & 0x100) {
5571 int ignorebase=(dp&0x80);
5572 int ignorereg=(dp&0x40);
5573 int addbase=0;
5574 int outer=0;
5575
5576 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5577 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5578
5579 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5580 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5581
5582 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5583 if (!ignorereg) {
5584 if ((dp & 0x800) == 0)
5585 sign_extend_16_rr(target,reg);
5586 else
5587 mov_l_rr(target,reg);
5588 shll_l_ri(target,regd_shift);
5589 }
5590 else
5591 mov_l_ri(target,0);
5592
5593 /* target is now regd */
5594 if (!ignorebase)
5595 add_l(target,base);
5596 add_l_ri(target,addbase);
5597 if (dp&0x03) readlong(target,target,tmp);
5598 } else { /* do the getlong first, then add regd */
5599 if (!ignorebase) {
5600 mov_l_rr(target,base);
5601 add_l_ri(target,addbase);
5602 }
5603 else
5604 mov_l_ri(target,addbase);
5605 if (dp&0x03) readlong(target,target,tmp);
5606
5607 if (!ignorereg) {
5608 if ((dp & 0x800) == 0)
5609 sign_extend_16_rr(tmp,reg);
5610 else
5611 mov_l_rr(tmp,reg);
5612 shll_l_ri(tmp,regd_shift);
5613 /* tmp is now regd */
5614 add_l(target,tmp);
5615 }
5616 }
5617 add_l_ri(target,outer);
5618 }
5619 else { /* 68000 version */
5620 if ((dp & 0x800) == 0) { /* Sign extend */
5621 sign_extend_16_rr(target,reg);
5622 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5623 }
5624 else {
5625 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5626 }
5627 }
5628 forget_about(tmp);
5629 }
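/* Layout of the 68020 extension word dp decoded above:
 *   bits 15-12  index register number
 *   bit  11     index size (0 = sign-extended word, 1 = long)
 *   bits 10-9   index scale (shift count 0-3)
 *   bit  8      1 = full extension word (memory indirect modes)
 *   bits 7/6    base suppress / index suppress
 *   bits 5-4    base displacement size (10 = word, 11 = long)
 *   bit  2      0 = pre-indexed, 1 = post-indexed memory indirection
 *   bits 1-0    outer displacement size (10 = word, 11 = long)
 * The brief (68000-style) format instead keeps an 8-bit displacement in
 * the low byte, handled by the else branch. */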
5630
5631
5632
5633
5634
5635 void set_cache_state(int enabled)
5636 {
5637 if (enabled!=letit)
5638 flush_icache_hard(77);
5639 letit=enabled;
5640 }
5641
5642 int get_cache_state(void)
5643 {
5644 return letit;
5645 }
5646
5647 uae_u32 get_jitted_size(void)
5648 {
5649 if (compiled_code)
5650 return current_compile_p-compiled_code;
5651 return 0;
5652 }
5653
5654 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5655 const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5656
5657 static uint8 *do_alloc_code(uint32 size, int depth)
5658 {
5659 #if defined(__linux__) && 0
5660 /*
5661 This is a really awful hack that is known to work on Linux at
5662 least.
5663
5664    The trick here is to make sure the allocated cache is near the
5665    code segment, and more precisely in the positive half of a
5666    32-bit address space, i.e. addr < 0x80000000. Actually, it
5667    turned out that a 32-bit binary run on AMD64 yields a cache
5668    allocated around 0xa0000000, thus causing some trouble when
5669 translating addresses from m68k to x86.
5670 */
5671 static uint8 * code_base = NULL;
5672 if (code_base == NULL) {
5673 uintptr page_size = getpagesize();
5674 uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5675 if (boundaries < page_size)
5676 boundaries = page_size;
5677 code_base = (uint8 *)sbrk(0);
5678 for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5679 if (vm_acquire_fixed(code_base, size) == 0) {
5680 uint8 *code = code_base;
5681 code_base += size;
5682 return code;
5683 }
5684 code_base += boundaries;
5685 }
5686 return NULL;
5687 }
5688
5689 if (vm_acquire_fixed(code_base, size) == 0) {
5690 uint8 *code = code_base;
5691 code_base += size;
5692 return code;
5693 }
5694
5695 if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5696 return NULL;
5697
5698 return do_alloc_code(size, depth + 1);
5699 #else
5700 uint8 *code = (uint8 *)vm_acquire(size);
5701 return code == VM_MAP_FAILED ? NULL : code;
5702 #endif
5703 }
5704
5705 static inline uint8 *alloc_code(uint32 size)
5706 {
5707 return do_alloc_code(size, 0);
5708 }
5709
5710 void alloc_cache(void)
5711 {
5712 if (compiled_code) {
5713 flush_icache_hard(6);
5714 vm_release(compiled_code, cache_size * 1024);
5715 compiled_code = 0;
5716 }
5717
5718 if (cache_size == 0)
5719 return;
5720
5721 while (!compiled_code && cache_size) {
5722 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5723 compiled_code = 0;
5724 cache_size /= 2;
5725 }
5726 }
5727 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5728
5729 if (compiled_code) {
5730 		write_log("<JIT compiler> : actual translation cache size : %d KB at %p\n", cache_size, compiled_code);
5731 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5732 current_compile_p = compiled_code;
5733 current_cache_size = 0;
5734 }
5735 }
5736
5737
5738
5739 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5740
5741 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5742 {
5743 uae_u32 k1 = 0;
5744 uae_u32 k2 = 0;
5745
5746 #if USE_CHECKSUM_INFO
5747 checksum_info *csi = bi->csi;
5748 Dif(!csi) abort();
5749 while (csi) {
5750 uae_s32 len = csi->length;
5751 uintptr tmp = (uintptr)csi->start_p;
5752 #else
5753 uae_s32 len = bi->len;
5754 uintptr tmp = (uintptr)bi->min_pcp;
5755 #endif
5756 uae_u32*pos;
5757
5758 len += (tmp & 3);
5759 tmp &= ~((uintptr)3);
5760 pos = (uae_u32 *)tmp;
5761
5762 if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5763 while (len > 0) {
5764 k1 += *pos;
5765 k2 ^= *pos;
5766 pos++;
5767 len -= 4;
5768 }
5769 }
5770
5771 #if USE_CHECKSUM_INFO
5772 csi = csi->next;
5773 }
5774 #endif
5775
5776 *c1 = k1;
5777 *c2 = k2;
5778 }
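/* The checksum is deliberately cheap: a running 32-bit sum (k1) and XOR
 * (k2) over the word-aligned 68k code the block was translated from.
 * After a lazy flush, block_check_checksum() recomputes it to decide
 * whether the translation is still valid or the underlying code has been
 * modified. */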
5779
5780 #if 0
5781 static void show_checksum(CSI_TYPE* csi)
5782 {
5783 uae_u32 k1=0;
5784 uae_u32 k2=0;
5785 uae_s32 len=CSI_LENGTH(csi);
5786 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5787 uae_u32* pos;
5788
5789 len+=(tmp&3);
5790 tmp&=(~3);
5791 pos=(uae_u32*)tmp;
5792
5793 if (len<0 || len>MAX_CHECKSUM_LEN) {
5794 return;
5795 }
5796 else {
5797 while (len>0) {
5798 write_log("%08x ",*pos);
5799 pos++;
5800 len-=4;
5801 }
5802 write_log(" bla\n");
5803 }
5804 }
5805 #endif
5806
5807
5808 int check_for_cache_miss(void)
5809 {
5810 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5811
5812 if (bi) {
5813 int cl=cacheline(regs.pc_p);
5814 if (bi!=cache_tags[cl+1].bi) {
5815 raise_in_cl_list(bi);
5816 return 1;
5817 }
5818 }
5819 return 0;
5820 }
5821
5822
5823 static void recompile_block(void)
5824 {
5825 /* An existing block's countdown code has expired. We need to make
5826 sure that execute_normal doesn't refuse to recompile due to a
5827 perceived cache miss... */
5828 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5829
5830 Dif (!bi)
5831 abort();
5832 raise_in_cl_list(bi);
5833 execute_normal();
5834 return;
5835 }
5836 static void cache_miss(void)
5837 {
5838 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5839 uae_u32 cl=cacheline(regs.pc_p);
5840 blockinfo* bi2=get_blockinfo(cl);
5841
5842 if (!bi) {
5843 execute_normal(); /* Compile this block now */
5844 return;
5845 }
5846 Dif (!bi2 || bi==bi2) {
5847 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5848 abort();
5849 }
5850 raise_in_cl_list(bi);
5851 return;
5852 }
5853
5854 static int called_check_checksum(blockinfo* bi);
5855
5856 static inline int block_check_checksum(blockinfo* bi)
5857 {
5858 uae_u32 c1,c2;
5859 bool isgood;
5860
5861 if (bi->status!=BI_NEED_CHECK)
5862 return 1; /* This block is in a checked state */
5863
5864 checksum_count++;
5865
5866 if (bi->c1 || bi->c2)
5867 calc_checksum(bi,&c1,&c2);
5868 else {
5869 c1=c2=1; /* Make sure it doesn't match */
5870 }
5871
5872 isgood=(c1==bi->c1 && c2==bi->c2);
5873
5874 if (isgood) {
5875 /* This block is still OK. So we reactivate. Of course, that
5876 means we have to move it into the needs-to-be-flushed list */
5877 bi->handler_to_use=bi->handler;
5878 set_dhtu(bi,bi->direct_handler);
5879 bi->status=BI_CHECKING;
5880 isgood=called_check_checksum(bi);
5881 }
5882 if (isgood) {
5883 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5884 c1,c2,bi->c1,bi->c2);*/
5885 remove_from_list(bi);
5886 add_to_active(bi);
5887 raise_in_cl_list(bi);
5888 bi->status=BI_ACTIVE;
5889 }
5890 else {
5891 /* This block actually changed. We need to invalidate it,
5892 and set it up to be recompiled */
5893 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5894 c1,c2,bi->c1,bi->c2); */
5895 invalidate_block(bi);
5896 raise_in_cl_list(bi);
5897 }
5898 return isgood;
5899 }
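/* Blockinfo states as used here and in the flush routines:
 *   BI_INVALID     not (or no longer) trusted; must be (re)compiled
 *   BI_COMPILING   currently being generated by compile_block()
 *   BI_FINALIZING  epilogue being emitted
 *   BI_ACTIVE      directly executable
 *   BI_NEED_CHECK  soft-flushed; checksum must pass before reuse
 *   BI_NEED_RECOMP countdown expired or invalidated by a ranged flush
 *   BI_CHECKING    transient, while dependent blocks are verified */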
5900
5901 static int called_check_checksum(blockinfo* bi)
5902 {
5903 dependency* x=bi->deplist;
5904 int isgood=1;
5905 int i;
5906
5907 for (i=0;i<2 && isgood;i++) {
5908 if (bi->dep[i].jmp_off) {
5909 isgood=block_check_checksum(bi->dep[i].target);
5910 }
5911 }
5912 return isgood;
5913 }
5914
5915 static void check_checksum(void)
5916 {
5917 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5918 uae_u32 cl=cacheline(regs.pc_p);
5919 blockinfo* bi2=get_blockinfo(cl);
5920
5921 /* These are not the droids you are looking for... */
5922 if (!bi) {
5923 /* Whoever is the primary target is in a dormant state, but
5924 calling it was accidental, and we should just compile this
5925 new block */
5926 execute_normal();
5927 return;
5928 }
5929 if (bi!=bi2) {
5930 /* The block was hit accidentally, but it does exist. Cache miss */
5931 cache_miss();
5932 return;
5933 }
5934
5935 if (!block_check_checksum(bi))
5936 execute_normal();
5937 }
5938
5939 static __inline__ void match_states(blockinfo* bi)
5940 {
5941 int i;
5942 smallstate* s=&(bi->env);
5943
5944 if (bi->status==BI_NEED_CHECK) {
5945 block_check_checksum(bi);
5946 }
5947 if (bi->status==BI_ACTIVE ||
5948 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5949 block makes (about not using
5950 certain vregs) */
5951 for (i=0;i<16;i++) {
5952 if (s->virt[i]==L_UNNEEDED) {
5953 // write_log("unneeded reg %d at %p\n",i,target);
5954 COMPCALL(forget_about)(i); // FIXME
5955 }
5956 }
5957 }
5958 flush(1);
5959
5960 /* And now deal with the *demands* the block makes */
5961 for (i=0;i<N_REGS;i++) {
5962 int v=s->nat[i];
5963 if (v>=0) {
5964 // printf("Loading reg %d into %d at %p\n",v,i,target);
5965 readreg_specific(v,4,i);
5966 // do_load_reg(i,v);
5967 // setlock(i);
5968 }
5969 }
5970 for (i=0;i<N_REGS;i++) {
5971 int v=s->nat[i];
5972 if (v>=0) {
5973 unlock2(i);
5974 }
5975 }
5976 }
5977
5978 static __inline__ void create_popalls(void)
5979 {
5980 int i,r;
5981
5982 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5983 write_log("FATAL: Could not allocate popallspace!\n");
5984 abort();
5985 }
5986 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
5987
5988 current_compile_p=popallspace;
5989 set_target(current_compile_p);
5990 #if USE_PUSH_POP
5991 /* If we can't use gcc inline assembly, we need to pop some
5992 registers before jumping back to the various get-out routines.
5993 This generates the code for it.
5994 */
5995 align_target(align_jumps);
5996 popall_do_nothing=get_target();
5997 for (i=0;i<N_REGS;i++) {
5998 if (need_to_preserve[i])
5999 raw_pop_l_r(i);
6000 }
6001 raw_jmp((uintptr)do_nothing);
6002
6003 align_target(align_jumps);
6004 popall_execute_normal=get_target();
6005 for (i=0;i<N_REGS;i++) {
6006 if (need_to_preserve[i])
6007 raw_pop_l_r(i);
6008 }
6009 raw_jmp((uintptr)execute_normal);
6010
6011 align_target(align_jumps);
6012 popall_cache_miss=get_target();
6013 for (i=0;i<N_REGS;i++) {
6014 if (need_to_preserve[i])
6015 raw_pop_l_r(i);
6016 }
6017 raw_jmp((uintptr)cache_miss);
6018
6019 align_target(align_jumps);
6020 popall_recompile_block=get_target();
6021 for (i=0;i<N_REGS;i++) {
6022 if (need_to_preserve[i])
6023 raw_pop_l_r(i);
6024 }
6025 raw_jmp((uintptr)recompile_block);
6026
6027 align_target(align_jumps);
6028 popall_exec_nostats=get_target();
6029 for (i=0;i<N_REGS;i++) {
6030 if (need_to_preserve[i])
6031 raw_pop_l_r(i);
6032 }
6033 raw_jmp((uintptr)exec_nostats);
6034
6035 align_target(align_jumps);
6036 popall_check_checksum=get_target();
6037 for (i=0;i<N_REGS;i++) {
6038 if (need_to_preserve[i])
6039 raw_pop_l_r(i);
6040 }
6041 raw_jmp((uintptr)check_checksum);
6042
6043 align_target(align_jumps);
6044 current_compile_p=get_target();
6045 #else
6046 popall_exec_nostats=(void *)exec_nostats;
6047 popall_execute_normal=(void *)execute_normal;
6048 popall_cache_miss=(void *)cache_miss;
6049 popall_recompile_block=(void *)recompile_block;
6050 popall_do_nothing=(void *)do_nothing;
6051 popall_check_checksum=(void *)check_checksum;
6052 #endif
6053
6054 /* And now, the code to do the matching pushes and then jump
6055 into a handler routine */
6056 pushall_call_handler=get_target();
6057 #if USE_PUSH_POP
6058 for (i=N_REGS;i--;) {
6059 if (need_to_preserve[i])
6060 raw_push_l_r(i);
6061 }
6062 #endif
6063 r=REG_PC_TMP;
6064 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6065 raw_and_l_ri(r,TAGMASK);
6066 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6067
6068 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
6069 align_target(align_jumps);
6070 m68k_compile_execute = (void (*)(void))get_target();
6071 for (i=N_REGS;i--;) {
6072 if (need_to_preserve[i])
6073 raw_push_l_r(i);
6074 }
6075 align_target(align_loops);
6076 	uintptr dispatch_loop = (uintptr)get_target();
6077 r=REG_PC_TMP;
6078 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6079 raw_and_l_ri(r,TAGMASK);
6080 raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6081 raw_cmp_l_mi((uintptr)&regs.spcflags,0);
6082 raw_jcc_b_oponly(NATIVE_CC_EQ);
6083 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6084 raw_call((uintptr)m68k_do_specialties);
6085 raw_test_l_rr(REG_RESULT,REG_RESULT);
6086 raw_jcc_b_oponly(NATIVE_CC_EQ);
6087 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6088 raw_cmp_b_mi((uintptr)&quit_program,0);
6089 raw_jcc_b_oponly(NATIVE_CC_EQ);
6090 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6091 for (i=0;i<N_REGS;i++) {
6092 if (need_to_preserve[i])
6093 raw_pop_l_r(i);
6094 }
6095 raw_ret();
6096 #endif
6097
6098 // no need to further write into popallspace
6099 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6100 }
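/* create_popalls() fills popallspace with small trampolines: each
 * popall_* stub restores the callee-saved registers pushed by
 * pushall_call_handler and jumps to the matching C routine (do_nothing,
 * execute_normal, ...). m68k_compile_execute is the generated outer
 * dispatch loop: mask regs.pc_p with TAGMASK, call the handler found in
 * cache_tags[], and repeat; once regs.spcflags becomes non-zero it calls
 * m68k_do_specialties() and only returns to the caller when that reports
 * an exit and quit_program has been raised. */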
6101
6102 static __inline__ void reset_lists(void)
6103 {
6104 int i;
6105
6106 for (i=0;i<MAX_HOLD_BI;i++)
6107 hold_bi[i]=NULL;
6108 active=NULL;
6109 dormant=NULL;
6110 }
6111
6112 static void prepare_block(blockinfo* bi)
6113 {
6114 int i;
6115
6116 set_target(current_compile_p);
6117 align_target(align_jumps);
6118 bi->direct_pen=(cpuop_func *)get_target();
6119 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6120 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6121 raw_jmp((uintptr)popall_execute_normal);
6122
6123 align_target(align_jumps);
6124 bi->direct_pcc=(cpuop_func *)get_target();
6125 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6126 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6127 raw_jmp((uintptr)popall_check_checksum);
6128 current_compile_p=get_target();
6129
6130 bi->deplist=NULL;
6131 for (i=0;i<2;i++) {
6132 bi->dep[i].prev_p=NULL;
6133 bi->dep[i].next=NULL;
6134 }
6135 bi->env=default_ss;
6136 bi->status=BI_INVALID;
6137 bi->havestate=0;
6138 //bi->env=empty_ss;
6139 }
6140
6141 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6142 static inline void reset_compop(int opcode)
6143 {
6144 compfunctbl[opcode] = NULL;
6145 nfcompfunctbl[opcode] = NULL;
6146 }
6147
6148 static int read_opcode(const char *p)
6149 {
6150 int opcode = 0;
6151 for (int i = 0; i < 4; i++) {
6152 int op = p[i];
6153 switch (op) {
6154 case '0': case '1': case '2': case '3': case '4':
6155 case '5': case '6': case '7': case '8': case '9':
6156 opcode = (opcode << 4) | (op - '0');
6157 break;
6158 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6159 opcode = (opcode << 4) | ((op - 'a') + 10);
6160 break;
6161 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6162 opcode = (opcode << 4) | ((op - 'A') + 10);
6163 break;
6164 default:
6165 return -1;
6166 }
6167 }
6168 return opcode;
6169 }
6170
6171 static bool merge_blacklist()
6172 {
6173 const char *blacklist = PrefsFindString("jitblacklist");
6174 if (blacklist) {
6175 const char *p = blacklist;
6176 for (;;) {
6177 if (*p == 0)
6178 return true;
6179
6180 int opcode1 = read_opcode(p);
6181 if (opcode1 < 0)
6182 return false;
6183 p += 4;
6184
6185 int opcode2 = opcode1;
6186 if (*p == '-') {
6187 p++;
6188 opcode2 = read_opcode(p);
6189 if (opcode2 < 0)
6190 return false;
6191 p += 4;
6192 }
6193
6194 if (*p == 0 || *p == ';') {
6195 write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6196 for (int opcode = opcode1; opcode <= opcode2; opcode++)
6197 reset_compop(cft_map(opcode));
6198
6199 if (*p++ == ';')
6200 continue;
6201
6202 return true;
6203 }
6204
6205 return false;
6206 }
6207 }
6208 return true;
6209 }
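/* The "jitblacklist" prefs string is a ';'-separated list of 4-digit hex
 * opcodes, or "xxxx-yyyy" opcode ranges, whose compiled handlers are
 * dropped so those instructions always run interpreted. A hypothetical
 * example:
 *
 *     jitblacklist "f200-f3ff;a000"
 *
 * would blacklist the FPU line-F range plus opcode 0xa000. */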
6210
6211 void build_comp(void)
6212 {
6213 int i;
6214 int jumpcount=0;
6215 unsigned long opcode;
6216 struct comptbl* tbl=op_smalltbl_0_comp_ff;
6217 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6218 int count;
6219 int cpu_level = 0; // 68000 (default)
6220 if (CPUType == 4)
6221 cpu_level = 4; // 68040 with FPU
6222 else {
6223 if (FPUType)
6224 cpu_level = 3; // 68020 with FPU
6225 else if (CPUType >= 2)
6226 cpu_level = 2; // 68020
6227 else if (CPUType == 1)
6228 cpu_level = 1;
6229 }
6230 struct cputbl *nfctbl = (
6231 cpu_level == 4 ? op_smalltbl_0_nf
6232 : cpu_level == 3 ? op_smalltbl_1_nf
6233 : cpu_level == 2 ? op_smalltbl_2_nf
6234 : cpu_level == 1 ? op_smalltbl_3_nf
6235 : op_smalltbl_4_nf);
6236
6237 write_log ("<JIT compiler> : building compiler function tables\n");
6238
6239 for (opcode = 0; opcode < 65536; opcode++) {
6240 reset_compop(opcode);
6241 nfcpufunctbl[opcode] = op_illg_1;
6242 prop[opcode].use_flags = 0x1f;
6243 prop[opcode].set_flags = 0x1f;
6244 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6245 }
6246
6247 for (i = 0; tbl[i].opcode < 65536; i++) {
6248 int cflow = table68k[tbl[i].opcode].cflow;
6249 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6250 cflow = fl_const_jump;
6251 else
6252 cflow &= ~fl_const_jump;
6253 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6254
6255 int uses_fpu = tbl[i].specific & 32;
6256 if (uses_fpu && avoid_fpu)
6257 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6258 else
6259 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6260 }
6261
6262 for (i = 0; nftbl[i].opcode < 65536; i++) {
6263 		int uses_fpu = nftbl[i].specific & 32;
6264 if (uses_fpu && avoid_fpu)
6265 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6266 else
6267 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6268
6269 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6270 }
6271
6272 for (i = 0; nfctbl[i].handler; i++) {
6273 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6274 }
6275
6276 for (opcode = 0; opcode < 65536; opcode++) {
6277 compop_func *f;
6278 compop_func *nff;
6279 cpuop_func *nfcf;
6280 int isaddx,cflow;
6281
6282 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6283 continue;
6284
6285 if (table68k[opcode].handler != -1) {
6286 f = compfunctbl[cft_map(table68k[opcode].handler)];
6287 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6288 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6289 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6290 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6291 prop[cft_map(opcode)].cflow = cflow;
6292 prop[cft_map(opcode)].is_addx = isaddx;
6293 compfunctbl[cft_map(opcode)] = f;
6294 nfcompfunctbl[cft_map(opcode)] = nff;
6295 Dif (nfcf == op_illg_1)
6296 abort();
6297 nfcpufunctbl[cft_map(opcode)] = nfcf;
6298 }
6299 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6300 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6301 }
6302 for (i = 0; nfctbl[i].handler != NULL; i++) {
6303 if (nfctbl[i].specific)
6304 			nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6305 }
6306
6307 /* Merge in blacklist */
6308 if (!merge_blacklist())
6309 write_log("<JIT compiler> : blacklist merge failure!\n");
6310
6311 count=0;
6312 for (opcode = 0; opcode < 65536; opcode++) {
6313 if (compfunctbl[cft_map(opcode)])
6314 count++;
6315 }
6316 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6317
6318 /* Initialise state */
6319 create_popalls();
6320 alloc_cache();
6321 reset_lists();
6322
6323 for (i=0;i<TAGSIZE;i+=2) {
6324 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6325 cache_tags[i+1].bi=NULL;
6326 }
6327
6328 #if 0
6329 for (i=0;i<N_REGS;i++) {
6330 empty_ss.nat[i].holds=-1;
6331 empty_ss.nat[i].validsize=0;
6332 empty_ss.nat[i].dirtysize=0;
6333 }
6334 #endif
6335 for (i=0;i<VREGS;i++) {
6336 empty_ss.virt[i]=L_NEEDED;
6337 }
6338 for (i=0;i<N_REGS;i++) {
6339 empty_ss.nat[i]=L_UNKNOWN;
6340 }
6341 default_ss=empty_ss;
6342 }
6343
6344
6345 static void flush_icache_none(int n)
6346 {
6347 /* Nothing to do. */
6348 }
6349
6350 static void flush_icache_hard(int n)
6351 {
6352 uae_u32 i;
6353 blockinfo* bi, *dbi;
6354
6355 hard_flush_count++;
6356 #if 0
6357 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6358 n,regs.pc,regs.pc_p,current_cache_size/1024);
6359 current_cache_size = 0;
6360 #endif
6361 bi=active;
6362 while(bi) {
6363 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6364 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6365 dbi=bi; bi=bi->next;
6366 free_blockinfo(dbi);
6367 }
6368 bi=dormant;
6369 while(bi) {
6370 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6371 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6372 dbi=bi; bi=bi->next;
6373 free_blockinfo(dbi);
6374 }
6375
6376 reset_lists();
6377 if (!compiled_code)
6378 return;
6379 current_compile_p=compiled_code;
6380 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6381 }
6382
6383
6384 /* "Soft flushing" --- instead of actually throwing everything away,
6385 we simply mark everything as "needs to be checked".
6386 */
6387
6388 static inline void flush_icache_lazy(int n)
6389 {
6390 uae_u32 i;
6391 blockinfo* bi;
6392 blockinfo* bi2;
6393
6394 soft_flush_count++;
6395 if (!active)
6396 return;
6397
6398 bi=active;
6399 while (bi) {
6400 uae_u32 cl=cacheline(bi->pc_p);
6401 if (bi->status==BI_INVALID ||
6402 bi->status==BI_NEED_RECOMP) {
6403 if (bi==cache_tags[cl+1].bi)
6404 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6405 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6406 set_dhtu(bi,bi->direct_pen);
6407 bi->status=BI_INVALID;
6408 }
6409 else {
6410 if (bi==cache_tags[cl+1].bi)
6411 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6412 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6413 set_dhtu(bi,bi->direct_pcc);
6414 bi->status=BI_NEED_CHECK;
6415 }
6416 bi2=bi;
6417 bi=bi->next;
6418 }
6419 /* bi2 is now the last entry in the active list */
6420 bi2->next=dormant;
6421 if (dormant)
6422 dormant->prev_p=&(bi2->next);
6423
6424 dormant=active;
6425 active->prev_p=&dormant;
6426 active=NULL;
6427 }
6428
6429 void flush_icache_range(uae_u32 start, uae_u32 length)
6430 {
6431 if (!active)
6432 return;
6433
6434 #if LAZY_FLUSH_ICACHE_RANGE
6435 uae_u8 *start_p = get_real_address(start);
6436 blockinfo *bi = active;
6437 while (bi) {
6438 #if USE_CHECKSUM_INFO
6439 bool invalidate = false;
6440 for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
6441 invalidate = (((start_p - csi->start_p) < csi->length) ||
6442 ((csi->start_p - start_p) < length));
6443 #else
6444 // Assume system is consistent and would invalidate the right range
6445 const bool invalidate = (bi->pc_p - start_p) < length;
6446 #endif
6447 if (invalidate) {
6448 uae_u32 cl = cacheline(bi->pc_p);
6449 if (bi == cache_tags[cl + 1].bi)
6450 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6451 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
6452 set_dhtu(bi, bi->direct_pen);
6453 bi->status = BI_NEED_RECOMP;
6454 }
6455 bi = bi->next;
6456 }
6457 return;
6458 #endif
6459 flush_icache(-1);
6460 }
6461
6462 static void catastrophe(void)
6463 {
6464 abort();
6465 }
6466
6467 int failure;
6468
6469 #define TARGET_M68K 0
6470 #define TARGET_POWERPC 1
6471 #define TARGET_X86 2
6472 #define TARGET_X86_64 3
6473 #if defined(i386) || defined(__i386__)
6474 #define TARGET_NATIVE TARGET_X86
6475 #endif
6476 #if defined(powerpc) || defined(__powerpc__)
6477 #define TARGET_NATIVE TARGET_POWERPC
6478 #endif
6479 #if defined(x86_64) || defined(__x86_64__)
6480 #define TARGET_NATIVE TARGET_X86_64
6481 #endif
6482
6483 #ifdef ENABLE_MON
6484 static uae_u32 mon_read_byte_jit(uintptr addr)
6485 {
6486 uae_u8 *m = (uae_u8 *)addr;
6487 return (uintptr)(*m);
6488 }
6489
6490 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6491 {
6492 uae_u8 *m = (uae_u8 *)addr;
6493 *m = b;
6494 }
6495 #endif
6496
6497 void disasm_block(int target, uint8 * start, size_t length)
6498 {
6499 if (!JITDebug)
6500 return;
6501
6502 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6503 char disasm_str[200];
6504 sprintf(disasm_str, "%s $%x $%x",
6505 target == TARGET_M68K ? "d68" :
6506 target == TARGET_X86 ? "d86" :
6507 target == TARGET_X86_64 ? "d8664" :
6508 target == TARGET_POWERPC ? "d" : "x",
6509 start, start + length - 1);
6510
6511 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6512 void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6513
6514 mon_read_byte = mon_read_byte_jit;
6515 mon_write_byte = mon_write_byte_jit;
6516
6517 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6518 mon(4, arg);
6519
6520 mon_read_byte = old_mon_read_byte;
6521 mon_write_byte = old_mon_write_byte;
6522 #endif
6523 }
6524
6525 static void disasm_native_block(uint8 *start, size_t length)
6526 {
6527 disasm_block(TARGET_NATIVE, start, length);
6528 }
6529
6530 static void disasm_m68k_block(uint8 *start, size_t length)
6531 {
6532 disasm_block(TARGET_M68K, start, length);
6533 }
6534
6535 #ifdef HAVE_GET_WORD_UNSWAPPED
6536 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6537 #else
6538 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6539 #endif
6540
6541 #if JIT_DEBUG
6542 static uae_u8 *last_regs_pc_p = 0;
6543 static uae_u8 *last_compiled_block_addr = 0;
6544
6545 void compiler_dumpstate(void)
6546 {
6547 if (!JITDebug)
6548 return;
6549
6550 write_log("### Host addresses\n");
6551 write_log("MEM_BASE : %x\n", MEMBaseDiff);
6552 write_log("PC_P : %p\n", &regs.pc_p);
6553 write_log("SPCFLAGS : %p\n", &regs.spcflags);
6554 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6555 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6556 write_log("\n");
6557
6558 write_log("### M68k processor state\n");
6559 m68k_dumpstate(0);
6560 write_log("\n");
6561
6562 write_log("### Block in Mac address space\n");
6563 write_log("M68K block : %p\n",
6564 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6565 write_log("Native block : %p (%d bytes)\n",
6566 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6567 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6568 write_log("\n");
6569 }
6570 #endif
6571
6572 static void compile_block(cpu_history* pc_hist, int blocklen)
6573 {
6574 if (letit && compiled_code) {
6575 #if PROFILE_COMPILE_TIME
6576 compile_count++;
6577 clock_t start_time = clock();
6578 #endif
6579 #if JIT_DEBUG
6580 bool disasm_block = false;
6581 #endif
6582
6583 /* OK, here we need to 'compile' a block */
6584 int i;
6585 int r;
6586 int was_comp=0;
6587 uae_u8 liveflags[MAXRUN+1];
6588 #if USE_CHECKSUM_INFO
6589 bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6590 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6591 uintptr min_pcp=max_pcp;
6592 #else
6593 uintptr max_pcp=(uintptr)pc_hist[0].location;
6594 uintptr min_pcp=max_pcp;
6595 #endif
6596 uae_u32 cl=cacheline(pc_hist[0].location);
6597 void* specflags=(void*)&regs.spcflags;
6598 blockinfo* bi=NULL;
6599 blockinfo* bi2;
6600 int extra_len=0;
6601
6602 redo_current_block=0;
6603 if (current_compile_p>=max_compile_start)
6604 flush_icache_hard(7);
6605
6606 alloc_blockinfos();
6607
6608 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6609 bi2=get_blockinfo(cl);
6610
6611 optlev=bi->optlevel;
6612 if (bi->status!=BI_INVALID) {
6613 Dif (bi!=bi2) {
6614 /* I don't think it can happen anymore. Shouldn't, in
6615 any case. So let's make sure... */
6616 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6617 bi->count,bi->optlevel,bi->handler_to_use,
6618 cache_tags[cl].handler);
6619 abort();
6620 }
6621
6622 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6623 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6624 /* What the heck? We are not supposed to be here! */
6625 abort();
6626 }
6627 }
6628 if (bi->count==-1) {
6629 optlev++;
6630 while (!optcount[optlev])
6631 optlev++;
6632 bi->count=optcount[optlev]-1;
6633 }
6634 current_block_pc_p=(uintptr)pc_hist[0].location;
6635
6636 remove_deps(bi); /* We are about to create new code */
6637 bi->optlevel=optlev;
6638 bi->pc_p=(uae_u8*)pc_hist[0].location;
6639 #if USE_CHECKSUM_INFO
6640 free_checksum_info_chain(bi->csi);
6641 bi->csi = NULL;
6642 #endif
6643
6644 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6645 i=blocklen;
6646 while (i--) {
6647 uae_u16* currpcp=pc_hist[i].location;
6648 uae_u32 op=DO_GET_OPCODE(currpcp);
6649
6650 #if USE_CHECKSUM_INFO
6651 trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6652 #if USE_INLINING
6653 if (is_const_jump(op)) {
6654 checksum_info *csi = alloc_checksum_info();
6655 csi->start_p = (uae_u8 *)min_pcp;
6656 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6657 csi->next = bi->csi;
6658 bi->csi = csi;
6659 max_pcp = (uintptr)currpcp;
6660 }
6661 #endif
6662 min_pcp = (uintptr)currpcp;
6663 #else
6664 if ((uintptr)currpcp<min_pcp)
6665 min_pcp=(uintptr)currpcp;
6666 if ((uintptr)currpcp>max_pcp)
6667 max_pcp=(uintptr)currpcp;
6668 #endif
6669
6670 liveflags[i]=((liveflags[i+1]&
6671 (~prop[op].set_flags))|
6672 prop[op].use_flags);
6673 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6674 liveflags[i]&= ~FLAG_Z;
6675 }
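		/* The loop above is a classic backward liveness pass:
		 *   liveflags[i] = (liveflags[i+1] & ~set_flags) | use_flags
		 * ADDX-style instructions leave Z untouched for a zero result, so
		 * they implicitly *use* Z; that use can be dropped when Z is dead
		 * afterwards, which is what the is_addx special case does. Later,
		 * instructions whose flag outputs are all dead are compiled with
		 * the cheaper nf ("no flags") handlers. */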
6676
6677 #if USE_CHECKSUM_INFO
6678 checksum_info *csi = alloc_checksum_info();
6679 csi->start_p = (uae_u8 *)min_pcp;
6680 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6681 csi->next = bi->csi;
6682 bi->csi = csi;
6683 #endif
6684
6685 bi->needed_flags=liveflags[0];
6686
6687 align_target(align_loops);
6688 was_comp=0;
6689
6690 bi->direct_handler=(cpuop_func *)get_target();
6691 set_dhtu(bi,bi->direct_handler);
6692 bi->status=BI_COMPILING;
6693 current_block_start_target=(uintptr)get_target();
6694
6695 log_startblock();
6696
6697 if (bi->count>=0) { /* Need to generate countdown code */
6698 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6699 raw_sub_l_mi((uintptr)&(bi->count),1);
6700 raw_jl((uintptr)popall_recompile_block);
6701 }
6702 if (optlev==0) { /* No need to actually translate */
6703 /* Execute normally without keeping stats */
6704 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6705 raw_jmp((uintptr)popall_exec_nostats);
6706 }
6707 else {
6708 reg_alloc_run=0;
6709 next_pc_p=0;
6710 taken_pc_p=0;
6711 branch_cc=0;
6712
6713 comp_pc_p=(uae_u8*)pc_hist[0].location;
6714 init_comp();
6715 was_comp=1;
6716
6717 #if JIT_DEBUG
6718 if (JITDebug) {
6719 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6720 raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6721 }
6722 #endif
6723
6724 for (i=0;i<blocklen &&
6725 get_target_noopt()<max_compile_start;i++) {
6726 cpuop_func **cputbl;
6727 compop_func **comptbl;
6728 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6729 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6730 if (!needed_flags) {
6731 cputbl=nfcpufunctbl;
6732 comptbl=nfcompfunctbl;
6733 }
6734 else {
6735 cputbl=cpufunctbl;
6736 comptbl=compfunctbl;
6737 }
6738
6739 failure = 1; // gb-- defaults to failure state
6740 if (comptbl[opcode] && optlev>1) {
6741 failure=0;
6742 if (!was_comp) {
6743 comp_pc_p=(uae_u8*)pc_hist[i].location;
6744 init_comp();
6745 }
6746 was_comp=1;
6747
6748 comptbl[opcode](opcode);
6749 freescratch();
6750 if (!(liveflags[i+1] & FLAG_CZNV)) {
6751 /* We can forget about flags */
6752 dont_care_flags();
6753 }
6754 #if INDIVIDUAL_INST
6755 flush(1);
6756 nop();
6757 flush(1);
6758 was_comp=0;
6759 #endif
6760 }
6761
6762 if (failure) {
6763 if (was_comp) {
6764 flush(1);
6765 was_comp=0;
6766 }
6767 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6768 #if USE_NORMAL_CALLING_CONVENTION
6769 raw_push_l_r(REG_PAR1);
6770 #endif
6771 raw_mov_l_mi((uintptr)&regs.pc_p,
6772 (uintptr)pc_hist[i].location);
6773 raw_call((uintptr)cputbl[opcode]);
6774 #if PROFILE_UNTRANSLATED_INSNS
6775 // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6776 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6777 #endif
6778 #if USE_NORMAL_CALLING_CONVENTION
6779 raw_inc_sp(4);
6780 #endif
6781
6782 if (i < blocklen - 1) {
6783 uae_s8* branchadd;
6784
6785 raw_mov_l_rm(0,(uintptr)specflags);
6786 raw_test_l_rr(0,0);
6787 raw_jz_b_oponly();
6788 branchadd=(uae_s8 *)get_target();
6789 emit_byte(0);
6790 raw_jmp((uintptr)popall_do_nothing);
6791 *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6792 }
6793 }
6794 }
6795 #if 1 /* This isn't completely kosher yet; it really needs to
6796 	 be integrated into a general inter-block-dependency scheme */
6797 if (next_pc_p && taken_pc_p &&
6798 was_comp && taken_pc_p==current_block_pc_p) {
6799 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6800 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6801 uae_u8 x=bi1->needed_flags;
6802
6803 if (x==0xff || 1) { /* To be on the safe side */
6804 uae_u16* next=(uae_u16*)next_pc_p;
6805 uae_u32 op=DO_GET_OPCODE(next);
6806
6807 x=0x1f;
6808 x&=(~prop[op].set_flags);
6809 x|=prop[op].use_flags;
6810 }
6811
6812 x|=bi2->needed_flags;
6813 if (!(x & FLAG_CZNV)) {
6814 /* We can forget about flags */
6815 dont_care_flags();
6816 			extra_len+=2; /* The next instruction is now part of this
6817 block */
6818 }
6819
6820 }
6821 #endif
6822 log_flush();
6823
6824 if (next_pc_p) { /* A branch was registered */
6825 uintptr t1=next_pc_p;
6826 uintptr t2=taken_pc_p;
6827 int cc=branch_cc;
6828
6829 uae_u32* branchadd;
6830 uae_u32* tba;
6831 bigstate tmp;
6832 blockinfo* tbi;
6833
6834 if (taken_pc_p<next_pc_p) {
6835 /* backward branch. Optimize for the "taken" case ---
6836 which means the raw_jcc should fall through when
6837 the 68k branch is taken. */
6838 t1=taken_pc_p;
6839 t2=next_pc_p;
6840 cc=branch_cc^1;
6841 }
6842
6843 tmp=live; /* ouch! This is big... */
6844 raw_jcc_l_oponly(cc);
6845 branchadd=(uae_u32*)get_target();
6846 emit_long(0);
6847
6848 /* predicted outcome */
6849 tbi=get_blockinfo_addr_new((void*)t1,1);
6850 match_states(tbi);
6851 raw_cmp_l_mi((uintptr)specflags,0);
6852 raw_jcc_l_oponly(4);
6853 tba=(uae_u32*)get_target();
6854 emit_long(get_handler(t1)-((uintptr)tba+4));
6855 raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6856 flush_reg_count();
6857 raw_jmp((uintptr)popall_do_nothing);
6858 create_jmpdep(bi,0,tba,t1);
6859
6860 align_target(align_jumps);
6861 /* not-predicted outcome */
6862 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6863 live=tmp; /* Ouch again */
6864 tbi=get_blockinfo_addr_new((void*)t2,1);
6865 match_states(tbi);
6866
6867 //flush(1); /* Can only get here if was_comp==1 */
6868 raw_cmp_l_mi((uintptr)specflags,0);
6869 raw_jcc_l_oponly(4);
6870 tba=(uae_u32*)get_target();
6871 emit_long(get_handler(t2)-((uintptr)tba+4));
6872 raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6873 flush_reg_count();
6874 raw_jmp((uintptr)popall_do_nothing);
6875 create_jmpdep(bi,1,tba,t2);
6876 }
6877 else
6878 {
6879 if (was_comp) {
6880 flush(1);
6881 }
6882 flush_reg_count();
6883
6884 /* Let's find out where next_handler is... */
6885 if (was_comp && isinreg(PC_P)) {
6886 r=live.state[PC_P].realreg;
6887 raw_and_l_ri(r,TAGMASK);
6888 int r2 = (r==0) ? 1 : 0;
6889 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6890 raw_cmp_l_mi((uintptr)specflags,0);
6891 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6892 raw_jmp_r(r2);
6893 }
6894 else if (was_comp && isconst(PC_P)) {
6895 uae_u32 v=live.state[PC_P].val;
6896 uae_u32* tba;
6897 blockinfo* tbi;
6898
6899 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6900 match_states(tbi);
6901
6902 raw_cmp_l_mi((uintptr)specflags,0);
6903 raw_jcc_l_oponly(4);
6904 tba=(uae_u32*)get_target();
6905 emit_long(get_handler(v)-((uintptr)tba+4));
6906 raw_mov_l_mi((uintptr)&regs.pc_p,v);
6907 raw_jmp((uintptr)popall_do_nothing);
6908 create_jmpdep(bi,0,tba,v);
6909 }
6910 else {
6911 r=REG_PC_TMP;
6912 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6913 raw_and_l_ri(r,TAGMASK);
6914 int r2 = (r==0) ? 1 : 0;
6915 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6916 raw_cmp_l_mi((uintptr)specflags,0);
6917 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6918 raw_jmp_r(r2);
6919 }
6920 }
6921 }
6922
6923 #if USE_MATCH
6924 if (callers_need_recompile(&live,&(bi->env))) {
6925 mark_callers_recompile(bi);
6926 }
6927
6928 big_to_small_state(&live,&(bi->env));
6929 #endif
6930
6931 #if USE_CHECKSUM_INFO
6932 remove_from_list(bi);
6933 if (trace_in_rom) {
6934 // No need to checksum that block trace on cache invalidation
6935 free_checksum_info_chain(bi->csi);
6936 bi->csi = NULL;
6937 add_to_dormant(bi);
6938 }
6939 else {
6940 calc_checksum(bi,&(bi->c1),&(bi->c2));
6941 add_to_active(bi);
6942 }
6943 #else
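/* Without checksum info, derive the source range to checksum from
   min_pcp/max_pcp. If the flag lookahead folded the next instruction into
   this block, extra_len extends the range to cover it; otherwise pad
   conservatively by the longest possible 68k instruction. */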
6944 if (next_pc_p+extra_len>=max_pcp &&
6945 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6946 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6947 else
6948 max_pcp+=LONGEST_68K_INST;
6949
6950 bi->len=max_pcp-min_pcp;
6951 bi->min_pcp=min_pcp;
6952
6953 remove_from_list(bi);
6954 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6955 add_to_dormant(bi); /* No need to checksum it on cache flush.
6956 Please don't start changing ROMs in
6957 flight! */
6958 }
6959 else {
6960 calc_checksum(bi,&(bi->c1),&(bi->c2));
6961 add_to_active(bi);
6962 }
6963 #endif
6964
6965 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6966
6967 #if JIT_DEBUG
6968 if (JITDebug)
6969 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6970
6971 if (JITDebug && disasm_block) {
6972 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6973 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6974 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6975 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6976 D(bug("Compiled block @ %p\n", pc_hist[0].location)); /* %p: the argument is a pointer; 0x%08x would truncate it on 64-bit hosts */
6977 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6978 getchar();
6979 }
6980 #endif
6981
6982 log_dump();
6983 align_target(align_jumps);
6984
6985 /* This is the non-direct handler */
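/* Entered when this block is reached through the cache_tags table, where
   tag collisions are possible, so regs.pc_p must be verified first.
   Sketch of the emitted code:
 *     cmp  [regs.pc_p], pc_hist[0].location
 *     jnz  popall_cache_miss
 *     ...re-establish the cached register state (match_states/flush)...
 *     jmp  bi->direct_handler
 */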
6986 bi->handler=
6987 bi->handler_to_use=(cpuop_func *)get_target();
6988 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6989 raw_jnz((uintptr)popall_cache_miss);
6990 comp_pc_p=(uae_u8*)pc_hist[0].location;
6991
6992 bi->status=BI_FINALIZING;
6993 init_comp();
6994 match_states(bi);
6995 flush(1);
6996
6997 raw_jmp((uintptr)bi->direct_handler);
6998
6999 current_compile_p=get_target();
7000 raise_in_cl_list(bi);
7001
7002 /* We will flush soon, anyway, so let's do it now */
7003 if (current_compile_p>=max_compile_start)
7004 flush_icache_hard(7);
7005
7006 bi->status=BI_ACTIVE;
7007 if (redo_current_block)
7008 block_need_recompile(bi);
7009
7010 #if PROFILE_COMPILE_TIME
7011 compile_time += (clock() - start_time);
7012 #endif
7013 }
7014 }
7015
7016 void do_nothing(void)
7017 {
7018 /* What did you expect this to do? */
7019 }
7020
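/* Interpret instructions until an end-of-block condition or pending
   special flags, without recording a compile history; spcflags handling
   is left to the caller. */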
7021 void exec_nostats(void)
7022 {
7023 for (;;) {
7024 uae_u32 opcode = GET_OPCODE;
7025 (*cpufunctbl[opcode])(opcode);
7026 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7027 return; /* We will deal with the spcflags in the caller */
7028 }
7029 }
7030 }
7031
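/* Interpret one extended basic block while recording each instruction's
   location in pc_hist, then hand the recorded trace to compile_block so
   that the next execution can use the translated version. */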
7032 void execute_normal(void)
7033 {
7034 if (!check_for_cache_miss()) {
7035 cpu_history pc_hist[MAXRUN];
7036 int blocklen = 0;
7037 #if REAL_ADDRESSING || DIRECT_ADDRESSING
7038 start_pc_p = regs.pc_p;
7039 start_pc = get_virtual_address(regs.pc_p);
7040 #else
7041 start_pc_p = regs.pc_oldp;
7042 start_pc = regs.pc;
7043 #endif
7044 for (;;) { /* Take note: This is the do-it-normal loop */
7045 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7046 uae_u32 opcode = GET_OPCODE;
7047 #if FLIGHT_RECORDER
7048 m68k_record_step(m68k_getpc());
7049 #endif
7050 (*cpufunctbl[opcode])(opcode);
7051 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7052 compile_block(pc_hist, blocklen);
7053 return; /* We will deal with the spcflags in the caller */
7054 }
7055 /* No need to check regs.spcflags, because if they were set,
7056 we'd have ended up inside that "if" */
7057 }
7058 }
7059 }
7060
7061 typedef void (*compiled_handler)(void);
7062
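/* Top-level JIT dispatch loop. On x86/x86-64 the loop itself is installed
   at runtime through the m68k_compile_execute pointer (presumably as
   generated native code); other targets use the portable C loop below,
   which calls into the translation cache via pushall_call_handler and
   checks spcflags after every return. */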
7063 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
7064 void (*m68k_compile_execute)(void) = NULL;
7065 #else
7066 void m68k_do_compile_execute(void)
7067 {
7068 for (;;) {
7069 ((compiled_handler)(pushall_call_handler))();
7070 /* Whenever we return from that, we should check spcflags */
7071 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7072 if (m68k_do_specialties ())
7073 return;
7074 }
7075 }
7076 }
7077 #endif