ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.27
Committed: 2004-11-08T23:24:54Z (19 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.26: +2 -2 lines
Log Message:
fix inline dispatcher to really generate a cmove on x86-64 (silly bug!)

File Contents

# Content
1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2004 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 /* NOTE: support for AMD64 assumes translation cache and other code
35 * buffers are allocated into a 32-bit address space because (i) B2/JIT
36 * code is not 64-bit clean and (ii) it's faster to resolve branches
37 * that way.
38 */
39 #if !defined(__i386__) && !defined(__x86_64__)
40 #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41 #endif
42
43 #define USE_MATCH 0
44
45 /* kludge for Brian, so he can compile under MSVC++ */
46 #define USE_NORMAL_CALLING_CONVENTION 0
47
48 #ifndef WIN32
49 #include <unistd.h>
50 #include <sys/types.h>
51 #include <sys/mman.h>
52 #endif
53
54 #include <stdlib.h>
55 #include <fcntl.h>
56 #include <errno.h>
57
58 #include "sysdeps.h"
59 #include "cpu_emulation.h"
60 #include "main.h"
61 #include "prefs.h"
62 #include "user_strings.h"
63 #include "vm_alloc.h"
64
65 #include "m68k.h"
66 #include "memory.h"
67 #include "readcpu.h"
68 #include "newcpu.h"
69 #include "comptbl.h"
70 #include "compiler/compemu.h"
71 #include "fpu/fpu.h"
72 #include "fpu/flags.h"
73
74 #define DEBUG 1
75 #include "debug.h"
76
77 #ifdef ENABLE_MON
78 #include "mon.h"
79 #endif
80
81 #ifndef WIN32
82 #define PROFILE_COMPILE_TIME 1
83 #define PROFILE_UNTRANSLATED_INSNS 1
84 #endif
85
86 #ifdef WIN32
87 #undef write_log
88 #define write_log dummy_write_log
89 static void dummy_write_log(const char *, ...) { }
90 #endif
91
92 #if JIT_DEBUG
93 #undef abort
94 #define abort() do { \
95 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
96 exit(EXIT_FAILURE); \
97 } while (0)
98 #endif
99
100 #if PROFILE_COMPILE_TIME
101 #include <time.h>
102 static uae_u32 compile_count = 0;
103 static clock_t compile_time = 0;
104 static clock_t emul_start_time = 0;
105 static clock_t emul_end_time = 0;
106 #endif
107
108 #if PROFILE_UNTRANSLATED_INSNS
109 const int untranslated_top_ten = 20;
110 static uae_u32 raw_cputbl_count[65536] = { 0, };
111 static uae_u16 opcode_nums[65536];
112
113 static int untranslated_compfn(const void *e1, const void *e2)
114 {
115 return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
116 }
117 #endif
118
119 static compop_func *compfunctbl[65536];
120 static compop_func *nfcompfunctbl[65536];
121 static cpuop_func *nfcpufunctbl[65536];
122 uae_u8* comp_pc_p;
123
124 // From main_unix.cpp
125 extern bool ThirtyThreeBitAddressing;
126
127 // From newcpu.cpp
128 extern bool quit_program;
129
130 // gb-- Extra data for Basilisk II/JIT
131 #if JIT_DEBUG
132 static bool JITDebug = false; // Enable runtime disassemblers through mon?
133 #else
134 const bool JITDebug = false; // Don't use JIT debug mode at all
135 #endif
136
137 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
138 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
139 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
140 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
141 static bool avoid_fpu = true; // Flag: compile FPU instructions ?
142 static bool have_cmov = false; // target has CMOV instructions ?
143 static bool have_rat_stall = true; // target has partial register stalls ?
144 const bool tune_alignment = true; // Tune code alignments for running CPU ?
145 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
146 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
147 static int align_loops = 32; // Align the start of loops
148 static int align_jumps = 32; // Align the start of jumps
149 static int optcount[10] = {
150 10, // How often a block has to be executed before it is translated
151 0, // How often to use naive translation
152 0, 0, 0, 0,
153 -1, -1, -1, -1
154 };
155
/* Static per-opcode properties, indexed by (mapped) opcode word. */
struct op_properties {
	uae_u8 use_flags;  /* presumably the CPU flags the opcode reads -- from gencomp tables, TODO confirm */
	uae_u8 set_flags;  /* presumably the CPU flags the opcode writes -- TODO confirm */
	uae_u8 is_addx;    /* NOTE(review): looks like an ADDX/SUBX marker -- confirm against table generator */
	uae_u8 cflow;      /* control-flow class (fl_* bits; see end_block/is_const_jump/may_trap below) */
};
static op_properties prop[65536];
163
/* True if this opcode terminates a translated block (fl_end_block bit). */
static inline int end_block(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_end_block);
}

/* True if this opcode's control flow is exactly a constant jump. */
static inline bool is_const_jump(uae_u32 opcode)
{
	return (prop[opcode].cflow == fl_const_jump);
}

/* True if this opcode may raise a trap at runtime (fl_trap bit). */
static inline bool may_trap(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_trap);
}

/* Map an opcode word to the index used by the opcode tables.  When
 * words are fetched unswapped (HAVE_GET_WORD_UNSWAPPED) the two bytes
 * must be exchanged first; otherwise the value is used as-is. */
static inline unsigned int cft_map (unsigned int f)
{
#ifndef HAVE_GET_WORD_UNSWAPPED
	return f;
#else
	return ((f >> 8) & 255) | ((f & 255) << 8);
#endif
}
187
188 uae_u8* start_pc_p;
189 uae_u32 start_pc;
190 uae_u32 current_block_pc_p;
191 static uintptr current_block_start_target;
192 uae_u32 needed_flags;
193 static uintptr next_pc_p;
194 static uintptr taken_pc_p;
195 static int branch_cc;
196 static int redo_current_block;
197
198 int segvcount=0;
199 int soft_flush_count=0;
200 int hard_flush_count=0;
201 int checksum_count=0;
202 static uae_u8* current_compile_p=NULL;
203 static uae_u8* max_compile_start;
204 static uae_u8* compiled_code=NULL;
205 static uae_s32 reg_alloc_run;
206 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
207 static uae_u8* popallspace=NULL;
208
209 void* pushall_call_handler=NULL;
210 static void* popall_do_nothing=NULL;
211 static void* popall_exec_nostats=NULL;
212 static void* popall_execute_normal=NULL;
213 static void* popall_cache_miss=NULL;
214 static void* popall_recompile_block=NULL;
215 static void* popall_check_checksum=NULL;
216
217 /* The 68k only ever executes from even addresses. So right now, we
218 * waste half the entries in this array
219 * UPDATE: We now use those entries to store the start of the linked
220 * lists that we maintain for each hash result.
221 */
222 cacheline cache_tags[TAGSIZE];
223 int letit=0;
224 blockinfo* hold_bi[MAX_HOLD_BI];
225 blockinfo* active;
226 blockinfo* dormant;
227
228 /* 68040 */
229 extern struct cputbl op_smalltbl_0_nf[];
230 extern struct comptbl op_smalltbl_0_comp_nf[];
231 extern struct comptbl op_smalltbl_0_comp_ff[];
232
233 /* 68020 + 68881 */
234 extern struct cputbl op_smalltbl_1_nf[];
235
236 /* 68020 */
237 extern struct cputbl op_smalltbl_2_nf[];
238
239 /* 68010 */
240 extern struct cputbl op_smalltbl_3_nf[];
241
242 /* 68000 */
243 extern struct cputbl op_smalltbl_4_nf[];
244
245 /* 68000 slow but compatible. */
246 extern struct cputbl op_smalltbl_5_nf[];
247
248 static void flush_icache_hard(int n);
249 static void flush_icache_lazy(int n);
250 static void flush_icache_none(int n);
251 void (*flush_icache)(int n) = flush_icache_none;
252
253
254
255 bigstate live;
256 smallstate empty_ss;
257 smallstate default_ss;
258 static int optlev;
259
260 static int writereg(int r, int size);
261 static void unlock2(int r);
262 static void setlock(int r);
263 static int readreg_specific(int r, int size, int spec);
264 static int writereg_specific(int r, int size, int spec);
265 static void prepare_for_call_1(void);
266 static void prepare_for_call_2(void);
267 static void align_target(uae_u32 a);
268
269 static uae_s32 nextused[VREGS];
270
271 uae_u32 m68k_pc_offset;
272
/* Some arithmetic operations can be optimized away if the operands
 * are known to be constant. But that's only a good idea when the
 * side effects they would have on the flags are not important. This
 * variable indicates whether we need the side effects or not.
 */
278 uae_u32 needflags=0;
279
280 /* Flag handling is complicated.
281 *
282 * x86 instructions create flags, which quite often are exactly what we
283 * want. So at times, the "68k" flags are actually in the x86 flags.
284 *
285 * Then again, sometimes we do x86 instructions that clobber the x86
286 * flags, but don't represent a corresponding m68k instruction. In that
287 * case, we have to save them.
288 *
289 * We used to save them to the stack, but now store them back directly
290 * into the regflags.cznv of the traditional emulation. Thus some odd
291 * names.
292 *
293 * So flags can be in either of two places (used to be three; boy were
294 * things complicated back then!); And either place can contain either
295 * valid flags or invalid trash (and on the stack, there was also the
296 * option of "nothing at all", now gone). A couple of variables keep
297 * track of the respective states.
298 *
299 * To make things worse, we might or might not be interested in the flags.
300 * by default, we are, but a call to dont_care_flags can change that
301 * until the next call to live_flags. If we are not, pretty much whatever
302 * is in the register and/or the native flags is seen as valid.
303 */
304
/* Return the head of the blockinfo chain for cache line cl.
 * Slot cl holds the dispatch handler; slot cl+1 holds the chain head. */
static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
	return cache_tags[cl+1].bi;
}

/* Walk the per-cacheline chain and return the blockinfo whose 68k
 * start address (pc_p) matches addr, or NULL if none is cached. */
static __inline__ blockinfo* get_blockinfo_addr(void* addr)
{
	blockinfo* bi=get_blockinfo(cacheline(addr));

	while (bi) {
		if (bi->pc_p==addr)
			return bi;
		bi=bi->next_same_cl;
	}
	return NULL;
}
321
322
323 /*******************************************************************
324 * All sorts of list related functions for all of the lists *
325 *******************************************************************/
326
/* Unlink bi from its cacheline chain and keep the dispatch handler
 * slot consistent with the new chain head. */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (bi->prev_same_cl_p)
		*(bi->prev_same_cl_p)=bi->next_same_cl;
	if (bi->next_same_cl)
		bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
	/* Publish the new head's handler, or the generic one if empty. */
	if (cache_tags[cl+1].bi)
		cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
	else
		cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}

/* Unlink bi from the active/dormant list it currently belongs to. */
static __inline__ void remove_from_list(blockinfo* bi)
{
	if (bi->prev_p)
		*(bi->prev_p)=bi->next;
	if (bi->next)
		bi->next->prev_p=bi->prev_p;
}

/* Remove bi from both the active/dormant list and its cacheline chain. */
static __inline__ void remove_from_lists(blockinfo* bi)
{
	remove_from_list(bi);
	remove_from_cl_list(bi);
}
354
/* Link bi at the head of its cacheline chain and publish its handler
 * in the dispatch slot. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (cache_tags[cl+1].bi)
		cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
	bi->next_same_cl=cache_tags[cl+1].bi;

	cache_tags[cl+1].bi=bi;
	bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

	cache_tags[cl].handler=bi->handler_to_use;
}

/* Move bi to the front of its cacheline chain (most-recently-used). */
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
	remove_from_cl_list(bi);
	add_to_cl_list(bi);
}

/* Push bi onto the head of the active-blocks list. */
static __inline__ void add_to_active(blockinfo* bi)
{
	if (active)
		active->prev_p=&(bi->next);
	bi->next=active;

	active=bi;
	bi->prev_p=&active;
}

/* Push bi onto the head of the dormant-blocks list. */
static __inline__ void add_to_dormant(blockinfo* bi)
{
	if (dormant)
		dormant->prev_p=&(bi->next);
	bi->next=dormant;

	dormant=bi;
	bi->prev_p=&dormant;
}
394
/* Unlink one dependency record from its target's dependency list and
 * clear its links so it can be reused. */
static __inline__ void remove_dep(dependency* d)
{
	if (d->prev_p)
		*(d->prev_p)=d->next;
	if (d->next)
		d->next->prev_p=d->prev_p;
	d->prev_p=NULL;
	d->next=NULL;
}

/* This block's code is about to be thrown away, so it no longer
   depends on anything else */
static __inline__ void remove_deps(blockinfo* bi)
{
	remove_dep(&(bi->dep[0]));
	remove_dep(&(bi->dep[1]));
}

/* Re-patch the 32-bit relative displacement at d->jmp_off so the
 * emitted jump lands on handler `a` (rel32 is measured from the end
 * of the 4-byte displacement field, hence the +4). */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
	*(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
417
418 /********************************************************************
419 * Soft flush handling support functions *
420 ********************************************************************/
421
/* Change the direct handler-to-use of bi and re-patch every dependent
 * jump so callers branch to the new handler. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
	//write_log("bi is %p\n",bi);
	if (dh!=bi->direct_handler_to_use) {
		dependency* x=bi->deplist;
		//write_log("bi->deplist=%p\n",bi->deplist);
		while (x) {
			//write_log("x is %p\n",x);
			//write_log("x->next is %p\n",x->next);
			//write_log("x->prev_p is %p\n",x->prev_p);

			if (x->jmp_off) {
				adjust_jmpdep(x,dh);
			}
			x=x->next;
		}
		bi->direct_handler_to_use=dh;
	}
}
441
/* Reset bi to the untranslated state: generic handlers, fresh
 * execution count, all flags assumed needed, dependencies dropped. */
static __inline__ void invalidate_block(blockinfo* bi)
{
	int i;

	bi->optlevel=0;
	bi->count=optcount[0]-1;  /* executions left before (re)translation */
	bi->handler=NULL;
	bi->handler_to_use=(cpuop_func *)popall_execute_normal;
	bi->direct_handler=NULL;
	set_dhtu(bi,bi->direct_pen);
	bi->needed_flags=0xff;    /* conservatively assume all flags needed */
	bi->status=BI_INVALID;
	for (i=0;i<2;i++) {
		bi->dep[i].jmp_off=NULL;
		bi->dep[i].target=NULL;
	}
	remove_deps(bi);
}
460
/* Register dependency slot i of bi as a jump to `target`: the rel32
 * at jmpaddr will be re-patched whenever the target block's direct
 * handler changes (see set_dhtu/adjust_jmpdep). */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
	blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

	Dif(!tbi) {
		write_log("Could not create jmpdep!\n");
		abort();
	}
	bi->dep[i].jmp_off=jmpaddr;
	bi->dep[i].source=bi;
	bi->dep[i].target=tbi;
	/* Insert at the head of the target's dependency list. */
	bi->dep[i].next=tbi->deplist;
	if (bi->dep[i].next)
		bi->dep[i].next->prev_p=&(bi->dep[i].next);
	bi->dep[i].prev_p=&(tbi->deplist);
	tbi->deplist=&(bi->dep[i]);
}
478
/* Demote bi to "needs recompilation": route all of its entry points
 * back through the generic handlers until it is rebuilt. */
static __inline__ void block_need_recompile(blockinfo * bi)
{
	uae_u32 cl = cacheline(bi->pc_p);

	set_dhtu(bi, bi->direct_pen);
	bi->direct_handler = bi->direct_pen;

	bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	bi->handler = (cpuop_func *)popall_execute_normal;
	/* Only clear the dispatch slot if bi is the current chain head. */
	if (bi == cache_tags[cl + 1].bi)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	bi->status = BI_NEED_RECOMP;
}
492
/* Recursively mark every block that jumps into bi for recompilation.
 * The dependency is walked via a saved `next` pointer because marking
 * a caller unlinks it from this very list. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
	dependency *x = bi->deplist;

	while (x) {
		dependency *next = x->next; /* This disappears when we mark for
									 * recompilation and thus remove the
									 * blocks from the lists */
		if (x->jmp_off) {
			blockinfo *cbi = x->source;

			Dif(cbi->status == BI_INVALID) {
				// write_log("invalid block in dependency list\n"); // FIXME?
				// abort();
			}
			if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
				block_need_recompile(cbi);
				mark_callers_recompile(cbi);  /* propagate upward */
			}
			else if (cbi->status == BI_COMPILING) {
				redo_current_block = 1;  /* we're inside that block right now */
			}
			else if (cbi->status == BI_NEED_RECOMP) {
				/* nothing */
			}
			else {
				//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
			}
		}
		x = next;
	}
}
525
/* Look up the blockinfo for addr, creating one from the hold_bi[]
 * stock if it does not exist yet.  Aborts when the stock is exhausted.
 * NOTE(review): the `setstate` parameter is unused here -- presumably
 * kept for interface compatibility; confirm against callers. */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
	blockinfo* bi=get_blockinfo_addr(addr);
	int i;

	if (!bi) {
		/* Take the first available pre-allocated blockinfo. */
		for (i=0;i<MAX_HOLD_BI && !bi;i++) {
			if (hold_bi[i]) {
				uae_u32 cl=cacheline(addr);

				bi=hold_bi[i];
				hold_bi[i]=NULL;
				bi->pc_p=(uae_u8 *)addr;
				invalidate_block(bi);
				add_to_active(bi);
				add_to_cl_list(bi);

			}
		}
	}
	if (!bi) {
		write_log("Looking for blockinfo, can't find free one\n");
		abort();
	}
	return bi;
}
552
553 static void prepare_block(blockinfo* bi);
554
/* Management of blockinfos.

   A blockinfo struct is allocated whenever a new block has to be
   compiled. If the list of free blockinfos is empty, we allocate a new
   pool of blockinfos and link the newly created blockinfos altogether
   into the list of free blockinfos. Otherwise, we simply pop a structure
   off the free list.

   Blockinfos are lazily deallocated, i.e. chained altogether in the
   list of free blockinfos whenever a translation cache flush (hard or
   soft) request occurs.
*/
567
/* Pool-based allocator: carves fixed-size chunks out of malloc'ed
 * pools and keeps released chunks on an intrusive free list (T must
 * provide a `next` pointer).  Pools are only freed on destruction. */
template< class T >
class LazyBlockAllocator
{
	enum {
		kPoolSize = 1 + 4096 / sizeof(T)  /* chunks per pool (~one page) */
	};
	struct Pool {
		T chunk[kPoolSize];
		Pool * next;   /* singly-linked list of all pools */
	};
	Pool * mPools;     /* every pool ever allocated */
	T * mChunks;       /* free list of available chunks */
public:
	LazyBlockAllocator() : mPools(0), mChunks(0) { }
	~LazyBlockAllocator();
	T * acquire();
	void release(T * const);
};
586
587 template< class T >
588 LazyBlockAllocator<T>::~LazyBlockAllocator()
589 {
590 Pool * currentPool = mPools;
591 while (currentPool) {
592 Pool * deadPool = currentPool;
593 currentPool = currentPool->next;
594 free(deadPool);
595 }
596 }
597
598 template< class T >
599 T * LazyBlockAllocator<T>::acquire()
600 {
601 if (!mChunks) {
602 // There is no chunk left, allocate a new pool and link the
603 // chunks into the free list
604 Pool * newPool = (Pool *)malloc(sizeof(Pool));
605 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
606 chunk->next = mChunks;
607 mChunks = chunk;
608 }
609 newPool->next = mPools;
610 mPools = newPool;
611 }
612 T * chunk = mChunks;
613 mChunks = chunk->next;
614 return chunk;
615 }
616
/* Return a chunk to the free list; memory stays owned by its pool. */
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
	chunk->next = mChunks;
	mChunks = chunk;
}
623
/* Trivial bump allocator that places objects directly in the
 * translation cache (current_compile_p); release() is a no-op because
 * the space is reclaimed wholesale when the cache is invalidated. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
638
639 #if USE_SEPARATE_BIA
640 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
641 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
642 #else
643 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
644 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
645 #endif
646
/* Get a fresh checksum_info record with a cleared link. */
static __inline__ checksum_info *alloc_checksum_info(void)
{
	checksum_info *csi = ChecksumInfoAllocator.acquire();
	csi->next = NULL;
	return csi;
}

/* Return one checksum_info record to its allocator. */
static __inline__ void free_checksum_info(checksum_info *csi)
{
	csi->next = NULL;
	ChecksumInfoAllocator.release(csi);
}

/* Release a whole NULL-terminated chain of checksum_info records. */
static __inline__ void free_checksum_info_chain(checksum_info *csi)
{
	while (csi != NULL) {
		checksum_info *csi2 = csi->next;
		free_checksum_info(csi);
		csi = csi2;
	}
}

/* Get a fresh blockinfo with an empty checksum chain. */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	bi->csi = NULL;
#endif
	return bi;
}

/* Release a blockinfo together with its checksum chain. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif
	BlockInfoAllocator.release(bi);
}

/* Top up the hold_bi[] stock of prepared blockinfos.  Returns early
 * as soon as an occupied slot is found (stock presumed full). */
static __inline__ void alloc_blockinfos(void)
{
	int i;
	blockinfo* bi;

	for (i=0;i<MAX_HOLD_BI;i++) {
		if (hold_bi[i])
			return;
		bi=hold_bi[i]=alloc_blockinfo();
		prepare_block(bi);
	}
}
699
700 /********************************************************************
701 * Functions to emit data into memory, and other general support *
702 ********************************************************************/
703
/* Current output position of the code emitter. */
static uae_u8* target;

static void emit_init(void)
{
	/* Nothing to initialize; kept as an explicit hook. */
}

/* Emit one byte at the current output position. */
static __inline__ void emit_byte(uae_u8 x)
{
	*target++=x;
}

/* Emit a 16-bit value.  Unaligned, type-punned store -- acceptable on
 * the x86/x86-64 targets this JIT is restricted to (see the #error
 * guards at the top of the file). */
static __inline__ void emit_word(uae_u16 x)
{
	*((uae_u16*)target)=x;
	target+=2;
}

/* Emit a 32-bit value (same unaligned-store caveat as emit_word). */
static __inline__ void emit_long(uae_u32 x)
{
	*((uae_u32*)target)=x;
	target+=4;
}

/* Emit a 64-bit value (same unaligned-store caveat as emit_word). */
static __inline__ void emit_quad(uae_u64 x)
{
	*((uae_u64*)target)=x;
	target+=8;
}

/* Copy an arbitrary byte sequence into the output stream. */
static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
{
	memcpy((uae_u8 *)target,block,blocklen);
	target+=blocklen;
}

/* Byte-swap a 32-bit value (the #else branch is the portable
 * reference implementation, kept for documentation). */
static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
	// gb-- We have specialized byteswapping functions, just use them
	return do_byteswap_32(v);
#else
	return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}
748
749 /********************************************************************
750 * Getting the information about the target CPU *
751 ********************************************************************/
752
753 #include "codegen_x86.cpp"
754
/* Point the emitter at a new output position. */
void set_target(uae_u8* t)
{
	target=t;
}

/* Return the current emitter position. */
static __inline__ uae_u8* get_target_noopt(void)
{
	return target;
}

/* Public accessor for the current emitter position. */
__inline__ uae_u8* get_target(void)
{
	return get_target_noopt();
}
769
770
771 /********************************************************************
772 * Flags status handling. EMIT TIME! *
773 ********************************************************************/
774
775 static void bt_l_ri_noclobber(R4 r, IMM i);
776
/* Ensure the 68k flags are live in the native flags register,
 * reloading them from the saved copy if necessary.  Aborts when
 * neither location holds valid flags. */
static void make_flags_live_internal(void)
{
	if (live.flags_in_flags==VALID)
		return;
	Dif (live.flags_on_stack==TRASH) {
		write_log("Want flags, got something on stack, but it is TRASH\n");
		abort();
	}
	if (live.flags_on_stack==VALID) {
		int tmp;
		tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
		raw_reg_to_flags(tmp);  /* move saved copy into native flags */
		unlock2(tmp);

		live.flags_in_flags=VALID;
		return;
	}
	write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
	live.flags_in_flags,live.flags_on_stack);
	abort();
}
798
/* Save the native flags into their memory copy ("stack" for
 * historical reasons) unless already saved.  When flags are known to
 * be uninteresting, just mark the copy valid without emitting code. */
static void flags_to_stack(void)
{
	if (live.flags_on_stack==VALID)
		return;
	if (!live.flags_are_important) {
		live.flags_on_stack=VALID;  /* nobody will read them */
		return;
	}
	Dif (live.flags_in_flags!=VALID)
		abort();
	else {
		int tmp;
		tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
		raw_flags_to_reg(tmp);
		unlock2(tmp);
	}
	live.flags_on_stack=VALID;
}
817
/* About to emit code that trashes the native flags: save them first
 * if they currently hold live 68k flags. */
static __inline__ void clobber_flags(void)
{
	if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
		flags_to_stack();
	live.flags_in_flags=TRASH;
}

/* Prepare for leaving the compiled stuff */
static __inline__ void flush_flags(void)
{
	flags_to_stack();
	return;
}
831
832 int touchcnt;
833
834 /********************************************************************
835 * Partial register flushing for optimized calls *
836 ********************************************************************/
837
/* Bitmasks of 68k registers (D0-D7 = bits 0-7, A0-A7 = bits 8-15; see
 * ru_fill_ea) read (rmask) and written (wmask) by one instruction. */
struct regusage {
	uae_u16 rmask;
	uae_u16 wmask;
};

/* Set bit `reg` in a usage mask (no-op unless USE_OPTIMIZED_CALLS). */
static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	*mask |= 1 << reg;
#endif
}

/* Test bit `reg`; conservatively true when tracking is disabled. */
static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	return (*mask & (1 << reg));
#else
	/* Default: instruction reads & write to register */
	return true;
#endif
}

/* Mark register `reg` as read. */
static inline void ru_set_read(regusage *ru, int reg)
{
	ru_set(&ru->rmask, reg);
}

/* Mark register `reg` as written. */
static inline void ru_set_write(regusage *ru, int reg)
{
	ru_set(&ru->wmask, reg);
}

/* Is register `reg` read by the instruction? */
static inline bool ru_read_p(const regusage *ru, int reg)
{
	return ru_get(&ru->rmask, reg);
}

/* Is register `reg` written by the instruction? */
static inline bool ru_write_p(const regusage *ru, int reg)
{
	return ru_get(&ru->wmask, reg);
}
879
/* Record the register usage implied by one effective address and
 * advance m68k_pc_offset past any extension words it consumes.
 * write_mode selects whether a Dreg/Areg operand counts as written
 * (1) or read (0). */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
					   wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;  /* address registers occupy mask bits 8-15 */
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);  /* base address register is read */
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through */
	case PC8r: {
		/* Extension word: index register lives in bits 12-15. */
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100)
			/* Full-format word: skip base/outer displacements. */
			m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	}
}
924
925 /* TODO: split into a static initialization part and a dynamic one
926 (instructions depending on extension words) */
927 static void ru_fill(regusage *ru, uae_u32 opcode)
928 {
929 m68k_pc_offset += 2;
930
931 /* Default: no register is used or written to */
932 ru->rmask = 0;
933 ru->wmask = 0;
934
935 uae_u32 real_opcode = cft_map(opcode);
936 struct instr *dp = &table68k[real_opcode];
937
938 bool rw_dest = true;
939 bool handled = false;
940
941 /* Handle some instructions specifically */
942 uae_u16 reg, ext;
943 switch (dp->mnemo) {
944 case i_BFCHG:
945 case i_BFCLR:
946 case i_BFEXTS:
947 case i_BFEXTU:
948 case i_BFFFO:
949 case i_BFINS:
950 case i_BFSET:
951 case i_BFTST:
952 ext = comp_get_iword((m68k_pc_offset+=2)-2);
953 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
954 if (ext & 0x020) ru_set_read(ru, ext & 7);
955 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
956 if (dp->dmode == Dreg)
957 ru_set_read(ru, dp->dreg);
958 switch (dp->mnemo) {
959 case i_BFEXTS:
960 case i_BFEXTU:
961 case i_BFFFO:
962 ru_set_write(ru, (ext >> 12) & 7);
963 break;
964 case i_BFINS:
965 ru_set_read(ru, (ext >> 12) & 7);
966 /* fall through */
967 case i_BFCHG:
968 case i_BFCLR:
969 case i_BSET:
970 if (dp->dmode == Dreg)
971 ru_set_write(ru, dp->dreg);
972 break;
973 }
974 handled = true;
975 rw_dest = false;
976 break;
977
978 case i_BTST:
979 rw_dest = false;
980 break;
981
982 case i_CAS:
983 {
984 ext = comp_get_iword((m68k_pc_offset+=2)-2);
985 int Du = ext & 7;
986 ru_set_read(ru, Du);
987 int Dc = (ext >> 6) & 7;
988 ru_set_read(ru, Dc);
989 ru_set_write(ru, Dc);
990 break;
991 }
992 case i_CAS2:
993 {
994 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
995 ext = comp_get_iword((m68k_pc_offset+=2)-2);
996 Rn1 = (ext >> 12) & 15;
997 Du1 = (ext >> 6) & 7;
998 Dc1 = ext & 7;
999 ru_set_read(ru, Rn1);
1000 ru_set_read(ru, Du1);
1001 ru_set_read(ru, Dc1);
1002 ru_set_write(ru, Dc1);
1003 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1004 Rn2 = (ext >> 12) & 15;
1005 Du2 = (ext >> 6) & 7;
1006 Dc2 = ext & 7;
1007 ru_set_read(ru, Rn2);
1008 ru_set_read(ru, Du2);
1009 ru_set_write(ru, Dc2);
1010 break;
1011 }
1012 case i_DIVL: case i_MULL:
1013 m68k_pc_offset += 2;
1014 break;
1015 case i_LEA:
1016 case i_MOVE: case i_MOVEA: case i_MOVE16:
1017 rw_dest = false;
1018 break;
1019 case i_PACK: case i_UNPK:
1020 rw_dest = false;
1021 m68k_pc_offset += 2;
1022 break;
1023 case i_TRAPcc:
1024 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1025 break;
1026 case i_RTR:
1027 /* do nothing, just for coverage debugging */
1028 break;
1029 /* TODO: handle EXG instruction */
1030 }
1031
1032 /* Handle A-Traps better */
1033 if ((real_opcode & 0xf000) == 0xa000) {
1034 handled = true;
1035 }
1036
1037 /* Handle EmulOps better */
1038 if ((real_opcode & 0xff00) == 0x7100) {
1039 handled = true;
1040 ru->rmask = 0xffff;
1041 ru->wmask = 0;
1042 }
1043
1044 if (dp->suse && !handled)
1045 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1046
1047 if (dp->duse && !handled)
1048 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1049
1050 if (rw_dest)
1051 ru->rmask |= ru->wmask;
1052
1053 handled = handled || dp->suse || dp->duse;
1054
1055 /* Mark all registers as used/written if the instruction may trap */
1056 if (may_trap(opcode)) {
1057 handled = true;
1058 ru->rmask = 0xffff;
1059 ru->wmask = 0xffff;
1060 }
1061
1062 if (!handled) {
1063 write_log("ru_fill: %04x = { %04x, %04x }\n",
1064 real_opcode, ru->rmask, ru->wmask);
1065 abort();
1066 }
1067 }
1068
1069 /********************************************************************
1070 * register allocation per block logging *
1071 ********************************************************************/
1072
1073 static uae_s8 vstate[VREGS];
1074 static uae_s8 vwritten[VREGS];
1075 static uae_s8 nstate[N_REGS];
1076
1077 #define L_UNKNOWN -127
1078 #define L_UNAVAIL -1
1079 #define L_NEEDED -2
1080 #define L_UNNEEDED -3
1081
/* Snapshot the current allocation log into a smallstate.
 * NOTE(review): the bigstate parameter `b` is unused -- the data is
 * taken from the file-scope vstate[]/nstate[] logs; presumably the
 * signature is kept for symmetry.  TODO confirm. */
static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
{
	int i;

	for (i = 0; i < VREGS; i++)
		s->virt[i] = vstate[i];
	for (i = 0; i < N_REGS; i++)
		s->nat[i] = nstate[i];
}
1091
/* Compare the current allocation log against the smallstate recorded
 * for a block and decide (returns 1) whether callers must be
 * recompiled.  `b` is unused; state comes from vstate[]/nstate[]. */
static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
{
	int i;
	int reverse = 0;  /* counts mismatches in the non-forcing direction */

	for (i = 0; i < VREGS; i++) {
		if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
			return 1;
		if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
			reverse++;
	}
	for (i = 0; i < N_REGS; i++) {
		if (nstate[i] >= 0 && nstate[i] != s->nat[i])
			return 1;
		if (nstate[i] < 0 && s->nat[i] >= 0)
			reverse++;
	}
	if (reverse >= 2 && USE_MATCH)
		return 1; /* In this case, it might be worth recompiling the
				   * callers */
	return 0;
}
1114
/* Reset the per-block register-allocation log before compiling. */
static __inline__ void log_startblock(void)
{
	int i;

	for (i = 0; i < VREGS; i++) {
		vstate[i] = L_UNKNOWN;
		vwritten[i] = 0;
	}
	for (i = 0; i < N_REGS; i++)
		nstate[i] = L_UNKNOWN;
}
1126
/* Using an n-reg for a temp variable */
static __inline__ void log_isused(int n)
{
	if (nstate[n] == L_UNKNOWN)
		nstate[n] = L_UNAVAIL;
}

/* Record that v-reg r was read before being written in this block. */
static __inline__ void log_visused(int r)
{
	if (vstate[r] == L_UNKNOWN)
		vstate[r] = L_NEEDED;
}

/* Load v-reg r into native reg n, going through the dedicated flag
 * loaders for the two special flag registers. */
static __inline__ void do_load_reg(int n, int r)
{
	if (r == FLAGTMP)
		raw_load_flagreg(n, r);
	else if (r == FLAGX)
		raw_load_flagx(n, r);
	else
		raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}

/* Unconditionally reload v-reg r's memory image into native reg n. */
static __inline__ void check_load_reg(int n, int r)
{
	raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}

/* Record that v-reg r has been written in this block. */
static __inline__ void log_vwrite(int r)
{
	vwritten[r] = 1;
}
1159
1160 /* Using an n-reg to hold a v-reg */
1161 static __inline__ void log_isreg(int n, int r)
1162 {
1163 static int count = 0;
1164
1165 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1166 nstate[n] = r;
1167 else {
1168 do_load_reg(n, r);
1169 if (nstate[n] == L_UNKNOWN)
1170 nstate[n] = L_UNAVAIL;
1171 }
1172 if (vstate[r] == L_UNKNOWN)
1173 vstate[r] = L_NEEDED;
1174 }
1175
/* v-reg r is about to be overwritten: its previous value is dead. */
static __inline__ void log_clobberreg(int r)
{
	if (vstate[r] == L_UNKNOWN)
		vstate[r] = L_UNNEEDED;
}

/* This ends all possibility of clever register allocation */

static __inline__ void log_flush(void)
{
	int i;

	/* Pessimize everything still unknown: v-regs become needed,
	 * n-regs become unavailable. */
	for (i = 0; i < VREGS; i++)
		if (vstate[i] == L_UNKNOWN)
			vstate[i] = L_NEEDED;
	for (i = 0; i < N_REGS; i++)
		if (nstate[i] == L_UNKNOWN)
			nstate[i] = L_UNAVAIL;
}
1195
1196 static __inline__ void log_dump(void)
1197 {
1198 int i;
1199
1200 return;
1201
1202 write_log("----------------------\n");
1203 for (i = 0; i < N_REGS; i++) {
1204 switch (nstate[i]) {
1205 case L_UNKNOWN:
1206 write_log("Nat %d : UNKNOWN\n", i);
1207 break;
1208 case L_UNAVAIL:
1209 write_log("Nat %d : UNAVAIL\n", i);
1210 break;
1211 default:
1212 write_log("Nat %d : %d\n", i, nstate[i]);
1213 break;
1214 }
1215 }
1216 for (i = 0; i < VREGS; i++) {
1217 if (vstate[i] == L_UNNEEDED)
1218 write_log("Virt %d: UNNEEDED\n", i);
1219 }
1220 }
1221
1222 /********************************************************************
1223 * register status handling. EMIT TIME! *
1224 ********************************************************************/
1225
/* Set virtual reg r's status.  Becoming ISCONST means the old memory value
   will never be read again, so log it as clobbered for state matching. */
static __inline__ void set_status(int r, int status)
{
    if (status == ISCONST)
	log_clobberreg(r);
    live.state[r].status=status;
}

/* True iff r currently lives in a native register (clean or dirty). */
static __inline__ int isinreg(int r)
{
    return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
}
1237
1238 static __inline__ void adjust_nreg(int r, uae_u32 val)
1239 {
1240 if (!val)
1241 return;
1242 raw_lea_l_brr(r,r,val);
1243 }
1244
/* Write virtual reg r back to its memory slot.  Any pending constant
   offset is first folded into the native register (which dirties it), then
   a store of the dirty size is emitted and r becomes CLEAN. */
static void tomem(int r)
{
    int rr=live.state[r].realreg;

    if (isinreg(r)) {
	/* Fold the offset only when rr holds just this vreg and is not
	   locked; otherwise the adjustment would corrupt co-residents. */
	if (live.state[r].val && live.nat[rr].nholds==1
	    && !live.nat[rr].locked) {
	    // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
	    //   live.state[r].val,r,rr,target);
	    adjust_nreg(rr,live.state[r].val);
	    live.state[r].val=0;
	    live.state[r].dirtysize=4;
	    set_status(r,DIRTY);
	}
    }

    if (live.state[r].status==DIRTY) {
	/* Store only the bytes that are actually dirty (1/2/4). */
	switch (live.state[r].dirtysize) {
	case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
	case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
	case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
	default: abort();
	}
	log_vwrite(r);
	set_status(r,CLEAN);
	live.state[r].dirtysize=0;
    }
}
1273
/* True iff virtual reg r currently holds a known compile-time constant. */
static __inline__ int isconst(int r)
{
    return live.state[r].status==ISCONST;
}

/* Non-static wrapper of isconst() for use outside this file. */
int is_const(int r)
{
    return isconst(r);
}
1283
/* Materialize a constant-valued virtual reg directly to its memory slot
   with an immediate store; afterwards r is INMEM with no pending value.
   No-op if r is not a constant. */
static __inline__ void writeback_const(int r)
{
    if (!isconst(r))
	return;
    /* NOTE(review): Dif appears to be a debug-only sanity check macro —
       confirm against its definition earlier in the file. */
    Dif (live.state[r].needflush==NF_HANDLER) {
	write_log("Trying to write back constant NF_HANDLER!\n");
	abort();
    }

    raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
    log_vwrite(r);
    live.state[r].val=0;
    set_status(r,INMEM);
}
1298
1299 static __inline__ void tomem_c(int r)
1300 {
1301 if (isconst(r)) {
1302 writeback_const(r);
1303 }
1304 else
1305 tomem(r);
1306 }
1307
/* Evict virtual reg r from its native register: write it back to memory,
   then remove it from the native reg's holds[] list (swap-with-last) and
   mark it INMEM.  No-op if r is not register-resident. */
static void evict(int r)
{
    int rr;

    if (!isinreg(r))
	return;
    tomem(r);
    rr=live.state[r].realreg;

    Dif (live.nat[rr].locked &&
	live.nat[rr].nholds==1) {
	write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
	abort();
    }

    live.nat[rr].nholds--;
    if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
	/* Move the last-held vreg into r's slot to keep holds[] dense. */
	int topreg=live.nat[rr].holds[live.nat[rr].nholds];
	int thisind=live.state[r].realind;

	live.nat[rr].holds[thisind]=topreg;
	live.state[topreg].realind=thisind;
    }
    live.state[r].realreg=-1;
    set_status(r,INMEM);
}

/* Evict every virtual reg currently held by native reg r, leaving it
   completely free. */
static __inline__ void free_nreg(int r)
{
    int i=live.nat[r].nholds;

    /* Iterate from the top so evict()'s swap-with-last never disturbs
       entries we have yet to visit. */
    while (i) {
	int vr;

	--i;
	vr=live.nat[r].holds[i];
	evict(vr);
    }
    Dif (live.nat[r].nholds!=0) {
	write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
	abort();
    }
}
1351
/* Use with care! */
/* Forcibly mark a register-resident vreg as CLEAN (full 4 bytes valid,
   nothing dirty, no pending offset) WITHOUT emitting a writeback — any
   dirty data is silently discarded. */
static __inline__ void isclean(int r)
{
    if (!isinreg(r))
	return;
    live.state[r].validsize=4;
    live.state[r].dirtysize=0;
    live.state[r].val=0;
    set_status(r,CLEAN);
}

/* Drop r's association with its native register without writing anything
   back (isclean discards dirty state, then evict unlinks it). */
static __inline__ void disassociate(int r)
{
    isclean(r);
    evict(r);
}

/* Make virtual reg r a compile-time constant with value val, discarding
   whatever it held before. */
static __inline__ void set_const(int r, uae_u32 val)
{
    disassociate(r);
    live.state[r].val=val;
    set_status(r,ISCONST);
}

/* Return r's pending value field (constant value or register offset,
   depending on status). */
static __inline__ uae_u32 get_offset(int r)
{
    return live.state[r].val;
}
1380
/* Allocate a native register for virtual reg r.
 *
 * - size:        access width needed (1/2/4); byte/word require a native
 *                reg with canbyte/canword set
 * - willclobber: nonzero if the caller will overwrite the value, so no
 *                load is needed
 * - hint:        preferred native reg, or -1 for no preference
 *
 * Picks the cheapest (least recently touched, preferably empty) suitable
 * native reg, spills whatever it holds, loads/initializes r as required,
 * and links r into the nat[] bookkeeping.  Returns the native reg index.
 */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness=0; /* to shut up gcc */
    bestreg=-1;
    when=2000000000;

    /* Scan for the best candidate; the hint gets a large bonus so it wins
       whenever it is usable at all. */
    for (i=N_REGS;i--;) {
	badness=live.nat[i].touched;
	if (live.nat[i].nholds==0)
	    badness=0;
	if (i==hint)
	    badness-=200000000;
	if (!live.nat[i].locked && badness<when) {
	    if ((size==1 && live.nat[i].canbyte) ||
		(size==2 && live.nat[i].canword) ||
		(size==4)) {
		bestreg=i;
		when=badness;
		if (live.nat[i].nholds==0 && hint<0)
		    break;
		if (i==hint)
		    break;
	    }
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.nat[bestreg].nholds>0) {
	free_nreg(bestreg);
    }
    if (isinreg(r)) {
	int rr=live.state[r].realreg;
	/* This will happen if we read a partially dirty register at a
	   bigger size */
	Dif (willclobber || live.state[r].validsize>=size)
	    abort();
	Dif (live.nat[rr].nholds!=1)
	    abort();
	if (size==4 && live.state[r].validsize==2) {
	    /* Merge: low 16 bits are valid in rr, the high bits come from
	       memory.  The bswap/zero-extend dance combines the memory
	       upper half with the register lower half into rr. */
	    log_isused(bestreg);
	    log_visused(r);
	    raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
	    raw_bswap_32(bestreg);
	    raw_zero_extend_16_rr(rr,rr);
	    raw_zero_extend_16_rr(bestreg,bestreg);
	    raw_bswap_32(bestreg);
	    raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
	    live.state[r].validsize=4;
	    live.nat[rr].touched=touchcnt++;
	    return rr;
	}
	if (live.state[r].validsize==1) {
	    /* Nothing yet */
	}
	evict(r);
    }

    if (!willclobber) {
	if (live.state[r].status!=UNDEF) {
	    if (isconst(r)) {
		/* Materialize the constant; the reg is now dirty wrt memory. */
		raw_mov_l_ri(bestreg,live.state[r].val);
		live.state[r].val=0;
		live.state[r].dirtysize=4;
		set_status(r,DIRTY);
		log_isused(bestreg);
	    }
	    else {
		log_isreg(bestreg, r); /* This will also load it! */
		live.state[r].dirtysize=0;
		set_status(r,CLEAN);
	    }
	}
	else {
	    /* Undefined source: don't bother loading garbage. */
	    live.state[r].val=0;
	    live.state[r].dirtysize=0;
	    set_status(r,CLEAN);
	    log_isused(bestreg);
	}
	live.state[r].validsize=4;
    }
    else { /* this is the easiest way, but not optimal. FIXME! */
	/* Now it's trickier, but hopefully still OK */
	if (!isconst(r) || size==4) {
	    live.state[r].validsize=size;
	    live.state[r].dirtysize=size;
	    live.state[r].val=0;
	    set_status(r,DIRTY);
	    if (size == 4) {
		log_clobberreg(r);
		log_isused(bestreg);
	    }
	    else {
		log_visused(r);
		log_isused(bestreg);
	    }
	}
	else {
	    /* Partial clobber of a constant: load it first so the
	       untouched bytes stay correct. */
	    if (live.state[r].status!=UNDEF)
		raw_mov_l_ri(bestreg,live.state[r].val);
	    live.state[r].val=0;
	    live.state[r].validsize=4;
	    live.state[r].dirtysize=4;
	    set_status(r,DIRTY);
	    log_isused(bestreg);
	}
    }
    /* Link r as the newest holder of bestreg. */
    live.state[r].realreg=bestreg;
    live.state[r].realind=live.nat[bestreg].nholds;
    live.nat[bestreg].touched=touchcnt++;
    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
    live.nat[bestreg].nholds++;

    return bestreg;
}
1499
/* alloc_reg_hinted() without a placement preference. */
static int alloc_reg(int r, int size, int willclobber)
{
    return alloc_reg_hinted(r,size,willclobber,-1);
}

/* Release one lock level on native reg r (locks nest). */
static void unlock2(int r)
{
    Dif (!live.nat[r].locked)
	abort();
    live.nat[r].locked--;
}

/* Acquire one lock level on native reg r so the allocator won't spill it. */
static void setlock(int r)
{
    live.nat[r].locked++;
}
1516
1517
/* Move native reg s's contents and all its vreg associations into native
   reg d.  Anything previously held by d is spilled first; s ends up empty
   (but its machine register still holds the value — only the bookkeeping
   moves). */
static void mov_nregs(int d, int s)
{
    int ns=live.nat[s].nholds;
    int nd=live.nat[d].nholds;
    int i;

    if (s==d)
	return;

    if (nd>0)
	free_nreg(d);

    log_isused(d);
    raw_mov_l_rr(d,s);

    /* Re-point every vreg that lived in s at d, preserving indices. */
    for (i=0;i<live.nat[s].nholds;i++) {
	int vs=live.nat[s].holds[i];

	live.state[vs].realreg=d;
	live.state[vs].realind=i;
	live.nat[d].holds[i]=vs;
    }
    live.nat[d].nholds=live.nat[s].nholds;

    live.nat[s].nholds=0;
}
1544
1545
1546 static __inline__ void make_exclusive(int r, int size, int spec)
1547 {
1548 int clobber;
1549 reg_status oldstate;
1550 int rr=live.state[r].realreg;
1551 int nr;
1552 int nind;
1553 int ndirt=0;
1554 int i;
1555
1556 if (!isinreg(r))
1557 return;
1558 if (live.nat[rr].nholds==1)
1559 return;
1560 for (i=0;i<live.nat[rr].nholds;i++) {
1561 int vr=live.nat[rr].holds[i];
1562 if (vr!=r &&
1563 (live.state[vr].status==DIRTY || live.state[vr].val))
1564 ndirt++;
1565 }
1566 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1567 /* Everything else is clean, so let's keep this register */
1568 for (i=0;i<live.nat[rr].nholds;i++) {
1569 int vr=live.nat[rr].holds[i];
1570 if (vr!=r) {
1571 evict(vr);
1572 i--; /* Try that index again! */
1573 }
1574 }
1575 Dif (live.nat[rr].nholds!=1) {
1576 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1577 rr,live.nat[rr].nholds,r);
1578 abort();
1579 }
1580 return;
1581 }
1582
1583 /* We have to split the register */
1584 oldstate=live.state[r];
1585
1586 setlock(rr); /* Make sure this doesn't go away */
1587 /* Forget about r being in the register rr */
1588 disassociate(r);
1589 /* Get a new register, that we will clobber completely */
1590 if (oldstate.status==DIRTY) {
1591 /* If dirtysize is <4, we need a register that can handle the
1592 eventual smaller memory store! Thanks to Quake68k for exposing
1593 this detail ;-) */
1594 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1595 }
1596 else {
1597 nr=alloc_reg_hinted(r,4,1,spec);
1598 }
1599 nind=live.state[r].realind;
1600 live.state[r]=oldstate; /* Keep all the old state info */
1601 live.state[r].realreg=nr;
1602 live.state[r].realind=nind;
1603
1604 if (size<live.state[r].validsize) {
1605 if (live.state[r].val) {
1606 /* Might as well compensate for the offset now */
1607 raw_lea_l_brr(nr,rr,oldstate.val);
1608 live.state[r].val=0;
1609 live.state[r].dirtysize=4;
1610 set_status(r,DIRTY);
1611 }
1612 else
1613 raw_mov_l_rr(nr,rr); /* Make another copy */
1614 }
1615 unlock2(rr);
1616 }
1617
/* Defer an address adjustment: accumulate off into r's pending offset
   instead of emitting an add now. */
static __inline__ void add_offset(int r, uae_u32 off)
{
    live.state[r].val+=off;
}
1622
1623 static __inline__ void remove_offset(int r, int spec)
1624 {
1625 reg_status oldstate;
1626 int rr;
1627
1628 if (isconst(r))
1629 return;
1630 if (live.state[r].val==0)
1631 return;
1632 if (isinreg(r) && live.state[r].validsize<4)
1633 evict(r);
1634
1635 if (!isinreg(r))
1636 alloc_reg_hinted(r,4,0,spec);
1637
1638 Dif (live.state[r].validsize!=4) {
1639 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1640 abort();
1641 }
1642 make_exclusive(r,0,-1);
1643 /* make_exclusive might have done the job already */
1644 if (live.state[r].val==0)
1645 return;
1646
1647 rr=live.state[r].realreg;
1648
1649 if (live.nat[rr].nholds==1) {
1650 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1651 // live.state[r].val,r,rr,target);
1652 adjust_nreg(rr,live.state[r].val);
1653 live.state[r].dirtysize=4;
1654 live.state[r].val=0;
1655 set_status(r,DIRTY);
1656 return;
1657 }
1658 write_log("Failed in remove_offset\n");
1659 abort();
1660 }
1661
/* Fold pending offsets for every virtual register (no placement hints). */
static __inline__ void remove_all_offsets(void)
{
    int i;

    for (i=0;i<VREGS;i++)
	remove_offset(i,-1);
}
1669
/* Acquire a native register holding virtual reg r for reading.
 *
 * - size:       read width (1/2/4); byte/word need a capable native reg
 * - spec:       required native reg (>=0) or -1 for any
 * - can_offset: nonzero if the caller tolerates a pending constant offset
 *
 * Returns the (locked) native register; caller must unlock2() it.
 */
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
	remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
	n=live.state[r].realreg;
	/* Reuse the current native reg only if it supports the access
	   width (spec>=0 implies a later mov into a capable reg). */
	switch(size) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	case 4:
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}



/* Read r at the given width in any suitable native reg. */
static int readreg(int r, int size)
{
    return readreg_general(r,size,-1,0);
}

/* Read r, forcing it into native reg spec. */
static int readreg_specific(int r, int size, int spec)
{
    return readreg_general(r,size,spec,0);
}

/* Read r while allowing a pending constant offset to remain. */
static int readreg_offset(int r, int size)
{
    return readreg_general(r,size,-1,1);
}
1734
/* writereg_general(r, size, spec)
 *
 * INPUT
 * - r : mid-layer register
 * - size : requested size (1/2/4)
 * - spec : -1 if find or make a register free, otherwise specifies
 * the physical register to use in any case
 *
 * OUTPUT
 * - hard (physical, x86 here) register allocated to virtual register r
 *
 * The returned register is locked and marked DIRTY at (at least) the
 * requested width; caller must unlock2() it after emitting the write.
 */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    /* Sub-word writes cannot coexist with a pending constant offset. */
    if (size<4) {
	remove_offset(r,spec);
    }

    make_exclusive(r,size,spec);
    if (isinreg(r)) {
	int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
	int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	n=live.state[r].realreg;

	Dif (live.nat[n].nholds!=1)
	    abort();
	switch(size) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	case 4:
	    live.state[r].dirtysize=ndsize;
	    live.state[r].validsize=nvsize;
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
	mov_nregs(spec,answer);
	answer=spec;
    }
    /* An UNDEF reg becomes fully "valid" once written. */
    if (live.state[r].status==UNDEF)
	live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
	live.state[r].val=0;
    }
    else {
	Dif (live.state[r].val) {
	    write_log("Problem with val\n");
	    abort();
	}
    }
    set_status(r,DIRTY);
    return answer;
}

/* Write r at the given width in any suitable native reg. */
static int writereg(int r, int size)
{
    return writereg_general(r,size,-1);
}

/* Write r, forcing it into native reg spec. */
static int writereg_specific(int r, int size, int spec)
{
    return writereg_general(r,size,spec);
}
1826
/* Acquire a native register for a read-modify-write access to virtual
 * reg r.
 *
 * - wsize: width written (must be >= rsize)
 * - rsize: width read
 * - spec : required native reg (>=0) or -1 for any
 *
 * Returns the (locked) native register, marked DIRTY at wsize; caller
 * must unlock2() it.
 */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    remove_offset(r,spec);
    make_exclusive(r,0,spec);

    Dif (wsize<rsize) {
	write_log("Cannot handle wsize<rsize in rmw_general()\n");
	abort();
    }
    if (isinreg(r) && live.state[r].validsize>=rsize) {
	n=live.state[r].realreg;
	Dif (live.nat[n].nholds!=1)
	    abort();

	switch(rsize) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	case 4:
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    if (wsize>live.state[r].dirtysize)
	live.state[r].dirtysize=wsize;
    if (wsize>live.state[r].validsize)
	live.state[r].validsize=wsize;
    set_status(r,DIRTY);

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;

    Dif (live.state[r].val) {
	write_log("Problem with val(rmw)\n");
	abort();
    }
    return answer;
}

/* RMW access in any suitable native reg. */
static int rmw(int r, int wsize, int rsize)
{
    return rmw_general(r,wsize,rsize,-1);
}

/* RMW access, forcing native reg spec. */
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
    return rmw_general(r,wsize,rsize,spec);
}
1902
1903
1904 /* needed for restoring the carry flag on non-P6 cores */
1905 static void bt_l_ri_noclobber(R4 r, IMM i)
1906 {
1907 int size=4;
1908 if (i<16)
1909 size=2;
1910 r=readreg(r,size);
1911 raw_bt_l_ri(r,i);
1912 unlock2(r);
1913 }
1914
1915 /********************************************************************
1916 * FPU register status handling. EMIT TIME! *
1917 ********************************************************************/
1918
/* Write FP virtual reg r back to memory, keeping the value on the FPU
   stack; r becomes CLEAN. */
static void f_tomem(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=CLEAN;
    }
}

/* Write FP virtual reg r back to memory and pop it off the FPU stack;
   r becomes INMEM. */
static void f_tomem_drop(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=INMEM;
    }
}
1942
1943
/* True iff FP virtual reg r currently lives in a native FP register. */
static __inline__ int f_isinreg(int r)
{
    return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
}

/* Evict FP virtual reg r: write it back (dropping the FPU stack slot when
   r is the last holder), then unlink it from the native reg's holds[]
   list and mark it INMEM.  FP analogue of evict(). */
static void f_evict(int r)
{
    int rr;

    if (!f_isinreg(r))
	return;
    rr=live.fate[r].realreg;
    if (live.fat[rr].nholds==1)
	f_tomem_drop(r);
    else
	f_tomem(r);

    Dif (live.fat[rr].locked &&
	live.fat[rr].nholds==1) {
	write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
	abort();
    }

    live.fat[rr].nholds--;
    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
	/* Swap-with-last to keep holds[] dense. */
	int topreg=live.fat[rr].holds[live.fat[rr].nholds];
	int thisind=live.fate[r].realind;
	live.fat[rr].holds[thisind]=topreg;
	live.fate[topreg].realind=thisind;
    }
    live.fate[r].status=INMEM;
    live.fate[r].realreg=-1;
}

/* Evict every FP virtual reg held by native FP reg r, leaving it free. */
static __inline__ void f_free_nreg(int r)
{
    int i=live.fat[r].nholds;

    while (i) {
	int vr;

	--i;
	vr=live.fat[r].holds[i];
	f_evict(vr);
    }
    Dif (live.fat[r].nholds!=0) {
	write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
	abort();
    }
}


/* Use with care! */
/* Mark a register-resident FP vreg CLEAN without writing back — silently
   discards dirty data. */
static __inline__ void f_isclean(int r)
{
    if (!f_isinreg(r))
	return;
    live.fate[r].status=CLEAN;
}

/* Drop r's association with its native FP register without a writeback. */
static __inline__ void f_disassociate(int r)
{
    f_isclean(r);
    f_evict(r);
}
2009
2010
2011
/* Allocate a native FP register for FP virtual reg r (FP analogue of
 * alloc_reg_hinted, but with no size or hint handling).
 *
 * - willclobber: nonzero if the caller overwrites the value, so no load
 *                from memory is needed
 *
 * Returns the chosen native FP register index.
 */
static int f_alloc_reg(int r, int willclobber)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness;
    bestreg=-1;
    when=2000000000;
    /* Pick the least recently touched unlocked reg, preferring empties. */
    for (i=N_FREGS;i--;) {
	badness=live.fat[i].touched;
	if (live.fat[i].nholds==0)
	    badness=0;

	if (!live.fat[i].locked && badness<when) {
	    bestreg=i;
	    when=badness;
	    if (live.fat[i].nholds==0)
		break;
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.fat[bestreg].nholds>0) {
	f_free_nreg(bestreg);
    }
    if (f_isinreg(r)) {
	f_evict(r);
    }

    if (!willclobber) {
	if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
	    raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else
	    raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif
	}
	live.fate[r].status=CLEAN;
    }
    else {
	live.fate[r].status=DIRTY;
    }
    /* Link r as the newest holder of bestreg. */
    live.fate[r].realreg=bestreg;
    live.fate[r].realind=live.fat[bestreg].nholds;
    live.fat[bestreg].touched=touchcnt++;
    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
    live.fat[bestreg].nholds++;

    return bestreg;
}

/* Release one lock level on native FP reg r. */
static void f_unlock(int r)
{
    Dif (!live.fat[r].locked)
	abort();
    live.fat[r].locked--;
}

/* Acquire one lock level on native FP reg r. */
static void f_setlock(int r)
{
    live.fat[r].locked++;
}
2075
/* Acquire a native FP register holding FP virtual reg r for reading.
   Returns the (locked) register; caller must f_unlock() it. */
static __inline__ int f_readreg(int r)
{
    int n;
    int answer=-1;

    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
	answer=n;
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0)
	answer=f_alloc_reg(r,0);

    live.fat[answer].locked++;
    live.fat[answer].touched=touchcnt++;
    return answer;
}
2094
/* Ensure FP virtual reg r is the sole occupant of a native FP register
 * (FP analogue of make_exclusive).
 *
 * - clobber: nonzero if the caller will fully overwrite r, so no copy of
 *            the old value into the new register is needed
 */
static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
	return;
    if (live.fat[rr].nholds==1)
	return;
    /* Count dirty co-residents. */
    for (i=0;i<live.fat[rr].nholds;i++) {
	int vr=live.fat[rr].holds[i];
	if (vr!=r && live.fate[vr].status==DIRTY)
	    ndirt++;
    }
    if (!ndirt && !live.fat[rr].locked) {
	/* Everything else is clean, so let's keep this register */
	for (i=0;i<live.fat[rr].nholds;i++) {
	    int vr=live.fat[rr].holds[i];
	    if (vr!=r) {
		f_evict(vr);
		i--; /* Try that index again! */
	    }
	}
	Dif (live.fat[rr].nholds!=1) {
	    write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
	    for (i=0;i<live.fat[rr].nholds;i++) {
		write_log(" %d(%d,%d)",live.fat[rr].holds[i],
		    live.fate[live.fat[rr].holds[i]].realreg,
		    live.fate[live.fat[rr].holds[i]].realind);
	    }
	    write_log("\n");
	    abort();
	}
	return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
	raw_fmov_rr(nr,rr); /* Make another copy */
    live.fate[r]=oldstate; /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
2151
2152
/* Acquire a native FP register for overwriting FP virtual reg r entirely.
   Returns the (locked) register, marked DIRTY; caller must f_unlock(). */
static __inline__ int f_writereg(int r)
{
    int n;
    int answer=-1;

    f_make_exclusive(r,1);
    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
	answer=n;
    }
    if (answer<0) {
	answer=f_alloc_reg(r,1);
    }
    live.fate[r].status=DIRTY;
    live.fat[answer].locked++;
    live.fat[answer].touched=touchcnt++;
    return answer;
}

/* Acquire a native FP register for a read-modify-write of FP virtual reg
   r.  Returns the (locked) register, marked DIRTY; caller must f_unlock(). */
static int f_rmw(int r)
{
    int n;

    f_make_exclusive(r,0);
    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
    }
    else
	n=f_alloc_reg(r,0);
    live.fate[r].status=DIRTY;
    live.fat[n].locked++;
    live.fat[n].touched=touchcnt++;
    return n;
}
2187
/* Transfer the FPU condition codes (from FP_RESULT) into the emulated CPU
   flags.  `tmp` is a scratch mid-layer register, needed only when the
   raw_fflags_into_flags sequence clobbers a fixed native reg
   (FFLAG_NREG_CLOBBER_CONDITION). */
static void fflags_into_flags_internal(uae_u32 tmp)
{
    int r;

    clobber_flags();
    r=f_readreg(FP_RESULT);
    if (FFLAG_NREG_CLOBBER_CONDITION) {
	/* Pin tmp to the clobbered native reg so the allocator treats it
	   as overwritten, then forget its (meaningless) contents. */
	int tmp2=tmp;
	tmp=writereg_specific(tmp,4,FFLAG_NREG);
	raw_fflags_into_flags(r);
	unlock2(tmp);
	forget_about(tmp2);
    }
    else
	raw_fflags_into_flags(r);
    f_unlock(r);
    live_flags();
}
2206
2207
2208
2209
2210 /********************************************************************
2211 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2212 ********************************************************************/
2213
2214 /*
2215 * RULES FOR HANDLING REGISTERS:
2216 *
2217 * * In the function headers, order the parameters
2218 * - 1st registers written to
2219 * - 2nd read/modify/write registers
2220 * - 3rd registers read from
2221 * * Before calling raw_*, you must call readreg, writereg or rmw for
2222 * each register
2223 * * The order for this is
2224 * - 1st call remove_offset for all registers written to with size<4
2225 * - 2nd call readreg for all registers read without offset
2226 * - 3rd call rmw for all rmw registers
2227 * - 4th call readreg_offset for all registers that can handle offsets
2228 * - 5th call get_offset for all the registers from the previous step
2229 * - 6th call writereg for all written-to registers
2230 * - 7th call raw_*
2231 * - 8th unlock2 all registers that were locked
2232 */
2233
/* Declare that the just-emitted code left valid 68k flags in the native
   flag register; the stacked copy is now stale. */
MIDFUNC(0,live_flags,(void))
{
    live.flags_on_stack=TRASH;
    live.flags_in_flags=VALID;
    live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))

/* Declare that subsequent code need not preserve the emulated flags. */
MIDFUNC(0,dont_care_flags,(void))
{
    live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))


/* Copy the native carry flag into the emulated X flag's memory slot
   (emitted via a setcc to FLAGX's backing store). */
MIDFUNC(0,duplicate_carry,(void))
{
    evict(FLAGX);
    make_flags_live_internal();
    COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
    log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))

/* Load the emulated X flag back into the native carry flag, choosing the
   sequence by whether the CPU suffers partial-register-access stalls. */
MIDFUNC(0,restore_carry,(void))
{
    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
	bt_l_ri_noclobber(FLAGX,0);
    }
    else { /* Avoid the stall the above creates.
	    This is slow on non-P6, though.
	    */
	COMPCALL(rol_b_ri(FLAGX,8));
	isclean(FLAGX);
    }
}
MENDFUNC(0,restore_carry,(void))

/* Mark the start of a region whose emitted code must set flags. */
MIDFUNC(0,start_needflags,(void))
{
    needflags=1;
}
MENDFUNC(0,start_needflags,(void))

/* Mark the end of a flags-critical region. */
MIDFUNC(0,end_needflags,(void))
{
    needflags=0;
}
MENDFUNC(0,end_needflags,(void))

/* Force the emulated flags into the native flag register now. */
MIDFUNC(0,make_flags_live,(void))
{
    make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))

/* Move FPU condition codes into the CPU flags; tmp is scratch.
   NOTE(review): clobber_flags() is called here and again inside
   fflags_into_flags_internal() — apparently redundant but harmless. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
    clobber_flags();
    fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2296
2297
/* Bit-test r against immediate bit i; a bit index below 16 only needs a
   word-wide read. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=readreg(r,size);
    raw_bt_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */

/* Bit-test r against bit index held in register b. */
MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
    CLOBBER_BT;
    r=readreg(r,4);
    b=readreg(b,4);
    raw_bt_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */

/* Bit-test-and-complement r at immediate bit i (read-modify-write). */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btc_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))

/* Bit-test-and-complement r at the bit index in b. */
MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btc_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))


/* Bit-test-and-reset r at immediate bit i. */
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btr_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))

/* Bit-test-and-reset r at the bit index in b. */
MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btr_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))


/* Bit-test-and-set r at immediate bit i. */
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_bts_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))

/* Bit-test-and-set r at the bit index in b. */
MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_bts_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2391
/* Load 32 bits from absolute address s into mid-layer register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,4);
    raw_mov_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))


/* Indirect call through the address in register r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
    r=readreg(r,4);
    raw_call_r(r);
    unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */

/* 32-bit subtract of immediate s from memory at address d. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
    CLOBBER_SUB;
    raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))

/* Store 32-bit immediate s to memory at address d. */
MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))

/* Store 16-bit immediate s to memory at address d. */
MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))

/* Store 8-bit immediate s to memory at address d. */
MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2437
/* Rotate byte r left by immediate i.  Skipped entirely for a zero count
   unless flags are needed (a zero rotate still has to set them). */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,1,1);
    raw_rol_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))

/* Rotate word r left by immediate i. */
MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,2,2);
    raw_rol_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))

/* Rotate long r left by immediate i. */
MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,4,4);
    raw_rol_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2470
2471 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2472 {
2473 if (isconst(r)) {
2474 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2475 return;
2476 }
2477 CLOBBER_ROL;
2478 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2479 d=rmw(d,4,4);
2480 Dif (r!=1) {
2481 write_log("Illegal register %d in raw_rol_b\n",r);
2482 abort();
2483 }
2484 raw_rol_l_rr(d,r) ;
2485 unlock2(r);
2486 unlock2(d);
2487 }
2488 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2489
2490 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2491 { /* Can only do this with r==1, i.e. cl */
2492
2493 if (isconst(r)) {
2494 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2495 return;
2496 }
2497 CLOBBER_ROL;
2498 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2499 d=rmw(d,2,2);
2500 Dif (r!=1) {
2501 write_log("Illegal register %d in raw_rol_b\n",r);
2502 abort();
2503 }
2504 raw_rol_w_rr(d,r) ;
2505 unlock2(r);
2506 unlock2(d);
2507 }
2508 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2509
/* Rotate byte d left by the count in r (count must be in cl — see
   SHIFTCOUNT_NREG).  Constant counts use the immediate form. */
MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_rol_b\n",r);
	abort();
    }
    raw_rol_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2530
2531
2532 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2533 {
2534 if (isconst(r)) {
2535 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2536 return;
2537 }
2538 CLOBBER_SHLL;
2539 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2540 d=rmw(d,4,4);
2541 Dif (r!=1) {
2542 write_log("Illegal register %d in raw_rol_b\n",r);
2543 abort();
2544 }
2545 raw_shll_l_rr(d,r) ;
2546 unlock2(r);
2547 unlock2(d);
2548 }
2549 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2550
2551 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2552 { /* Can only do this with r==1, i.e. cl */
2553
2554 if (isconst(r)) {
2555 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2556 return;
2557 }
2558 CLOBBER_SHLL;
2559 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2560 d=rmw(d,2,2);
2561 Dif (r!=1) {
2562 write_log("Illegal register %d in raw_shll_b\n",r);
2563 abort();
2564 }
2565 raw_shll_w_rr(d,r) ;
2566 unlock2(r);
2567 unlock2(d);
2568 }
2569 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2570
/* Shift 8-bit register d left by count in register r.
   Can only do this with r==1, i.e. cl; folds to the immediate form when
   r holds a known constant. */
MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHLL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);	/* count must be in CL */
	d=rmw(d,1,1);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shll_b\n",r);
		abort();
	}
	raw_shll_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2591
2592
/* Rotate 8-bit register r right by immediate i.
   NOTE(review): declared R1 but uses rmw() like the RW1 rol variants —
   the annotation looks stale; verify against the external prototype
   before changing it. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,1,1);
	raw_ror_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2603
/* Rotate 16-bit register r right by immediate i.
   NOTE(review): declared R2 but uses rmw() — annotation looks stale. */
MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,2,2);
	raw_ror_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2614
/* Rotate 32-bit register r right by immediate i.
   NOTE(review): declared R4 but uses rmw() — annotation looks stale. */
MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,4,4);
	raw_ror_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2625
2626 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2627 {
2628 if (isconst(r)) {
2629 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2630 return;
2631 }
2632 CLOBBER_ROR;
2633 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2634 d=rmw(d,4,4);
2635 raw_ror_l_rr(d,r) ;
2636 unlock2(r);
2637 unlock2(d);
2638 }
2639 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2640
2641 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2642 {
2643 if (isconst(r)) {
2644 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2645 return;
2646 }
2647 CLOBBER_ROR;
2648 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2649 d=rmw(d,2,2);
2650 raw_ror_w_rr(d,r) ;
2651 unlock2(r);
2652 unlock2(d);
2653 }
2654 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2655
2656 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2657 {
2658 if (isconst(r)) {
2659 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2660 return;
2661 }
2662
2663 CLOBBER_ROR;
2664 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2665 d=rmw(d,1,1);
2666 raw_ror_b_rr(d,r) ;
2667 unlock2(r);
2668 unlock2(d);
2669 }
2670 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2671
2672 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2673 {
2674 if (isconst(r)) {
2675 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2676 return;
2677 }
2678 CLOBBER_SHRL;
2679 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2680 d=rmw(d,4,4);
2681 Dif (r!=1) {
2682 write_log("Illegal register %d in raw_rol_b\n",r);
2683 abort();
2684 }
2685 raw_shrl_l_rr(d,r) ;
2686 unlock2(r);
2687 unlock2(d);
2688 }
2689 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2690
2691 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2692 { /* Can only do this with r==1, i.e. cl */
2693
2694 if (isconst(r)) {
2695 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2696 return;
2697 }
2698 CLOBBER_SHRL;
2699 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2700 d=rmw(d,2,2);
2701 Dif (r!=1) {
2702 write_log("Illegal register %d in raw_shrl_b\n",r);
2703 abort();
2704 }
2705 raw_shrl_w_rr(d,r) ;
2706 unlock2(r);
2707 unlock2(d);
2708 }
2709 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2710
/* Logical shift 8-bit register d right by count in register r.
   Can only do this with r==1, i.e. cl; folds to the immediate form when
   r holds a known constant. */
MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHRL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);	/* count must be in CL */
	d=rmw(d,1,1);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shrl_b\n",r);
		abort();
	}
	raw_shrl_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2731
2732
2733
/* Shift 32-bit register r left by immediate i.
   Constant-folds in place when r is a known constant and no flags are
   required; otherwise emits the shift. */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val<<=i;	/* fold shift into the tracked constant */
		return;
	}
	CLOBBER_SHLL;
	r=rmw(r,4,4);
	raw_shll_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2748
/* Shift 16-bit register r left by immediate i; no-op unless flags needed. */
MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHLL;
	r=rmw(r,2,2);
	raw_shll_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2759
/* Shift 8-bit register r left by immediate i; no-op unless flags needed. */
MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHLL;
	r=rmw(r,1,1);
	raw_shll_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2770
/* Logical shift 32-bit register r right by immediate i.
   Constant-folds when r is a known constant and no flags are required.
   NOTE(review): the fold relies on live.state[].val being an unsigned
   type so that >> is a logical shift — verify against the declaration. */
MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val>>=i;
		return;
	}
	CLOBBER_SHRL;
	r=rmw(r,4,4);
	raw_shrl_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2785
/* Logical shift 16-bit register r right by immediate i; no-op unless
   flags are needed. */
MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRL;
	r=rmw(r,2,2);
	raw_shrl_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2796
/* Logical shift 8-bit register r right by immediate i; no-op unless
   flags are needed. */
MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRL;
	r=rmw(r,1,1);
	raw_shrl_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2807
/* Arithmetic shift 32-bit register r right by immediate i; no-op unless
   flags are needed. */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,4,4);
	raw_shra_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2818
/* Arithmetic shift 16-bit register r right by immediate i; no-op unless
   flags are needed. */
MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,2,2);
	raw_shra_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2829
/* Arithmetic shift 8-bit register r right by immediate i; no-op unless
   flags are needed. */
MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,1,1);
	raw_shra_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2840
2841 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2842 {
2843 if (isconst(r)) {
2844 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2845 return;
2846 }
2847 CLOBBER_SHRA;
2848 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2849 d=rmw(d,4,4);
2850 Dif (r!=1) {
2851 write_log("Illegal register %d in raw_rol_b\n",r);
2852 abort();
2853 }
2854 raw_shra_l_rr(d,r) ;
2855 unlock2(r);
2856 unlock2(d);
2857 }
2858 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2859
2860 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2861 { /* Can only do this with r==1, i.e. cl */
2862
2863 if (isconst(r)) {
2864 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2865 return;
2866 }
2867 CLOBBER_SHRA;
2868 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2869 d=rmw(d,2,2);
2870 Dif (r!=1) {
2871 write_log("Illegal register %d in raw_shra_b\n",r);
2872 abort();
2873 }
2874 raw_shra_w_rr(d,r) ;
2875 unlock2(r);
2876 unlock2(d);
2877 }
2878 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2879
/* Arithmetic shift 8-bit register d right by count in register r.
   Can only do this with r==1, i.e. cl; folds to the immediate form when
   r holds a known constant. */
MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHRA;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);	/* count must be in CL */
	d=rmw(d,1,1);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shra_b\n",r);
		abort();
	}
	raw_shra_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2900
2901
/* Set 8-bit register d to 0/1 according to condition code cc. */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
	CLOBBER_SETCC;
	d=writereg(d,1);
	raw_setcc(d,cc);
	unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))
2910
/* Set the byte at memory address d according to condition code cc. */
MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
	CLOBBER_SETCC;
	raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2917
/* Conditionally move 32-bit register s into d when cc holds.
   d==s is a no-op regardless of the condition. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
	if (d==s)
		return;
	CLOBBER_CMOV;
	s=readreg(s,4);
	d=rmw(d,4,4);	/* d is read-modify-write: it keeps its value if cc fails */
	raw_cmov_l_rr(d,s,cc);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2930
/* Conditionally load the long at memory address s into register d when
   cc holds. */
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
	CLOBBER_CMOV;
	d=rmw(d,4,4);
	raw_cmov_l_rm(d,s,cc);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2939
/* Bit-scan-forward of s into d.
   NOTE(review): s is declared W4 but only read via readreg() — the
   annotation looks stale; verify against the external prototype. */
MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
{
	CLOBBER_BSF;
	s = readreg(s, 4);
	d = writereg(d, 4);
	raw_bsf_l_rr(d, s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2950
/* Set the Z flag depending on the value in s. Note that the
   value has to be 0 or -1 (or, more precisely, for non-zero
   values, bit 14 must be set)! */
MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
{
	CLOBBER_BSF;
	s=rmw_specific(s,4,4,FLAG_NREG3);	/* s must sit in the flag-transfer register */
	tmp=writereg(tmp,4);	/* scratch register used by raw_flags_set_zero */
	raw_flags_set_zero(s, tmp);
	unlock2(tmp);
	unlock2(s);
}
MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2964
/* Signed 32x32->32 multiply: d *= s. */
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_imul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2975
/* Signed 32x32->64 multiply; both operands are pinned to the specific
   registers the widening multiply instruction requires (MUL_NREG1/2). */
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_imul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2986
/* Unsigned 32x32->64 multiply; operands pinned to MUL_NREG1/2 as the
   widening multiply instruction requires. */
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_mul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2997
/* Unsigned 32x32->32 multiply: d *= s. */
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_mul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3008
3009 #if SIZEOF_VOID_P == 8
/* Sign-extend 32-bit s into d (64-bit hosts only; see enclosing #if).
   Constant-folds when s is known; otherwise handles the s==d aliasing
   case with a single rmw lock to avoid double-locking one register. */
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)live.state[s].val);
		return;
	}

	CLOBBER_SE32;
	isrmw=(s==d);
	if (!isrmw) {
		s=readreg(s,4);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,4);
	}
	raw_sign_extend_32_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3039 #endif
3040
/* Sign-extend the low 16 bits of s into 32-bit d.
   Constant-folds when s is known; the s==d aliasing case takes a single
   rmw lock to avoid double-locking one register at two sizes. */
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s16)live.state[s].val);
		return;
	}

	CLOBBER_SE16;
	isrmw=(s==d);
	if (!isrmw) {
		s=readreg(s,2);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,2);
	}
	raw_sign_extend_16_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3070
/* Sign-extend the low 8 bits of s into 32-bit d.
   Constant-folds when s is known; the s==d aliasing case takes a single
   rmw lock to avoid double-locking one register at two sizes. */
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s8)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_SE8;
	if (!isrmw) {
		s=readreg(s,1);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,1);
	}

	raw_sign_extend_8_rr(d,s);

	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3102
3103
/* Zero-extend the low 16 bits of s into 32-bit d.
   Constant-folds when s is known; the s==d aliasing case takes a single
   rmw lock to avoid double-locking one register at two sizes. */
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u16)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_ZE16;
	if (!isrmw) {
		s=readreg(s,2);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,2);
	}
	raw_zero_extend_16_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3133
/* Zero-extend the low 8 bits of s into 32-bit d.
   Constant-folds when s is known; the s==d aliasing case takes a single
   rmw lock to avoid double-locking one register at two sizes. */
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;
	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u8)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_ZE8;
	if (!isrmw) {
		s=readreg(s,1);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,1);
	}

	raw_zero_extend_8_rr(d,s);

	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3164
/* Copy 8-bit register s into d; no-op when d==s, immediate move when s
   holds a known constant. */
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=writereg(d,1);
	raw_mov_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3182
/* Copy 16-bit register s into d; no-op when d==s, immediate move when s
   holds a known constant. */
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=writereg(d,2);
	raw_mov_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3200
3201
/* Load long: d = *(baser + index*factor). */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_mov_l_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3215
/* Load word: d = *(baser + index*factor). */
MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,2);

	raw_mov_w_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3229
/* Load byte: d = *(baser + index*factor). */
MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,1);

	raw_mov_b_rrm_indexed(d,baser,index,factor);

	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3244
3245
/* Store long: *(baser + index*factor) = s. */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,4);

	/* debug build: source must not share a host register with the address */
	Dif (baser==s || index==s)
		abort();


	raw_mov_l_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3263
/* Store word: *(baser + index*factor) = s. */
MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,2);

	raw_mov_w_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3277
/* Store byte: *(baser + index*factor) = s.
   s is locked first: byte stores need a byte-addressable host register. */
MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg(baser,4);
	index=readreg(index,4);

	raw_mov_b_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3291
3292
/* Store long: *(base + baser + index*factor) = s.
   Pending constant offsets tracked for baser/index are folded into base
   so the emitted addressing mode stays correct. */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	int basereg=baser;	/* remember virtual regs before they become host regs */
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,4);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3312
/* Store word: *(base + baser + index*factor) = s.
   Pending constant offsets for baser/index are folded into base. */
MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
	int basereg=baser;	/* remember virtual regs before they become host regs */
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,2);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3332
/* Store byte: *(base + baser + index*factor) = s.
   Pending constant offsets for baser/index are folded into base. */
MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
	int basereg=baser;	/* remember virtual regs before they become host regs */
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3352
3353
3354
/* Read a long from base+baser+factor*index into d.
   Pending constant offsets for baser/index are folded into base. */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;	/* remember virtual regs before they become host regs */
	int indexreg=index;

	CLOBBER_MOV;
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,4);
	raw_mov_l_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3373
3374
/* Read a word from base+baser+factor*index into d.
   Any pending offset on d is flushed first since d is only partially
   overwritten by a 2-byte write. */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;	/* remember virtual regs before they become host regs */
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,2);
	raw_mov_w_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3393
3394
/* Read a byte from base+baser+factor*index into d.
   Any pending offset on d is flushed first since d is only partially
   overwritten by a 1-byte write. */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;	/* remember virtual regs before they become host regs */
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,1);
	raw_mov_b_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3413
/* Read a long from base+factor*index into d.
   Falls back to a plain memory load when index is a known constant;
   otherwise folds index's pending offset into base. */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
	int indexreg=index;	/* remember virtual reg before it becomes a host reg */

	if (isconst(index)) {
		COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
		return;
	}

	CLOBBER_MOV;
	index=readreg_offset(index,4);
	base+=get_offset(indexreg)*factor;
	d=writereg(d,4);

	raw_mov_l_rm_indexed(d,base,index,factor);
	unlock2(index);
	unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3434
3435
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		/* address is fully known: use an absolute load instead */
		COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,4);

	raw_mov_l_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3452
/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		/* address is fully known: use an absolute load instead */
		COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,2);

	raw_mov_w_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3469
/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		/* address is fully known: use an absolute load instead */
		COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,1);

	raw_mov_b_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3486
/* read the long at the address contained in s+offset and store in d;
   s's pending constant offset is folded into offset */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
	int sreg=s;	/* remember virtual reg before it becomes a host reg */
	if (isconst(s)) {
		COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,4);

	raw_mov_l_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3505
/* read the word at the address contained in s+offset and store in d;
   s's pending constant offset is folded into offset, and d's pending
   offset is flushed since d is only partially overwritten */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
	int sreg=s;	/* remember virtual reg before it becomes a host reg */
	if (isconst(s)) {
		COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	remove_offset(d,-1);
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,2);

	raw_mov_w_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3525
/* read the byte at the address contained in s+offset and store in d;
   s's pending constant offset is folded into offset, and d's pending
   offset is flushed since d is only partially overwritten */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
	int sreg=s;	/* remember virtual reg before it becomes a host reg */
	if (isconst(s)) {
		COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
		return;
	}
	CLOBBER_MOV;
	remove_offset(d,-1);
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,1);

	raw_mov_b_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3545
/* Store immediate long i at address d+offset; d's pending constant
   offset is folded into offset. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
	int dreg=d;	/* remember virtual reg before it becomes a host reg */
	if (isconst(d)) {
		COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
		return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_l_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3561
/* Store immediate word i at address d+offset; d's pending constant
   offset is folded into offset. */
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
	int dreg=d;	/* remember virtual reg before it becomes a host reg */
	if (isconst(d)) {
		COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
		return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_w_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3577
/* Store immediate byte i at address d+offset; d's pending constant
   offset is folded into offset. */
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
	int dreg=d;	/* remember virtual reg before it becomes a host reg */
	if (isconst(d)) {
		COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
		return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_b_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3593
/* Warning! OFFSET is byte sized only! */
/* Store long register s at address d+offset; folds to constant-address
   or constant-source forms when either operand is a known constant. */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
	if (isconst(d)) {
		COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,4);
	d=readreg(d,4);

	raw_mov_l_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3615
/* Store word register s at address d+offset; folds to constant-address
   or constant-source forms when either operand is a known constant. */
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
	if (isconst(d)) {
		COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=readreg(d,4);
	raw_mov_w_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3635
/* Store byte register s at address d+offset; folds to constant-address
   or constant-source forms when either operand is a known constant. */
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
	if (isconst(d)) {
		COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=readreg(d,4);
	raw_mov_b_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3655
/* d = s + offset (address computation, no memory access).
   Constant-folds when s is known; when d==s and offset tracking is
   enabled, the add is deferred via the register's pending offset. */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
	if (isconst(s)) {
		COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
		return;
	}
#if USE_OFFSET
	if (d==s) {
		add_offset(d,offset);	/* defer the add; no code emitted now */
		return;
	}
#endif
	CLOBBER_LEA;
	s=readreg(s,4);
	d=writereg(d,4);
	raw_lea_l_brr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3676
/* d = s + index*factor + offset (address computation, no memory access);
   falls back to the offset-less form when offset is zero. */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
	if (!offset) {
		COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
		return;
	}
	CLOBBER_LEA;
	s=readreg(s,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_lea_l_brr_indexed(d,s,index,factor,offset);
	unlock2(d);
	unlock2(index);
	unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3694
/* d = s + index*factor (address computation, no memory access). */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
	CLOBBER_LEA;
	s=readreg(s,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_lea_l_rr_indexed(d,s,index,factor);
	unlock2(d);
	unlock2(index);
	unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3708
/* write s to the long at the address contained in d+offset;
   d's pending constant offset is folded into offset */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
	int dreg=d;	/* remember virtual reg before it becomes a host reg */
	if (isconst(d)) {
		COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,4);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);

	raw_mov_l_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3728
/* write s to the word at the address contained in d+offset;
   d's pending constant offset is folded into offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
	int dreg=d;	/* remember virtual reg before it becomes a host reg */

	if (isconst(d)) {
		COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_w_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3748
/* write s to the byte at the address contained in d+offset;
   d's pending constant offset is folded into offset */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
	int dreg=d;	/* remember virtual reg before it becomes a host reg */
	if (isconst(d)) {
		COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_b_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3766
3767 MIDFUNC(1,bswap_32,(RW4 r))
3768 {
3769 int reg=r;
3770
3771 if (isconst(r)) {
3772 uae_u32 oldv=live.state[r].val;
3773 live.state[r].val=reverse32(oldv);
3774 return;
3775 }
3776
3777 CLOBBER_SW32;
3778 r=rmw(r,4,4);
3779 raw_bswap_32(r);
3780 unlock2(r);
3781 }
3782 MENDFUNC(1,bswap_32,(RW4 r))
3783
/* Byte-swap the low 16 bits of register r (upper 16 bits preserved);
   constant-folds when r holds a known constant. */
MIDFUNC(1,bswap_16,(RW2 r))
{
	if (isconst(r)) {
		uae_u32 oldv=live.state[r].val;
		live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
			(oldv&0xffff0000);
		return;
	}

	CLOBBER_SW16;
	r=rmw(r,2,2);

	raw_bswap_16(r);
	unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3800
3801
3802
/* Copy 32-bit register s into d without emitting any code: d is simply
   aliased onto s's host register and marked DIRTY. The bookkeeping
   below must stay in exactly this order. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
	int olds;

	if (d==s) { /* How pointless! */
		return;
	}
	if (isconst(s)) {
		COMPCALL(mov_l_ri)(d,live.state[s].val);
		return;
	}
	olds=s;	/* keep the virtual reg; s is about to become a host reg */
	disassociate(d);	/* drop d's old mapping first */
	s=readreg_offset(s,4);
	live.state[d].realreg=s;
	live.state[d].realind=live.nat[s].nholds;
	live.state[d].val=live.state[olds].val;
	live.state[d].validsize=4;
	live.state[d].dirtysize=4;
	set_status(d,DIRTY);

	live.nat[s].holds[live.nat[s].nholds]=d;	/* register d as another holder of s's host reg */
	live.nat[s].nholds++;
	log_clobberreg(d);
	/* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
	   d,s,live.state[d].realind,live.nat[s].nholds); */
	unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3832
/* Store long register s at memory address d; immediate store when s is
   a known constant. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(mov_l_mi)(d,live.state[s].val);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);

	raw_mov_l_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3846
3847
/* Store word register s at memory address d; immediate store when s is
   a known constant. */
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
		return;
	}
	CLOBBER_MOV;
	s=readreg(s,2);

	raw_mov_w_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3861
/* Load the word at memory address s into register d. */
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,2);

	raw_mov_w_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3871
/* Store byte register s at memory address d; immediate store when s is
   a known constant. */
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);

	raw_mov_b_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3886
/* Load the byte at memory address s into register d. */
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,1);

	raw_mov_b_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3896
/* Load immediate s into 32-bit register d: no code is emitted, the
   constant is simply recorded in the register state. */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
	set_const(d,s);
	return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3903
/* Load immediate s into 16-bit register d. */
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,2);

	raw_mov_w_ri(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3913
/* Load immediate s into 8-bit register d. */
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,1);

	raw_mov_b_ri(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3923
3924
/* Add immediate s to the long at memory address d. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
	CLOBBER_ADD;
	raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3931
/* Add immediate s to the word at memory address d. */
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
	CLOBBER_ADD;
	raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3938
/* Add immediate s to the byte at memory address d. */
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
	CLOBBER_ADD;
	raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3945
3946
/* Flags-only: test 32-bit register d against immediate mask i. */
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
	CLOBBER_TEST;
	d=readreg(d,4);

	raw_test_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3956
3957 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3958 {
3959 CLOBBER_TEST;
3960 d=readreg(d,4);
3961 s=readreg(s,4);
3962
3963 raw_test_l_rr(d,s);;
3964 unlock2(d);
3965 unlock2(s);
3966 }
3967 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3968
/* 16-bit register/register TEST; flags only. */
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
	CLOBBER_TEST;
	d=readreg(d,2);
	s=readreg(s,2);

	raw_test_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

/* 8-bit register/register TEST; flags only. */
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
	CLOBBER_TEST;
	d=readreg(d,1);
	s=readreg(s,1);

	raw_test_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3992
3993
/* ------------------------------------------------------------------
 * Bitwise AND / OR mid-layer ops.
 * Pattern: lock sources with readreg(), lock destination read-modify-
 * write with rmw(), emit the raw op, unlock.  Where the operand values
 * are known constants and no flags are needed, fold at compile time
 * instead of emitting code.
 * ------------------------------------------------------------------ */

/* d &= i (32-bit).  Constant-folded when d is a known constant and the
 * emulated flags are not needed. */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
	if (isconst(d) && !needflags) {
		live.state[d].val &= i;
		return;
	}

	CLOBBER_AND;
	d=rmw(d,4,4);

	raw_and_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

/* d &= s (32-bit). */
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
	CLOBBER_AND;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_and_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

/* d &= s (16-bit). */
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
	CLOBBER_AND;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_and_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

/* d &= s (8-bit). */
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
	CLOBBER_AND;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_and_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))

// gb-- used for making an fpcr value in compemu_fpp.cpp
/* d |= [s] : OR a 32-bit value read from memory address s into d. */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
	CLOBBER_OR;
	d=rmw(d,4,4);

	raw_or_l_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))

/* d |= i (32-bit), constant-folded when possible. */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
	if (isconst(d) && !needflags) {
		live.state[d].val|=i;
		return;
	}
	CLOBBER_OR;
	d=rmw(d,4,4);

	raw_or_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))

/* d |= s (32-bit), fully constant-folded when both are constants. */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
	if (isconst(d) && isconst(s) && !needflags) {
		live.state[d].val|=live.state[s].val;
		return;
	}
	CLOBBER_OR;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_or_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))

/* d |= s (16-bit). */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
	CLOBBER_OR;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_or_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

/* d |= s (8-bit). */
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
	CLOBBER_OR;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_or_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4109
/* ------------------------------------------------------------------
 * ADC (add-with-carry) and ADD register/register ops.
 * ADC variants cannot be constant-folded since they consume the live
 * carry flag; the plain ADDs redirect to the *_ri immediate forms when
 * the source is a known constant.
 * ------------------------------------------------------------------ */

/* d += s + carry (32-bit). */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
	CLOBBER_ADC;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_adc_l(d,s);

	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

/* d += s + carry (16-bit). */
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
	CLOBBER_ADC;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_adc_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

/* d += s + carry (8-bit). */
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
	CLOBBER_ADC;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_adc_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))

/* d += s (32-bit); delegates to add_l_ri when s is a known constant. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(add_l_ri)(d,live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_add_l(d,s);

	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

/* d += s (16-bit); delegates to add_w_ri when s is a known constant. */
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_add_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

/* d += s (8-bit); delegates to add_b_ri when s is a known constant. */
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_add_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4198
/* ------------------------------------------------------------------
 * Immediate ADD/SUB ops.  The 32-bit forms try three levels of
 * avoidance before emitting code: no-op when i==0 and flags are
 * unneeded, constant-folding when the destination holds a known
 * constant, and (with USE_OFFSET) deferring the adjustment into the
 * lazy per-register offset so consecutive adds coalesce.
 * ------------------------------------------------------------------ */

/* d -= i (32-bit). */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(d) && !needflags) {
		live.state[d].val-=i;
		return;
	}
#if USE_OFFSET
	if (!needflags) {
		add_offset(d,-i);   /* record pending subtraction; no code emitted */
		return;
	}
#endif

	CLOBBER_SUB;
	d=rmw(d,4,4);

	raw_sub_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

/* d -= i (16-bit).  No const-folding/offsetting: those are tracked at
 * 32-bit width only. */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_SUB;
	d=rmw(d,2,2);

	raw_sub_w_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

/* d -= i (8-bit). */
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_SUB;
	d=rmw(d,1,1);

	raw_sub_b_ri(d,i);

	unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))

/* d += i (32-bit); same short-cuts as sub_l_ri. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(d) && !needflags) {
		live.state[d].val+=i;
		return;
	}
#if USE_OFFSET
	if (!needflags) {
		add_offset(d,i);
		return;
	}
#endif
	CLOBBER_ADD;
	d=rmw(d,4,4);
	raw_add_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

/* d += i (16-bit). */
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_ADD;
	d=rmw(d,2,2);

	raw_add_w_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

/* d += i (8-bit). */
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_ADD;
	d=rmw(d,1,1);

	raw_add_b_ri(d,i);

	unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4296
/* ------------------------------------------------------------------
 * SBB (subtract-with-borrow), SUB and CMP ops.  SBBs consume the live
 * carry flag so nothing is folded; SUBs delegate to the immediate
 * forms for constant sources; CMPs only read both operands and set
 * flags.
 * ------------------------------------------------------------------ */

/* d -= s - borrow (32-bit). */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
	CLOBBER_SBB;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_sbb_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

/* d -= s - borrow (16-bit). */
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
	CLOBBER_SBB;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_sbb_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

/* d -= s - borrow (8-bit). */
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
	CLOBBER_SBB;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_sbb_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))

/* d -= s (32-bit); delegates to sub_l_ri for constant sources. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(sub_l_ri)(d,live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_sub_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

/* d -= s (16-bit). */
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_sub_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

/* d -= s (8-bit). */
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_sub_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))

/* Compare d with s (32-bit); flags only, both operands read-only. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
	CLOBBER_CMP;
	s=readreg(s,4);
	d=readreg(d,4);

	raw_cmp_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

/* Compare register r with immediate i (32-bit). */
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
	CLOBBER_CMP;
	r=readreg(r,4);

	raw_cmp_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

/* Compare d with s (16-bit). */
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
	CLOBBER_CMP;
	s=readreg(s,2);
	d=readreg(d,2);

	raw_cmp_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

/* Compare d with s (8-bit). */
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
	CLOBBER_CMP;
	s=readreg(s,1);
	d=readreg(d,1);

	raw_cmp_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4429
4430
/* d ^= s (32-bit). */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
	CLOBBER_XOR;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_xor_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

/* d ^= s (16-bit). */
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
	CLOBBER_XOR;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_xor_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

/* d ^= s (8-bit). */
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
	CLOBBER_XOR;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_xor_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4466
/* Emit an indirect call through register r, passing one argument (in1,
 * isize bytes) and binding the osize-byte result to virtual register
 * out1.  The result binding is done by hand: after the call, the live
 * state is edited so that out1 lives in REG_RESULT and is marked DIRTY,
 * without emitting any move. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
	clobber_flags();
	remove_all_offsets();
	if (osize==4) {
		/* Full-width result: out1's old value is dead unless out1 is
		 * also an input, in which case it must survive until the call. */
		if (out1!=in1 && out1!=r) {
			COMPCALL(forget_about)(out1);
		}
	}
	else {
		/* Partial-width result: flush out1 so the unwritten bytes keep
		 * their old (in-memory) value. */
		tomem_c(out1);
	}

	in1=readreg_specific(in1,isize,REG_PAR1);   /* force in1 into the arg register */
	r=readreg(r,4);
	prepare_for_call_1(); /* This should ensure that there won't be
							 any need for swapping nregs in prepare_for_call_2
						  */
#if USE_NORMAL_CALLING_CONVENTION
	raw_push_l_r(in1);    /* stack-based ABI: pass the argument on the stack */
#endif
	unlock2(in1);
	unlock2(r);

	prepare_for_call_2();
	raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
	raw_inc_sp(4);        /* caller cleans up the pushed argument */
#endif


	/* Manually associate out1 with the native result register. */
	live.nat[REG_RESULT].holds[0]=out1;
	live.nat[REG_RESULT].nholds=1;
	live.nat[REG_RESULT].touched=touchcnt++;

	live.state[out1].realreg=REG_RESULT;
	live.state[out1].realind=0;
	live.state[out1].val=0;
	live.state[out1].validsize=osize;
	live.state[out1].dirtysize=osize;
	set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))

/* Emit an indirect call through register r with two arguments and no
 * bound result. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
	clobber_flags();
	remove_all_offsets();
	in1=readreg_specific(in1,isize1,REG_PAR1);
	in2=readreg_specific(in2,isize2,REG_PAR2);
	r=readreg(r,4);
	prepare_for_call_1(); /* This should ensure that there won't be
							 any need for swapping nregs in prepare_for_call_2
						  */
#if USE_NORMAL_CALLING_CONVENTION
	raw_push_l_r(in2);    /* push right-to-left per C calling convention */
	raw_push_l_r(in1);
#endif
	unlock2(r);
	unlock2(in1);
	unlock2(in2);
	prepare_for_call_2();
	raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
	raw_inc_sp(8);
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4536
4537 /* forget_about() takes a mid-layer register */
/* forget_about() takes a mid-layer register */
/* Declare r's current value dead: drop any native-register association
 * and mark its state undefined so nothing gets written back. */
MIDFUNC(1,forget_about,(W4 r))
{
	if (isinreg(r))
		disassociate(r);
	live.state[r].val=0;
	set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))

/* Emit a literal NOP into the translated stream. */
MIDFUNC(0,nop,(void))
{
	raw_nop();
}
MENDFUNC(0,nop,(void))


/* FP counterpart of forget_about: drop FP register r's association and
 * mark it undefined. */
MIDFUNC(1,f_forget_about,(FW r))
{
	if (f_isinreg(r))
		f_disassociate(r);
	live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
4561
/* ------------------------------------------------------------------
 * FP constant loaders and FP<->memory moves.
 * Pattern: f_writereg() locks a destination FP register, f_readreg()
 * locks a source; the raw_ emitter does the work; f_unlock releases.
 * ------------------------------------------------------------------ */

/* Load the constant pi into FP register r. */
MIDFUNC(1,fmov_pi,(FW r))
{
	r=f_writereg(r);
	raw_fmov_pi(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

/* Load log10(2) into FP register r. */
MIDFUNC(1,fmov_log10_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log10_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

/* Load log2(e) into FP register r. */
MIDFUNC(1,fmov_log2_e,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log2_e(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

/* Load ln(2) into FP register r. */
MIDFUNC(1,fmov_loge_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_loge_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

/* Load 1.0 into FP register r. */
MIDFUNC(1,fmov_1,(FW r))
{
	r=f_writereg(r);
	raw_fmov_1(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

/* Load 0.0 into FP register r. */
MIDFUNC(1,fmov_0,(FW r))
{
	r=f_writereg(r);
	raw_fmov_0(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))

/* Load a double from memory address m into FP register r. */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

/* Load an integer from memory address m into FP register r (converted). */
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovi_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

/* Store FP register r to memory address m as an integer (converted). */
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovi_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

/* Load a single-precision float from memory address m into FP register r. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovs_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

/* Store FP register r to memory address m as single precision. */
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovs_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

/* Store FP register r to memory address m in extended precision. */
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_ext_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

/* Store FP register r to memory address m as a double. */
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

/* Load an extended-precision value from memory address m into FP register r. */
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_ext_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4673
/* FP register-to-register move.  With USE_F_ALIAS the copy is free:
 * instead of emitting an fmov, d is recorded as another virtual holder
 * of s's native FP register (aliasing), deferred until one of them is
 * written.  Without aliasing, a real move is emitted. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	if (d==s) { /* How pointless! */
		return;
	}
#if USE_F_ALIAS
	f_disassociate(d);                       /* drop d's old association first */
	s=f_readreg(s);
	live.fate[d].realreg=s;
	live.fate[d].realind=live.fat[s].nholds; /* d appended to s's holder list */
	live.fate[d].status=DIRTY;
	live.fat[s].holds[live.fat[s].nholds]=d;
	live.fat[s].nholds++;
	f_unlock(s);
#else
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fmov_rr(d,s);
	f_unlock(s);
	f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4697
/* Load the x87 control word from memory at base+index (used to switch
 * rounding/precision modes). */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	index=readreg(index,4);

	raw_fldcw_m_indexed(index,base);
	unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))

/* Test FP register r (sets FP condition codes). */
MIDFUNC(1,ftst_r,(FR r))
{
	r=f_readreg(r);
	raw_ftst_r(r);
	f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))

/* Declare the FP result/condition register dead for this block. */
MIDFUNC(0,dont_care_fflags,(void))
{
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4720
/* ------------------------------------------------------------------
 * FP arithmetic ops.  Two shapes:
 *   (FW d, FR s)  -- unary ops writing a fresh destination
 *                    (f_readreg source, f_writereg destination);
 *   (FRW d, FR s) -- binary ops updating d in place (f_rmw destination).
 * fcmp reads both operands and only sets FP condition codes.
 * ------------------------------------------------------------------ */

/* d = sqrt(s) */
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsqrt_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

/* d = |s| */
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fabs_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))

/* d = sin(s) */
MIDFUNC(2,fsin_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsin_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))

/* d = cos(s) */
MIDFUNC(2,fcos_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fcos_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))

/* d = 2^s */
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_ftwotox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

/* d = e^s */
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fetox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))

/* d = round-to-integer(s) (current rounding mode) */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_frndint_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))

/* d = log2(s) */
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_flog2_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))

/* d = -s */
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fneg_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))

/* d += s */
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fadd_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))

/* d -= s */
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fsub_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))

/* Compare d with s; FP condition codes only, both operands read-only. */
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
	d=f_readreg(d);
	s=f_readreg(s);
	raw_fcmp_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))

/* d /= s */
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fdiv_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

/* d = fmod-style remainder of d/s */
MIDFUNC(2,frem_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))

/* d = IEEE remainder of d/s */
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem1_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))

/* d *= s */
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fmul_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4880
4881 /********************************************************************
4882 * Support functions exposed to gencomp. CREATE time *
4883 ********************************************************************/
4884
/* Set the emulated Z flag from register r, using BSF when the target
 * CPU's BSF behavior was verified (target_check_bsf), otherwise a
 * simulated sequence using scratch register tmp. */
void set_zero(int r, int tmp)
{
	if (setzflg_uses_bsf)
		bsf_l_rr(r,r);
	else
		simulate_bsf(tmp,r);
}

/* Decide whether virtual register r should be reloaded to avoid a
 * partial-register stall ("rodent" = RAT, register alias table).
 * True only when stalls are possible on this CPU and r is in a state
 * where a full-width reload is safe/cheap. */
int kill_rodent(int r)
{
	return KILLTHERAT &&
		have_rat_stall &&
		(live.state[r].status==INMEM ||
		 live.state[r].status==CLEAN ||
		 live.state[r].status==ISCONST ||
		 live.state[r].dirtysize==4);
}

/* Return the compile-time constant held by r; aborts (debug builds)
 * if r is not actually constant. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}

/* Flush the accumulated 68k PC displacement: add it to the PC_P
 * virtual register and to the host-side comp_pc_p, then reset it. */
void sync_m68k_pc(void)
{
	if (m68k_pc_offset) {
		add_l_ri(PC_P,m68k_pc_offset);
		comp_pc_p+=m68k_pc_offset;
		m68k_pc_offset=0;
	}
}
4920
4921 /********************************************************************
4922 * Scratch registers management *
4923 ********************************************************************/
4924
/* Backing store for scratch virtual registers (those beyond the ones
 * mapped to real 68k integer/FPU registers -- see init_comp). */
struct scratch_t {
	uae_u32 regs[VREGS];        /* spill slots for integer scratch vregs */
	fpu_register fregs[VFREGS]; /* spill slots for FP scratch vregs */
};

static scratch_t scratch;
4931
4932 /********************************************************************
4933 * Support functions exposed to newcpu *
4934 ********************************************************************/
4935
/* Render a boolean preference flag as "on"/"off" for log output. */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
4940
/* One-time JIT compiler initialization: read prefs, probe the target
 * CPU, select the cache-flush strategy and build the compiler tables.
 * Safe to call multiple times; only the first call does work. */
void compiler_init(void)
{
	static bool initialized = false;
	if (initialized)
		return;

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	/* NOTE(review): JITDebug is referenced here even when JIT_DEBUG is
	 * not defined -- presumably it is declared unconditionally elsewhere
	 * with a default value; confirm against the header. */
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	setzflg_uses_bsf = target_check_bsf();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
	write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
	write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
	write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}
4999
/* JIT shutdown: release the translation cache and popall trampoline
 * area, then (when profiling is compiled in) dump compile-time and
 * untranslated-opcode statistics. */
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
	emul_end_time = clock();
#endif

	// Deallocate translation cache
	if (compiled_code) {
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

	// Deallocate popallspace
	if (popallspace) {
		vm_release(popallspace, POPALLSPACE_SIZE);
		popallspace = 0;
	}

#if PROFILE_COMPILE_TIME
	write_log("### Compile Block statistics\n");
	write_log("Number of calls to compile_block : %d\n", compile_count);
	uae_u32 emul_time = emul_end_time - emul_start_time;
	write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
	write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
		100.0*double(compile_time)/double(emul_time));
	write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
	uae_u64 untranslated_count = 0;
	for (int i = 0; i < 65536; i++) {
		opcode_nums[i] = i;
		untranslated_count += raw_cputbl_count[i];
	}
	write_log("Sorting out untranslated instructions count...\n");
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	write_log("\nRank Opc Count Name\n");
	/* Print the top N most-hit untranslated opcodes with their mnemonics. */
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u32 count = raw_cputbl_count[opcode_nums[i]];
		struct instr *dp;
		struct mnemolookup *lookup;
		if (!count)
			break;
		dp = table68k + opcode_nums[i];
		for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
			;
		write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
	}
#endif
}
5050
/* Decide at startup whether the JIT should be used at all: requires the
 * "jit" pref, a big-enough cache, and 68040 emulation. */
bool compiler_use_jit(void)
{
	// Check for the "jit" prefs item
	if (!PrefsFindBool("jit"))
		return false;

	// Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
	if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
		write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
		return false;
	}

	// FIXME: there are currently problems with JIT compilation and anything below a 68040
	if (CPUType < 4) {
		write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
		return false;
	}

	return true;
}
5071
/* Reset the whole compile-time register-allocation state for a new
 * translation unit: virtual integer/FP register states, their memory
 * backing (68k regs / special regs / scratch), native register
 * capability flags, and the flag-tracking state. */
void init_comp(void)
{
	int i;
	uae_s8* cb=can_byte;     /* table of nregs usable for 8-bit ops, sorted */
	uae_s8* cw=can_word;     /* table of nregs usable for 16-bit ops, sorted */
	uae_s8* au=always_used;  /* table of nregs permanently locked */

	for (i=0;i<VREGS;i++) {
		live.state[i].realreg=-1;
		live.state[i].needflush=NF_SCRATCH;
		live.state[i].val=0;
		set_status(i,UNDEF);
	}

	for (i=0;i<VFREGS;i++) {
		live.fate[i].status=UNDEF;
		live.fate[i].realreg=-1;
		live.fate[i].needflush=NF_SCRATCH;
	}

	for (i=0;i<VREGS;i++) {
		if (i<16) { /* First 16 registers map to 68k registers */
			live.state[i].mem=((uae_u32*)&regs)+i;
			live.state[i].needflush=NF_TOMEM;
			set_status(i,INMEM);
		}
		else
			live.state[i].mem=scratch.regs+i;
	}
	/* Special virtual registers: emulated PC pointer and flag storage. */
	live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
	live.state[PC_P].needflush=NF_TOMEM;
	set_const(PC_P,(uintptr)comp_pc_p);

	live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
	live.state[FLAGX].needflush=NF_TOMEM;
	set_status(FLAGX,INMEM);

	live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
	live.state[FLAGTMP].needflush=NF_TOMEM;
	set_status(FLAGTMP,INMEM);

	live.state[NEXT_HANDLER].needflush=NF_HANDLER;
	set_status(NEXT_HANDLER,UNDEF);

	for (i=0;i<VFREGS;i++) {
		if (i<8) { /* First 8 registers map to 68k FPU registers */
			live.fate[i].mem=(uae_u32*)fpu_register_address(i);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else if (i==FP_RESULT) {
			live.fate[i].mem=(uae_u32*)(&fpu.result);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else
			live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
	}


	for (i=0;i<N_REGS;i++) {
		live.nat[i].touched=0;
		live.nat[i].nholds=0;
		live.nat[i].locked=0;
		/* cb/cw/au are sorted nreg lists; advance each pointer when it
		 * matches the current nreg. */
		if (*cb==i) {
			live.nat[i].canbyte=1; cb++;
		} else live.nat[i].canbyte=0;
		if (*cw==i) {
			live.nat[i].canword=1; cw++;
		} else live.nat[i].canword=0;
		if (*au==i) {
			live.nat[i].locked=1; au++;
		}
	}

	for (i=0;i<N_FREGS;i++) {
		live.fat[i].touched=0;
		live.fat[i].nholds=0;
		live.fat[i].locked=0;
	}

	touchcnt=1;
	m68k_pc_offset=0;
	live.flags_in_flags=TRASH;
	live.flags_on_stack=VALID;
	live.flags_are_important=1;

	raw_fp_init();
}
5161
5162 /* Only do this if you really mean it! The next call should be to init!*/
5163 void flush(int save_regs)
5164 {
5165 int fi,i;
5166
5167 log_flush();
5168 flush_flags(); /* low level */
5169 sync_m68k_pc(); /* mid level */
5170
5171 if (save_regs) {
5172 for (i=0;i<VFREGS;i++) {
5173 if (live.fate[i].needflush==NF_SCRATCH ||
5174 live.fate[i].status==CLEAN) {
5175 f_disassociate(i);
5176 }
5177 }
5178 for (i=0;i<VREGS;i++) {
5179 if (live.state[i].needflush==NF_TOMEM) {
5180 switch(live.state[i].status) {
5181 case INMEM:
5182 if (live.state[i].val) {
5183 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5184 log_vwrite(i);
5185 live.state[i].val=0;
5186 }
5187 break;
5188 case CLEAN:
5189 case DIRTY:
5190 remove_offset(i,-1); tomem(i); break;
5191 case ISCONST:
5192 if (i!=PC_P)
5193 writeback_const(i);
5194 break;
5195 default: break;
5196 }
5197 Dif (live.state[i].val && i!=PC_P) {
5198 write_log("Register %d still has val %x\n",
5199 i,live.state[i].val);
5200 }
5201 }
5202 }
5203 for (i=0;i<VFREGS;i++) {
5204 if (live.fate[i].needflush==NF_TOMEM &&
5205 live.fate[i].status==DIRTY) {
5206 f_evict(i);
5207 }
5208 }
5209 raw_fp_cleanup_drop();
5210 }
5211 if (needflags) {
5212 write_log("Warning! flush with needflags=1!\n");
5213 }
5214 }
5215
5216 static void flush_keepflags(void)
5217 {
5218 int fi,i;
5219
5220 for (i=0;i<VFREGS;i++) {
5221 if (live.fate[i].needflush==NF_SCRATCH ||
5222 live.fate[i].status==CLEAN) {
5223 f_disassociate(i);
5224 }
5225 }
5226 for (i=0;i<VREGS;i++) {
5227 if (live.state[i].needflush==NF_TOMEM) {
5228 switch(live.state[i].status) {
5229 case INMEM:
5230 /* Can't adjust the offset here --- that needs "add" */
5231 break;
5232 case CLEAN:
5233 case DIRTY:
5234 remove_offset(i,-1); tomem(i); break;
5235 case ISCONST:
5236 if (i!=PC_P)
5237 writeback_const(i);
5238 break;
5239 default: break;
5240 }
5241 }
5242 }
5243 for (i=0;i<VFREGS;i++) {
5244 if (live.fate[i].needflush==NF_TOMEM &&
5245 live.fate[i].status==DIRTY) {
5246 f_evict(i);
5247 }
5248 }
5249 raw_fp_cleanup_drop();
5250 }
5251
/* Release every scratch virtual register (integer and FP) and warn
 * about any native register still locked. */
void freescratch(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		/* NOTE(review): nreg 4 is exempt from the lock warning --
		 * presumably ESP/the stack pointer on x86, which is always
		 * locked; confirm against always_used. */
		if (live.nat[i].locked && i!=4)
			write_log("Warning! %d is locked\n",i);

	for (i=0;i<VREGS;i++)
		if (live.state[i].needflush==NF_SCRATCH) {
			forget_about(i);
		}

	for (i=0;i<VFREGS;i++)
		if (live.fate[i].needflush==NF_SCRATCH) {
			f_forget_about(i);
		}
}
5269
5270 /********************************************************************
5271 * Support functions, internal *
5272 ********************************************************************/
5273
5274
/* Pad the emit pointer `target` up to an a-byte boundary (a must be a
 * power of two; a==0 means no alignment). */
static void align_target(uae_u32 a)
{
	if (!a)
		return;

	if (tune_nop_fillers)
		raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
	else {
		/* Fill with NOPs --- makes debugging with gdb easier */
		while ((uintptr)target&(a-1))
			*target++=0x90;   /* 0x90 = x86 NOP */
	}
}

/* True if addr lies inside the host copy of the Mac ROM image. */
static __inline__ int isinrom(uintptr addr)
{
	return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
}

/* Flush everything clobbered by a C call: write dirty virtual registers
 * held in caller-saved nregs back to memory, and evict all FP registers. */
static void flush_all(void)
{
	int i;

	log_flush();
	for (i=0;i<VREGS;i++)
		if (live.state[i].status==DIRTY) {
			if (!call_saved[live.state[i].realreg]) {
				tomem(i);
			}
		}
	for (i=0;i<VFREGS;i++)
		if (f_isinreg(i))
			f_evict(i);
	raw_fp_cleanup_drop();
}
5310
5311 /* Make sure all registers that will get clobbered by a call are
5312 save and sound in memory */
/* Phase 1 of calling out to C: make sure every value that a call could
 * clobber is safely in memory.  Runs while argument registers may still
 * be locked. */
static void prepare_for_call_1(void)
{
	flush_all(); /* If there are registers that don't get clobbered,
				  * we should be a bit more selective here */
}

/* Phase 2: actually disassociate all caller-saved native registers and
 * all FP registers, since the callee may trash them.  Must run after
 * the argument registers have been unlocked. */
static void prepare_for_call_2(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		if (!call_saved[i] && live.nat[i].nholds>0)
			free_nreg(i);

	for (i=0;i<N_FREGS;i++)
		if (live.fat[i].nholds>0)
			f_free_nreg(i);

	live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
								  flags at the very start of the call_r
								  functions! */
}
5336
5337 /********************************************************************
5338 * Memory access and related functions, CREATE time *
5339 ********************************************************************/
5340
/* Record a pending conditional branch for the block epilogue: the
 * fall-through PC, the taken PC, and the condition code to test. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	next_pc_p=not_taken;
	taken_pc_p=taken;
	branch_cc=cond;
}
5347
5348
5349 static uae_u32 get_handler_address(uae_u32 addr)
5350 {
5351 uae_u32 cl=cacheline(addr);
5352 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5353 return (uintptr)&(bi->direct_handler_to_use);
5354 }
5355
5356 static uae_u32 get_handler(uae_u32 addr)
5357 {
5358 uae_u32 cl=cacheline(addr);
5359 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5360 return (uintptr)bi->direct_handler_to_use;
5361 }
5362
/* Emit a load of the (possibly later-patched) direct handler for addr
 * into virtual register reg, via the handler slot's host address. */
static void load_handler(int reg, uae_u32 addr)
{
	mov_l_rm(reg,get_handler_address(addr));
}
5367
5368 /* This version assumes that it is writing *real* memory, and *will* fail
5369 * if that assumption is wrong! No branches, no second chances, just
5370 * straight go-for-it attitude */
5371
/* Emit a direct (unchecked) store of `size` bytes from virtual register
 * `source` to emulated address `address`, byte-swapping to big-endian
 * via a scratch register.  If `clobber` is set the source itself may be
 * used as the scratch (its value is destroyed).  Both tmp and the
 * scratch are forgotten afterwards since their contents are garbage. */
static void writemem_real(int address, int source, int size, int tmp, int clobber)
{
	int f=tmp;

	if (clobber)
		f=source;

#if SIZEOF_VOID_P == 8
	/* 64-bit hosts without 33-bit addressing: addresses are kept
	 * sign-extended. */
	if (!ThirtyThreeBitAddressing)
		sign_extend_32_rr(address, address);
#endif

	switch(size) {
	case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
	case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
	case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
	}
	forget_about(tmp);
	forget_about(f);
}

/* Store one byte (no scratch needed, source preserved). */
void writebyte(int address, int source, int tmp)
{
	writemem_real(address,source,1,tmp,0);
}

/* 16-bit store helper; clobber selects whether source may be destroyed. */
static __inline__ void writeword_general(int address, int source, int tmp,
	int clobber)
{
	writemem_real(address,source,2,tmp,clobber);
}

/* 16-bit store, source value destroyed (saves a register copy). */
void writeword_clobber(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,1);
}

/* 16-bit store, source value preserved. */
void writeword(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,0);
}

/* 32-bit store helper; clobber selects whether source may be destroyed. */
static __inline__ void writelong_general(int address, int source, int tmp,
	int clobber)
{
	writemem_real(address,source,4,tmp,clobber);
}

/* 32-bit store, source value destroyed (saves a register copy). */
void writelong_clobber(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,1);
}

/* 32-bit store, source value preserved. */
void writelong(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,0);
}
5429
5430
5431
5432 /* This version assumes that it is reading *real* memory, and *will* fail
5433 * if that assumption is wrong! No branches, no second chances, just
5434 * straight go-for-it attitude */
5435
5436 static void readmem_real(int address, int dest, int size, int tmp)
5437 {
5438 int f=tmp;
5439
5440 if (size==4 && address!=dest)
5441 f=dest;
5442
5443 #if SIZEOF_VOID_P == 8
5444 if (!ThirtyThreeBitAddressing)
5445 sign_extend_32_rr(address, address);
5446 #endif
5447
5448 switch(size) {
5449 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5450 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5451 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5452 }
5453 forget_about(tmp);
5454 }
5455
5456 void readbyte(int address, int dest, int tmp)
5457 {
5458 readmem_real(address,dest,1,tmp);
5459 }
5460
5461 void readword(int address, int dest, int tmp)
5462 {
5463 readmem_real(address,dest,2,tmp);
5464 }
5465
5466 void readlong(int address, int dest, int tmp)
5467 {
5468 readmem_real(address,dest,4,tmp);
5469 }
5470
5471 void get_n_addr(int address, int dest, int tmp)
5472 {
5473 // a is the register containing the virtual address
5474 // after the offset had been fetched
5475 int a=tmp;
5476
5477 // f is the register that will contain the offset
5478 int f=tmp;
5479
5480 // a == f == tmp if (address == dest)
5481 if (address!=dest) {
5482 a=address;
5483 f=dest;
5484 }
5485
5486 #if REAL_ADDRESSING
5487 mov_l_rr(dest, address);
5488 #elif DIRECT_ADDRESSING
5489 lea_l_brr(dest,address,MEMBaseDiff);
5490 #endif
5491 forget_about(tmp);
5492 }
5493
5494 void get_n_addr_jmp(int address, int dest, int tmp)
5495 {
5496 /* For this, we need to get the same address as the rest of UAE
5497 would --- otherwise we end up translating everything twice */
5498 get_n_addr(address,dest,tmp);
5499 }
5500
5501
/* base is a register, but dp is an actual value.
   target is a register, as is tmp */

/* Emit code computing a 68020-style extension-word effective address
   into TARGET. DP is the extension word; BASE holds the base register
   value; TMP is a scratch register. */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
    int reg = (dp >> 12) & 15;      /* index register number */
    int regd_shift=(dp >> 9) & 3;   /* index scale: multiply by 1<<n */

    if (dp & 0x100) {
	/* Full extension word: optional suppression of base/index,
	   base and outer displacements, optional memory indirection. */
	int ignorebase=(dp&0x80);   /* BS bit: base register suppressed */
	int ignorereg=(dp&0x40);    /* IS bit: index register suppressed */
	int addbase=0;              /* base displacement */
	int outer=0;                /* outer displacement */

	/* Fetch base displacement (word- or long-sized). */
	if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
	if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

	/* Fetch outer displacement (word- or long-sized). */
	if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
	if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

	if ((dp & 0x4) == 0) {  /* add regd *before* the get_long */
	    if (!ignorereg) {
		if ((dp & 0x800) == 0)
		    sign_extend_16_rr(target,reg);  /* word-sized index */
		else
		    mov_l_rr(target,reg);           /* long-sized index */
		shll_l_ri(target,regd_shift);
	    }
	    else
		mov_l_ri(target,0);

	    /* target is now regd */
	    if (!ignorebase)
		add_l(target,base);
	    add_l_ri(target,addbase);
	    if (dp&0x03) readlong(target,target,tmp);  /* memory indirect */
	} else {  /* do the getlong first, then add regd */
	    if (!ignorebase) {
		mov_l_rr(target,base);
		add_l_ri(target,addbase);
	    }
	    else
		mov_l_ri(target,addbase);
	    if (dp&0x03) readlong(target,target,tmp);  /* memory indirect */

	    if (!ignorereg) {
		if ((dp & 0x800) == 0)
		    sign_extend_16_rr(tmp,reg);  /* word-sized index */
		else
		    mov_l_rr(tmp,reg);           /* long-sized index */
		shll_l_ri(tmp,regd_shift);
		/* tmp is now regd */
		add_l(target,tmp);
	    }
	}
	add_l_ri(target,outer);
    }
    else { /* 68000 version: brief extension word, 8-bit displacement */
	if ((dp & 0x800) == 0) { /* Sign extend */
	    sign_extend_16_rr(target,reg);
	    lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
	}
	else {
	    lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
	}
    }
    forget_about(tmp);
}
5569
5570
5571
5572
5573
5574 void set_cache_state(int enabled)
5575 {
5576 if (enabled!=letit)
5577 flush_icache_hard(77);
5578 letit=enabled;
5579 }
5580
5581 int get_cache_state(void)
5582 {
5583 return letit;
5584 }
5585
5586 uae_u32 get_jitted_size(void)
5587 {
5588 if (compiled_code)
5589 return current_compile_p-compiled_code;
5590 return 0;
5591 }
5592
/* Retry policy for the (currently disabled) sbrk-based Linux code
   allocator below: number of placement attempts and step between them. */
const int CODE_ALLOC_MAX_ATTEMPTS = 10;
const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5595
/* Allocate SIZE bytes of memory suitable for generated code; returns
   NULL on failure. DEPTH limits recursion in the retry path. */
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
    /* NOTE(review): this whole branch is compiled out by the "&& 0". */
    /*
      This is a really awful hack that is known to work on Linux at
      least.
      
      The trick here is to make sure the allocated cache is nearby
      code segment, and more precisely in the positive half of a
      32-bit address space. i.e. addr < 0x80000000. Actually, it
      turned out that a 32-bit binary run on AMD64 yields a cache
      allocated around 0xa0000000, thus causing some troubles when
      translating addresses from m68k to x86.
    */
    static uint8 * code_base = NULL;
    if (code_base == NULL) {
	uintptr page_size = getpagesize();
	uintptr boundaries = CODE_ALLOC_BOUNDARIES;
	if (boundaries < page_size)
	    boundaries = page_size;
	code_base = (uint8 *)sbrk(0);
	/* Probe upwards from the break until a fixed mapping succeeds. */
	for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
	    if (vm_acquire_fixed(code_base, size) == 0) {
		uint8 *code = code_base;
		code_base += size;
		return code;
	    }
	    code_base += boundaries;
	}
	return NULL;
    }

    if (vm_acquire_fixed(code_base, size) == 0) {
	uint8 *code = code_base;
	code_base += size;
	return code;
    }

    if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
	return NULL;

    return do_alloc_code(size, depth + 1);
#else
    /* Portable path: let the VM layer choose the address. */
    uint8 *code = (uint8 *)vm_acquire(size);
    return code == VM_MAP_FAILED ? NULL : code;
#endif
}
5643
5644 static inline uint8 *alloc_code(uint32 size)
5645 {
5646 return do_alloc_code(size, 0);
5647 }
5648
5649 void alloc_cache(void)
5650 {
5651 if (compiled_code) {
5652 flush_icache_hard(6);
5653 vm_release(compiled_code, cache_size * 1024);
5654 compiled_code = 0;
5655 }
5656
5657 if (cache_size == 0)
5658 return;
5659
5660 while (!compiled_code && cache_size) {
5661 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5662 compiled_code = 0;
5663 cache_size /= 2;
5664 }
5665 }
5666 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5667
5668 if (compiled_code) {
5669 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5670 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5671 current_compile_p = compiled_code;
5672 current_cache_size = 0;
5673 }
5674 }
5675
5676
5677
5678 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5679
/* Compute the two checksums (additive in *c1, xor in *c2) over the 68k
   source code a block was translated from, so it can later be verified
   against self-modifying code. */
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
    uae_u32 k1 = 0;
    uae_u32 k2 = 0;

#if USE_CHECKSUM_INFO
    /* Walk every recorded source range of the block. */
    checksum_info *csi = bi->csi;
    Dif(!csi) abort();
    while (csi) {
	uae_s32 len = csi->length;
	uintptr tmp = (uintptr)csi->start_p;
#else
    uae_s32 len = bi->len;
    uintptr tmp = (uintptr)bi->min_pcp;
#endif
    uae_u32*pos;

    /* Round the start down to a 32-bit boundary, widening the length
       to compensate, so we can sum whole words. */
    len += (tmp & 3);
    tmp &= ~((uintptr)3);
    pos = (uae_u32 *)tmp;

    /* Overlong ranges are skipped (leaving checksum 0): such blocks are
       treated as never matching. */
    if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
	while (len > 0) {
	    k1 += *pos;   /* additive checksum */
	    k2 ^= *pos;   /* xor checksum */
	    pos++;
	    len -= 4;
	}
    }

#if USE_CHECKSUM_INFO
    csi = csi->next;
    }
#endif

    *c1 = k1;
    *c2 = k2;
}
5718
5719 #if 0
5720 static void show_checksum(CSI_TYPE* csi)
5721 {
5722 uae_u32 k1=0;
5723 uae_u32 k2=0;
5724 uae_s32 len=CSI_LENGTH(csi);
5725 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5726 uae_u32* pos;
5727
5728 len+=(tmp&3);
5729 tmp&=(~3);
5730 pos=(uae_u32*)tmp;
5731
5732 if (len<0 || len>MAX_CHECKSUM_LEN) {
5733 return;
5734 }
5735 else {
5736 while (len>0) {
5737 write_log("%08x ",*pos);
5738 pos++;
5739 len-=4;
5740 }
5741 write_log(" bla\n");
5742 }
5743 }
5744 #endif
5745
5746
5747 int check_for_cache_miss(void)
5748 {
5749 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5750
5751 if (bi) {
5752 int cl=cacheline(regs.pc_p);
5753 if (bi!=cache_tags[cl+1].bi) {
5754 raise_in_cl_list(bi);
5755 return 1;
5756 }
5757 }
5758 return 0;
5759 }
5760
5761
5762 static void recompile_block(void)
5763 {
5764 /* An existing block's countdown code has expired. We need to make
5765 sure that execute_normal doesn't refuse to recompile due to a
5766 perceived cache miss... */
5767 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5768
5769 Dif (!bi)
5770 abort();
5771 raise_in_cl_list(bi);
5772 execute_normal();
5773 return;
5774 }
5775 static void cache_miss(void)
5776 {
5777 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5778 uae_u32 cl=cacheline(regs.pc_p);
5779 blockinfo* bi2=get_blockinfo(cl);
5780
5781 if (!bi) {
5782 execute_normal(); /* Compile this block now */
5783 return;
5784 }
5785 Dif (!bi2 || bi==bi2) {
5786 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5787 abort();
5788 }
5789 raise_in_cl_list(bi);
5790 return;
5791 }
5792
5793 static int called_check_checksum(blockinfo* bi);
5794
/* Verify that the 68k code block BI was translated from is unchanged.
   Returns nonzero if the block is still valid (and reactivates it),
   zero if it changed (and invalidates it for recompilation). */
static inline int block_check_checksum(blockinfo* bi)
{
    uae_u32 c1,c2;
    bool isgood;

    if (bi->status!=BI_NEED_CHECK)
	return 1;  /* This block is in a checked state */

    checksum_count++;

    if (bi->c1 || bi->c2)
	calc_checksum(bi,&c1,&c2);
    else {
	c1=c2=1;  /* Make sure it doesn't match */
    }

    isgood=(c1==bi->c1 && c2==bi->c2);

    if (isgood) {
	/* This block is still OK. So we reactivate. Of course, that
	   means we have to move it into the needs-to-be-flushed list */
	bi->handler_to_use=bi->handler;
	set_dhtu(bi,bi->direct_handler);
	bi->status=BI_CHECKING;
	/* Also verify the blocks this one jumps into directly. */
	isgood=called_check_checksum(bi);
    }
    if (isgood) {
	/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
	   c1,c2,bi->c1,bi->c2);*/
	remove_from_list(bi);
	add_to_active(bi);
	raise_in_cl_list(bi);
	bi->status=BI_ACTIVE;
    }
    else {
	/* This block actually changed. We need to invalidate it,
	   and set it up to be recompiled */
	/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
	   c1,c2,bi->c1,bi->c2); */
	invalidate_block(bi);
	raise_in_cl_list(bi);
    }
    return isgood;
}
5839
5840 static int called_check_checksum(blockinfo* bi)
5841 {
5842 dependency* x=bi->deplist;
5843 int isgood=1;
5844 int i;
5845
5846 for (i=0;i<2 && isgood;i++) {
5847 if (bi->dep[i].jmp_off) {
5848 isgood=block_check_checksum(bi->dep[i].target);
5849 }
5850 }
5851 return isgood;
5852 }
5853
5854 static void check_checksum(void)
5855 {
5856 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5857 uae_u32 cl=cacheline(regs.pc_p);
5858 blockinfo* bi2=get_blockinfo(cl);
5859
5860 /* These are not the droids you are looking for... */
5861 if (!bi) {
5862 /* Whoever is the primary target is in a dormant state, but
5863 calling it was accidental, and we should just compile this
5864 new block */
5865 execute_normal();
5866 return;
5867 }
5868 if (bi!=bi2) {
5869 /* The block was hit accidentally, but it does exist. Cache miss */
5870 cache_miss();
5871 return;
5872 }
5873
5874 if (!block_check_checksum(bi))
5875 execute_normal();
5876 }
5877
/* Bring the current register/cache state in line with the entry state
   (smallstate) recorded for block BI before jumping into it. */
static __inline__ void match_states(blockinfo* bi)
{
    int i;
    smallstate* s=&(bi->env);

    if (bi->status==BI_NEED_CHECK) {
	block_check_checksum(bi);
    }
    if (bi->status==BI_ACTIVE ||
	bi->status==BI_FINALIZING) { /* Deal with the *promises* the
					block makes (about not using
					certain vregs) */
	for (i=0;i<16;i++) {
	    if (s->virt[i]==L_UNNEEDED) {
		// write_log("unneeded reg %d at %p\n",i,target);
		COMPCALL(forget_about)(i); // FIXME
	    }
	}
    }
    flush(1);

    /* And now deal with the *demands* the block makes */
    for (i=0;i<N_REGS;i++) {
	int v=s->nat[i];
	if (v>=0) {
	    // printf("Loading reg %d into %d at %p\n",v,i,target);
	    readreg_specific(v,4,i);
	    // do_load_reg(i,v);
	    // setlock(i);
	}
    }
    /* Second pass: release the locks taken by readreg_specific above. */
    for (i=0;i<N_REGS;i++) {
	int v=s->nat[i];
	if (v>=0) {
	    unlock2(i);
	}
    }
}
5916
/* Generate, once at startup, the small assembly stubs that connect
   translated code with the C world: the "popall" epilogues that restore
   callee-saved registers before entering a C helper, the "pushall"
   prologue used to enter translated code, and (on x86/x86-64) the
   inline dispatch/execute loop. */
static __inline__ void create_popalls(void)
{
    int i,r;

    if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
	write_log("FATAL: Could not allocate popallspace!\n");
	abort();
    }
    vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);

    current_compile_p=popallspace;
    set_target(current_compile_p);
#if USE_PUSH_POP
    /* If we can't use gcc inline assembly, we need to pop some
       registers before jumping back to the various get-out routines.
       This generates the code for it.
    */
    align_target(align_jumps);
    popall_do_nothing=get_target();
    for (i=0;i<N_REGS;i++) {
	if (need_to_preserve[i])
	    raw_pop_l_r(i);
    }
    raw_jmp((uintptr)do_nothing);

    align_target(align_jumps);
    popall_execute_normal=get_target();
    for (i=0;i<N_REGS;i++) {
	if (need_to_preserve[i])
	    raw_pop_l_r(i);
    }
    raw_jmp((uintptr)execute_normal);

    align_target(align_jumps);
    popall_cache_miss=get_target();
    for (i=0;i<N_REGS;i++) {
	if (need_to_preserve[i])
	    raw_pop_l_r(i);
    }
    raw_jmp((uintptr)cache_miss);

    align_target(align_jumps);
    popall_recompile_block=get_target();
    for (i=0;i<N_REGS;i++) {
	if (need_to_preserve[i])
	    raw_pop_l_r(i);
    }
    raw_jmp((uintptr)recompile_block);

    align_target(align_jumps);
    popall_exec_nostats=get_target();
    for (i=0;i<N_REGS;i++) {
	if (need_to_preserve[i])
	    raw_pop_l_r(i);
    }
    raw_jmp((uintptr)exec_nostats);

    align_target(align_jumps);
    popall_check_checksum=get_target();
    for (i=0;i<N_REGS;i++) {
	if (need_to_preserve[i])
	    raw_pop_l_r(i);
    }
    raw_jmp((uintptr)check_checksum);

    align_target(align_jumps);
    current_compile_p=get_target();
#else
    /* With inline-assembly glue the C routines are entered directly. */
    popall_exec_nostats=(void *)exec_nostats;
    popall_execute_normal=(void *)execute_normal;
    popall_cache_miss=(void *)cache_miss;
    popall_recompile_block=(void *)recompile_block;
    popall_do_nothing=(void *)do_nothing;
    popall_check_checksum=(void *)check_checksum;
#endif

    /* And now, the code to do the matching pushes and then jump
       into a handler routine */
    pushall_call_handler=get_target();
#if USE_PUSH_POP
    for (i=N_REGS;i--;) {
	if (need_to_preserve[i])
	    raw_push_l_r(i);
    }
#endif
    /* Dispatch: mask the 68k PC into the tag table and jump through it. */
    r=REG_PC_TMP;
    raw_mov_l_rm(r,(uintptr)&regs.pc_p);
    raw_and_l_ri(r,TAGMASK);
    raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);

#if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
    /* Inline execute loop: call handlers until regs.spcflags is set,
       then service specialties; leave when m68k_do_specialties returns
       nonzero or quit_program is set. */
    align_target(align_jumps);
    m68k_compile_execute = (void (*)(void))get_target();
    for (i=N_REGS;i--;) {
	if (need_to_preserve[i])
	    raw_push_l_r(i);
    }
    align_target(align_loops);
    uae_u32 dispatch_loop = (uintptr)get_target();
    r=REG_PC_TMP;
    raw_mov_l_rm(r,(uintptr)&regs.pc_p);
    raw_and_l_ri(r,TAGMASK);
    raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
    raw_cmp_l_mi((uintptr)&regs.spcflags,0);
    raw_jcc_b_oponly(NATIVE_CC_EQ);
    emit_byte(dispatch_loop-((uintptr)get_target()+1));  /* short back-branch */
    raw_call((uintptr)m68k_do_specialties);
    raw_test_l_rr(REG_RESULT,REG_RESULT);
    raw_jcc_b_oponly(NATIVE_CC_EQ);
    emit_byte(dispatch_loop-((uintptr)get_target()+1));
    raw_cmp_b_mi((uintptr)&quit_program,0);
    raw_jcc_b_oponly(NATIVE_CC_EQ);
    emit_byte(dispatch_loop-((uintptr)get_target()+1));
    for (i=0;i<N_REGS;i++) {
	if (need_to_preserve[i])
	    raw_pop_l_r(i);
    }
    raw_ret();
#endif

    // no need to further write into popallspace
    vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
}
6040
6041 static __inline__ void reset_lists(void)
6042 {
6043 int i;
6044
6045 for (i=0;i<MAX_HOLD_BI;i++)
6046 hold_bi[i]=NULL;
6047 active=NULL;
6048 dormant=NULL;
6049 }
6050
/* Emit the two stub handlers for a fresh blockinfo and reset its
   dependency and state fields. */
static void prepare_block(blockinfo* bi)
{
    int i;

    set_target(current_compile_p);
    align_target(align_jumps);
    /* direct_pen: entered when the block needs (re)compilation; store
       the block's 68k PC into regs.pc_p, then go compile. */
    bi->direct_pen=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
    raw_mov_l_mr((uintptr)&regs.pc_p,0);
    raw_jmp((uintptr)popall_execute_normal);

    align_target(align_jumps);
    /* direct_pcc: entered when the block's checksum must be verified. */
    bi->direct_pcc=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
    raw_mov_l_mr((uintptr)&regs.pc_p,0);
    raw_jmp((uintptr)popall_check_checksum);
    current_compile_p=get_target();

    bi->deplist=NULL;
    for (i=0;i<2;i++) {
	bi->dep[i].prev_p=NULL;
	bi->dep[i].next=NULL;
    }
    bi->env=default_ss;
    bi->status=BI_INVALID;
    bi->havestate=0;
    //bi->env=empty_ss;
}
6079
6080 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6081 static inline void reset_compop(int opcode)
6082 {
6083 compfunctbl[opcode] = NULL;
6084 nfcompfunctbl[opcode] = NULL;
6085 }
6086
/* Parse exactly four hexadecimal characters at P into an opcode value.
   Returns the opcode (0..0xffff), or -1 if any character is not hex. */
static int read_opcode(const char *p)
{
    int opcode = 0;
    for (int i = 0; i < 4; i++) {
	const char c = p[i];
	int digit;
	if (c >= '0' && c <= '9')
	    digit = c - '0';
	else if (c >= 'a' && c <= 'f')
	    digit = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
	    digit = c - 'A' + 10;
	else
	    return -1;
	opcode = (opcode << 4) | digit;
    }
    return opcode;
}
6109
/* Parse the "jitblacklist" preference -- a ';'-separated list of hex
   opcodes or ranges ("xxxx" or "xxxx-yyyy") -- and remove the compiled
   handlers for every opcode listed. Returns false on malformed input. */
static bool merge_blacklist()
{
    const char *blacklist = PrefsFindString("jitblacklist");
    if (blacklist) {
	const char *p = blacklist;
	for (;;) {
	    if (*p == 0)
		return true;

	    int opcode1 = read_opcode(p);
	    if (opcode1 < 0)
		return false;
	    p += 4;

	    /* Optional "-yyyy" range suffix. */
	    int opcode2 = opcode1;
	    if (*p == '-') {
		p++;
		opcode2 = read_opcode(p);
		if (opcode2 < 0)
		    return false;
		p += 4;
	    }

	    /* Entry must end the string or be followed by ';'. */
	    if (*p == 0 || *p == ';') {
		write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
		for (int opcode = opcode1; opcode <= opcode2; opcode++)
		    reset_compop(cft_map(opcode));

		if (*p++ == ';')
		    continue;

		return true;
	    }

	    return false;
	}
    }
    return true;
}
6149
6150 void build_comp(void)
6151 {
6152 int i;
6153 int jumpcount=0;
6154 unsigned long opcode;
6155 struct comptbl* tbl=op_smalltbl_0_comp_ff;
6156 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6157 int count;
6158 int cpu_level = 0; // 68000 (default)
6159 if (CPUType == 4)
6160 cpu_level = 4; // 68040 with FPU
6161 else {
6162 if (FPUType)
6163 cpu_level = 3; // 68020 with FPU
6164 else if (CPUType >= 2)
6165 cpu_level = 2; // 68020
6166 else if (CPUType == 1)
6167 cpu_level = 1;
6168 }
6169 struct cputbl *nfctbl = (
6170 cpu_level == 4 ? op_smalltbl_0_nf
6171 : cpu_level == 3 ? op_smalltbl_1_nf
6172 : cpu_level == 2 ? op_smalltbl_2_nf
6173 : cpu_level == 1 ? op_smalltbl_3_nf
6174 : op_smalltbl_4_nf);
6175
6176 write_log ("<JIT compiler> : building compiler function tables\n");
6177
6178 for (opcode = 0; opcode < 65536; opcode++) {
6179 reset_compop(opcode);
6180 nfcpufunctbl[opcode] = op_illg_1;
6181 prop[opcode].use_flags = 0x1f;
6182 prop[opcode].set_flags = 0x1f;
6183 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6184 }
6185
6186 for (i = 0; tbl[i].opcode < 65536; i++) {
6187 int cflow = table68k[tbl[i].opcode].cflow;
6188 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6189 cflow = fl_const_jump;
6190 else
6191 cflow &= ~fl_const_jump;
6192 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6193
6194 int uses_fpu = tbl[i].specific & 32;
6195 if (uses_fpu && avoid_fpu)
6196 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6197 else
6198 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6199 }
6200
6201 for (i = 0; nftbl[i].opcode < 65536; i++) {
6202 int uses_fpu = tbl[i].specific & 32;
6203 if (uses_fpu && avoid_fpu)
6204 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6205 else
6206 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6207
6208 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6209 }
6210
6211 for (i = 0; nfctbl[i].handler; i++) {
6212 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6213 }
6214
6215 for (opcode = 0; opcode < 65536; opcode++) {
6216 compop_func *f;
6217 compop_func *nff;
6218 cpuop_func *nfcf;
6219 int isaddx,cflow;
6220
6221 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6222 continue;
6223
6224 if (table68k[opcode].handler != -1) {
6225 f = compfunctbl[cft_map(table68k[opcode].handler)];
6226 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6227 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6228 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6229 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6230 prop[cft_map(opcode)].cflow = cflow;
6231 prop[cft_map(opcode)].is_addx = isaddx;
6232 compfunctbl[cft_map(opcode)] = f;
6233 nfcompfunctbl[cft_map(opcode)] = nff;
6234 Dif (nfcf == op_illg_1)
6235 abort();
6236 nfcpufunctbl[cft_map(opcode)] = nfcf;
6237 }
6238 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6239 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6240 }
6241 for (i = 0; nfctbl[i].handler != NULL; i++) {
6242 if (nfctbl[i].specific)
6243 nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
6244 }
6245
6246 /* Merge in blacklist */
6247 if (!merge_blacklist())
6248 write_log("<JIT compiler> : blacklist merge failure!\n");
6249
6250 count=0;
6251 for (opcode = 0; opcode < 65536; opcode++) {
6252 if (compfunctbl[cft_map(opcode)])
6253 count++;
6254 }
6255 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6256
6257 /* Initialise state */
6258 create_popalls();
6259 alloc_cache();
6260 reset_lists();
6261
6262 for (i=0;i<TAGSIZE;i+=2) {
6263 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6264 cache_tags[i+1].bi=NULL;
6265 }
6266
6267 #if 0
6268 for (i=0;i<N_REGS;i++) {
6269 empty_ss.nat[i].holds=-1;
6270 empty_ss.nat[i].validsize=0;
6271 empty_ss.nat[i].dirtysize=0;
6272 }
6273 #endif
6274 for (i=0;i<VREGS;i++) {
6275 empty_ss.virt[i]=L_NEEDED;
6276 }
6277 for (i=0;i<N_REGS;i++) {
6278 empty_ss.nat[i]=L_UNKNOWN;
6279 }
6280 default_ss=empty_ss;
6281 }
6282
6283
/* Flush strategy used when nothing needs doing. */
static void flush_icache_none(int n)
{
    /* Nothing to do. */
}
6288
6289 static void flush_icache_hard(int n)
6290 {
6291 uae_u32 i;
6292 blockinfo* bi, *dbi;
6293
6294 hard_flush_count++;
6295 #if 0
6296 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6297 n,regs.pc,regs.pc_p,current_cache_size/1024);
6298 current_cache_size = 0;
6299 #endif
6300 bi=active;
6301 while(bi) {
6302 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6303 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6304 dbi=bi; bi=bi->next;
6305 free_blockinfo(dbi);
6306 }
6307 bi=dormant;
6308 while(bi) {
6309 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6310 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6311 dbi=bi; bi=bi->next;
6312 free_blockinfo(dbi);
6313 }
6314
6315 reset_lists();
6316 if (!compiled_code)
6317 return;
6318 current_compile_p=compiled_code;
6319 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6320 }
6321
6322
6323 /* "Soft flushing" --- instead of actually throwing everything away,
6324 we simply mark everything as "needs to be checked".
6325 */
6326
6327 static inline void flush_icache_lazy(int n)
6328 {
6329 uae_u32 i;
6330 blockinfo* bi;
6331 blockinfo* bi2;
6332
6333 soft_flush_count++;
6334 if (!active)
6335 return;
6336
6337 bi=active;
6338 while (bi) {
6339 uae_u32 cl=cacheline(bi->pc_p);
6340 if (bi->status==BI_INVALID ||
6341 bi->status==BI_NEED_RECOMP) {
6342 if (bi==cache_tags[cl+1].bi)
6343 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6344 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6345 set_dhtu(bi,bi->direct_pen);
6346 bi->status=BI_INVALID;
6347 }
6348 else {
6349 if (bi==cache_tags[cl+1].bi)
6350 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6351 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6352 set_dhtu(bi,bi->direct_pcc);
6353 bi->status=BI_NEED_CHECK;
6354 }
6355 bi2=bi;
6356 bi=bi->next;
6357 }
6358 /* bi2 is now the last entry in the active list */
6359 bi2->next=dormant;
6360 if (dormant)
6361 dormant->prev_p=&(bi2->next);
6362
6363 dormant=active;
6364 active->prev_p=&dormant;
6365 active=NULL;
6366 }
6367
/* Invalidate all translated blocks overlapping the 68k address range
   [start, start+length). Falls back to a full flush when the lazy
   range flush is not enabled. */
void flush_icache_range(uae_u32 start, uae_u32 length)
{
    if (!active)
	return;

#if LAZY_FLUSH_ICACHE_RANGE
    uae_u8 *start_p = get_real_address(start);
    blockinfo *bi = active;
    while (bi) {
#if USE_CHECKSUM_INFO
	/* Overlap test against every recorded source range of the block. */
	bool invalidate = false;
	for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
	    invalidate = (((start_p - csi->start_p) < csi->length) ||
			  ((csi->start_p - start_p) < length));
#else
	// Assume system is consistent and would invalidate the right range
	const bool invalidate = (bi->pc_p - start_p) < length;
#endif
	if (invalidate) {
	    /* Mark the block for recompilation and route callers to the
	       execute_normal stub. */
	    uae_u32 cl = cacheline(bi->pc_p);
	    if (bi == cache_tags[cl + 1].bi)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	    bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	    set_dhtu(bi, bi->direct_pen);
	    bi->status = BI_NEED_RECOMP;
	}
	bi = bi->next;
    }
    return;
#endif
    flush_icache(-1);
}
6400
6401 static void catastrophe(void)
6402 {
6403 abort();
6404 }
6405
6406 int failure;
6407
6408 #define TARGET_M68K 0
6409 #define TARGET_POWERPC 1
6410 #define TARGET_X86 2
6411 #define TARGET_X86_64 3
6412 #if defined(i386) || defined(__i386__)
6413 #define TARGET_NATIVE TARGET_X86
6414 #endif
6415 #if defined(powerpc) || defined(__powerpc__)
6416 #define TARGET_NATIVE TARGET_POWERPC
6417 #endif
6418 #if defined(x86_64) || defined(__x86_64__)
6419 #define TARGET_NATIVE TARGET_X86_64
6420 #endif
6421
6422 #ifdef ENABLE_MON
6423 static uae_u32 mon_read_byte_jit(uintptr addr)
6424 {
6425 uae_u8 *m = (uae_u8 *)addr;
6426 return (uintptr)(*m);
6427 }
6428
6429 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6430 {
6431 uae_u8 *m = (uae_u8 *)addr;
6432 *m = b;
6433 }
6434 #endif
6435
/* Disassemble LENGTH bytes at START via the mon debugger, using the
   disassembler selected by TARGET. No-op unless JIT debugging is on
   and mon support was compiled in. */
void disasm_block(int target, uint8 * start, size_t length)
{
    if (!JITDebug)
	return;

#if defined(JIT_DEBUG) && defined(ENABLE_MON)
    char disasm_str[200];
    /* NOTE(review): "%x" with pointer arguments truncates/mismatches on
       64-bit hosts -- debug-only path, but worth confirming. */
    sprintf(disasm_str, "%s $%x $%x",
	    target == TARGET_M68K ? "d68" :
	    target == TARGET_X86 ? "d86" :
	    target == TARGET_X86_64 ? "d8664" :
	    target == TARGET_POWERPC ? "d" : "x",
	    start, start + length - 1);

    /* Temporarily point mon's memory accessors at raw host memory. */
    uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
    void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;

    mon_read_byte = mon_read_byte_jit;
    mon_write_byte = mon_write_byte_jit;

    char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
    mon(4, arg);

    mon_read_byte = old_mon_read_byte;
    mon_write_byte = old_mon_write_byte;
#endif
}
6463
6464 static void disasm_native_block(uint8 *start, size_t length)
6465 {
6466 disasm_block(TARGET_NATIVE, start, length);
6467 }
6468
6469 static void disasm_m68k_block(uint8 *start, size_t length)
6470 {
6471 disasm_block(TARGET_M68K, start, length);
6472 }
6473
6474 #ifdef HAVE_GET_WORD_UNSWAPPED
6475 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6476 #else
6477 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6478 #endif
6479
6480 #if JIT_DEBUG
6481 static uae_u8 *last_regs_pc_p = 0;
6482 static uae_u8 *last_compiled_block_addr = 0;
6483
/* Debug aid: log the host addresses of emulator state, the full m68k
   processor state, and the location/size of the last executed block. */
void compiler_dumpstate(void)
{
    if (!JITDebug)
	return;
	
    write_log("### Host addresses\n");
    write_log("MEM_BASE : %x\n", MEMBaseDiff);
    write_log("PC_P : %p\n", &regs.pc_p);
    write_log("SPCFLAGS : %p\n", &regs.spcflags);
    write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
    write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
    write_log("\n");
	
    write_log("### M68k processor state\n");
    m68k_dumpstate(0);
    write_log("\n");
	
    write_log("### Block in Mac address space\n");
    write_log("M68K block : %p\n",
	      (void *)(uintptr)get_virtual_address(last_regs_pc_p));
    write_log("Native block : %p (%d bytes)\n",
	      (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
	      get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
    write_log("\n");
}
6509 #endif
6510
/* Translate the m68k instruction trace recorded in pc_hist[0..blocklen-1]
 * into a native code block and register it in the translation cache.
 *
 * The function proceeds in stages:
 *   1. backward liveness analysis over the trace to find which CCR flags
 *      each instruction really needs to compute;
 *   2. emission of the "direct" handler (countdown stub, then either a
 *      fall-back to the interpreter at optlevel 0 or per-opcode compiled
 *      code with interpreter fall-back for opcodes with no compiler);
 *   3. branch epilogue generation (two-way exit with jmpdep patching, or
 *      an indirect dispatch through cache_tags);
 *   4. checksum/ROM bookkeeping and emission of the "non-direct" handler
 *      that verifies regs.pc_p before jumping into the direct one.
 *
 * No value is returned; all results live in the blockinfo 'bi' and the
 * global code buffer (current_compile_p / get_target()).
 */
static void compile_block(cpu_history* pc_hist, int blocklen)
{
    /* Only translate when the JIT is enabled and a code buffer exists. */
    if (letit && compiled_code) {
#if PROFILE_COMPILE_TIME
	compile_count++;
	clock_t start_time = clock();
#endif
#if JIT_DEBUG
	bool disasm_block = false;
#endif

	/* OK, here we need to 'compile' a block */
	int i;
	int r;
	int was_comp=0;	/* nonzero while the register allocator state is live */
	uae_u8 liveflags[MAXRUN+1];	/* per-insn needed-flags mask, computed backwards */
#if USE_CHECKSUM_INFO
	bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
	/* With checksum info the trace is walked backwards below, so start
	   the [min_pcp,max_pcp] range at the last instruction. */
	uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
	uintptr min_pcp=max_pcp;
#else
	uintptr max_pcp=(uintptr)pc_hist[0].location;
	uintptr min_pcp=max_pcp;
#endif
	uae_u32 cl=cacheline(pc_hist[0].location);
	void* specflags=(void*)&regs.spcflags;
	blockinfo* bi=NULL;
	blockinfo* bi2;
	int extra_len=0;

	redo_current_block=0;
	/* Out of translation cache space: wipe everything first. */
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	alloc_blockinfos();

	bi=get_blockinfo_addr_new(pc_hist[0].location,0);
	bi2=get_blockinfo(cl);

	optlev=bi->optlevel;
	if (bi->status!=BI_INVALID) {
	    Dif (bi!=bi2) {
		/* I don't think it can happen anymore. Shouldn't, in
		   any case. So let's make sure... */
		write_log("WOOOWOO count=%d, ol=%d %p %p\n",
			  bi->count,bi->optlevel,bi->handler_to_use,
			  cache_tags[cl].handler);
		abort();
	    }

	    Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
		write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
		/* What the heck? We are not supposed to be here! */
		abort();
	    }
	}
	/* Countdown expired: move to the next populated optimization level
	   and re-arm the counter from optcount[]. */
	if (bi->count==-1) {
	    optlev++;
	    while (!optcount[optlev])
		optlev++;
	    bi->count=optcount[optlev]-1;
	}
	current_block_pc_p=(uintptr)pc_hist[0].location;

	remove_deps(bi); /* We are about to create new code */
	bi->optlevel=optlev;
	bi->pc_p=(uae_u8*)pc_hist[0].location;
#if USE_CHECKSUM_INFO
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif

	/* Backward dataflow pass: liveflags[i] is the set of CCR flags that
	   instruction i must produce because some later instruction (or the
	   block exit, which conservatively needs all 0x1f) consumes them. */
	liveflags[blocklen]=0x1f; /* All flags needed afterwards */
	i=blocklen;
	while (i--) {
	    uae_u16* currpcp=pc_hist[i].location;
	    uae_u32 op=DO_GET_OPCODE(currpcp);

#if USE_CHECKSUM_INFO
	    trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
#if USE_INLINING
	    /* A constant jump splits the trace into discontiguous memory
	       ranges; record the finished range as its own checksum region. */
	    if (is_const_jump(op)) {
		checksum_info *csi = alloc_checksum_info();
		csi->start_p = (uae_u8 *)min_pcp;
		csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
		csi->next = bi->csi;
		bi->csi = csi;
		max_pcp = (uintptr)currpcp;
	    }
#endif
	    min_pcp = (uintptr)currpcp;
#else
	    if ((uintptr)currpcp<min_pcp)
		min_pcp=(uintptr)currpcp;
	    if ((uintptr)currpcp>max_pcp)
		max_pcp=(uintptr)currpcp;
#endif

	    liveflags[i]=((liveflags[i+1]&
			   (~prop[op].set_flags))|
			  prop[op].use_flags);
	    /* ADDX/SUBX only clear Z (they never set it), so Z is only
	       live-in if it was live-out too. */
	    if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
		liveflags[i]&= ~FLAG_Z;
	}

#if USE_CHECKSUM_INFO
	/* Record the final (or only) contiguous range of the trace. */
	checksum_info *csi = alloc_checksum_info();
	csi->start_p = (uae_u8 *)min_pcp;
	csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
	csi->next = bi->csi;
	bi->csi = csi;
#endif

	bi->needed_flags=liveflags[0];

	/* --- Stage 2: emit the direct handler --- */
	align_target(align_loops);
	was_comp=0;

	bi->direct_handler=(cpuop_func *)get_target();
	set_dhtu(bi,bi->direct_handler);
	bi->status=BI_COMPILING;
	current_block_start_target=(uintptr)get_target();

	log_startblock();

	if (bi->count>=0) { /* Need to generate countdown code */
	    raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
	    raw_sub_l_mi((uintptr)&(bi->count),1);
	    raw_jl((uintptr)popall_recompile_block);
	}
	if (optlev==0) { /* No need to actually translate */
	    /* Execute normally without keeping stats */
	    raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
	    raw_jmp((uintptr)popall_exec_nostats);
	}
	else {
	    reg_alloc_run=0;
	    next_pc_p=0;
	    taken_pc_p=0;
	    branch_cc=0;

	    comp_pc_p=(uae_u8*)pc_hist[0].location;
	    init_comp();
	    was_comp=1;

#if JIT_DEBUG
	    if (JITDebug) {
		raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
		raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
	    }
#endif

	    /* Translate each instruction; bail out of the loop if the code
	       buffer is about to overflow. */
	    for (i=0;i<blocklen &&
		     get_target_noopt()<max_compile_start;i++) {
		cpuop_func **cputbl;
		compop_func **comptbl;
		uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
		needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
		/* Pick the no-flags tables when nothing downstream consumes
		   the flags this opcode would set. */
		if (!needed_flags) {
		    cputbl=nfcpufunctbl;
		    comptbl=nfcompfunctbl;
		}
		else {
		    cputbl=cpufunctbl;
		    comptbl=compfunctbl;
		}

		failure = 1; // gb-- defaults to failure state
		if (comptbl[opcode] && optlev>1) {
		    failure=0;
		    if (!was_comp) {
			comp_pc_p=(uae_u8*)pc_hist[i].location;
			init_comp();
		    }
		    was_comp=1;

		    /* The compile handler may set 'failure' itself if it
		       gives up mid-way. */
		    comptbl[opcode](opcode);
		    freescratch();
		    if (!(liveflags[i+1] & FLAG_CZNV)) {
			/* We can forget about flags */
			dont_care_flags();
		    }
#if INDIVIDUAL_INST
		    flush(1);
		    nop();
		    flush(1);
		    was_comp=0;
#endif
		}

		if (failure) {
		    /* No compiled version: sync state and call the
		       interpreter routine for this opcode. */
		    if (was_comp) {
			flush(1);
			was_comp=0;
		    }
		    raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
#if USE_NORMAL_CALLING_CONVENTION
		    raw_push_l_r(REG_PAR1);
#endif
		    raw_mov_l_mi((uintptr)&regs.pc_p,
				 (uintptr)pc_hist[i].location);
		    raw_call((uintptr)cputbl[opcode]);
#if PROFILE_UNTRANSLATED_INSNS
		    // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
		    raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
#endif
#if USE_NORMAL_CALLING_CONVENTION
		    raw_inc_sp(4);
#endif

		    /* Mid-block: test regs.spcflags after the interpreter
		       call and leave through popall_do_nothing if any are
		       set (short forward jz skips the exit). */
		    if (i < blocklen - 1) {
			uae_s8* branchadd;

			raw_mov_l_rm(0,(uintptr)specflags);
			raw_test_l_rr(0,0);
			raw_jz_b_oponly();
			branchadd=(uae_s8 *)get_target();
			emit_byte(0);
			raw_jmp((uintptr)popall_do_nothing);
			/* Back-patch the 8-bit jz displacement. */
			*branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
		    }
		}
	    }
#if 1 /* This isn't completely kosher yet; It really needs to be
	 be integrated into a general inter-block-dependency scheme */
	    /* Self-loop special case: if the taken target of the final
	       branch is this very block, peek at the fall-through successor
	       to decide whether flags can be discarded at the exit. */
	    if (next_pc_p && taken_pc_p &&
		was_comp && taken_pc_p==current_block_pc_p) {
		blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
		blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
		uae_u8 x=bi1->needed_flags;

		if (x==0xff || 1) { /* To be on the safe side */
		    uae_u16* next=(uae_u16*)next_pc_p;
		    uae_u32 op=DO_GET_OPCODE(next);

		    x=0x1f;
		    x&=(~prop[op].set_flags);
		    x|=prop[op].use_flags;
		}

		x|=bi2->needed_flags;
		if (!(x & FLAG_CZNV)) {
		    /* We can forget about flags */
		    dont_care_flags();
		    extra_len+=2; /* The next instruction now is part of this
				     block */
		}

	    }
#endif
	    log_flush();

	    /* --- Stage 3: block epilogue --- */
	    if (next_pc_p) { /* A branch was registered */
		uintptr t1=next_pc_p;
		uintptr t2=taken_pc_p;
		int cc=branch_cc;

		uae_u32* branchadd;
		uae_u32* tba;
		bigstate tmp;
		blockinfo* tbi;

		if (taken_pc_p<next_pc_p) {
		    /* backward branch. Optimize for the "taken" case ---
		       which means the raw_jcc should fall through when
		       the 68k branch is taken. */
		    t1=taken_pc_p;
		    t2=next_pc_p;
		    cc=branch_cc^1;
		}

		/* Save the full register-allocation state so the second
		   exit can be generated from the same starting point. */
		tmp=live; /* ouch! This is big... */
		raw_jcc_l_oponly(cc);
		branchadd=(uae_u32*)get_target();
		emit_long(0);

		/* predicted outcome */
		tbi=get_blockinfo_addr_new((void*)t1,1);
		match_states(tbi);
		raw_cmp_l_mi((uintptr)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		/* Direct jump to the target handler; create_jmpdep() below
		   registers 'tba' for re-patching when the target moves. */
		emit_long(get_handler(t1)-((uintptr)tba+4));
		raw_mov_l_mi((uintptr)&regs.pc_p,t1);
		raw_jmp((uintptr)popall_do_nothing);
		create_jmpdep(bi,0,tba,t1);

		align_target(align_jumps);
		/* not-predicted outcome */
		*branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
		live=tmp; /* Ouch again */
		tbi=get_blockinfo_addr_new((void*)t2,1);
		match_states(tbi);

		//flush(1); /* Can only get here if was_comp==1 */
		raw_cmp_l_mi((uintptr)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t2)-((uintptr)tba+4));
		raw_mov_l_mi((uintptr)&regs.pc_p,t2);
		raw_jmp((uintptr)popall_do_nothing);
		create_jmpdep(bi,1,tba,t2);
	    }
	    else
	    {
		if (was_comp) {
		    flush(1);
		}

		/* Let's find out where next_handler is... */
		if (was_comp && isinreg(PC_P)) {
		    /* PC still in a register: dispatch through cache_tags,
		       using cmov so a pending spcflags forces the
		       popall_do_nothing exit instead. */
		    r=live.state[PC_P].realreg;
			raw_and_l_ri(r,TAGMASK);
			int r2 = (r==0) ? 1 : 0;
			raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
			raw_cmp_l_mi((uintptr)specflags,0);
			raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
			raw_jmp_r(r2);
		}
		else if (was_comp && isconst(PC_P)) {
		    /* Compile-time-known successor: emit a patchable direct
		       jump, just like a registered branch exit. */
		    uae_u32 v=live.state[PC_P].val;
		    uae_u32* tba;
		    blockinfo* tbi;

		    tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
		    match_states(tbi);

		    raw_cmp_l_mi((uintptr)specflags,0);
		    raw_jcc_l_oponly(4);
		    tba=(uae_u32*)get_target();
		    emit_long(get_handler(v)-((uintptr)tba+4));
		    raw_mov_l_mi((uintptr)&regs.pc_p,v);
		    raw_jmp((uintptr)popall_do_nothing);
		    create_jmpdep(bi,0,tba,v);
		}
		else {
		    /* Generic case: reload regs.pc_p and dispatch through
		       cache_tags as above. */
		    r=REG_PC_TMP;
		    raw_mov_l_rm(r,(uintptr)&regs.pc_p);
		    raw_and_l_ri(r,TAGMASK);
		    int r2 = (r==0) ? 1 : 0;
		    raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
		    raw_cmp_l_mi((uintptr)specflags,0);
		    raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
		    raw_jmp_r(r2);
		}
	    }
	}

#if USE_MATCH
	if (callers_need_recompile(&live,&(bi->env))) {
	    mark_callers_recompile(bi);
	}

	big_to_small_state(&live,&(bi->env));
#endif

	/* --- Stage 4: cache bookkeeping --- */
#if USE_CHECKSUM_INFO
	remove_from_list(bi);
	if (trace_in_rom) {
	    // No need to checksum that block trace on cache invalidation
	    free_checksum_info_chain(bi->csi);
	    bi->csi = NULL;
	    add_to_dormant(bi);
	}
	else {
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
	    add_to_active(bi);
	}
#else
	if (next_pc_p+extra_len>=max_pcp &&
	    next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
	    max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
	else
	    max_pcp+=LONGEST_68K_INST;

	bi->len=max_pcp-min_pcp;
	bi->min_pcp=min_pcp;

	remove_from_list(bi);
	if (isinrom(min_pcp) && isinrom(max_pcp)) {
	    add_to_dormant(bi); /* No need to checksum it on cache flush.
				   Please don't start changing ROMs in
				   flight! */
	}
	else {
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
	    add_to_active(bi);
	}
#endif

	current_cache_size += get_target() - (uae_u8 *)current_compile_p;

#if JIT_DEBUG
	if (JITDebug)
	    bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;

	if (JITDebug && disasm_block) {
	    uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
	    D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
	    uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
	    disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
	    D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
	    disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
	    getchar();
	}
#endif

	log_dump();
	align_target(align_jumps);

	/* This is the non-direct handler */
	bi->handler=
	    bi->handler_to_use=(cpuop_func *)get_target();
	/* Guard against cache-tag hash collisions: verify the emulated PC
	   really belongs to this block before entering the direct handler. */
	raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
	raw_jnz((uintptr)popall_cache_miss);
	comp_pc_p=(uae_u8*)pc_hist[0].location;

	bi->status=BI_FINALIZING;
	init_comp();
	match_states(bi);
	flush(1);

	raw_jmp((uintptr)bi->direct_handler);

	current_compile_p=get_target();
	raise_in_cl_list(bi);

	/* We will flush soon, anyway, so let's do it now */
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	bi->status=BI_ACTIVE;
	if (redo_current_block)
	    block_need_recompile(bi);

#if PROFILE_COMPILE_TIME
	compile_time += (clock() - start_time);
#endif
    }
}
6951
/* Deliberately empty: provides a valid, callable no-op target for
 * generated code paths that must land somewhere harmless. */
void do_nothing(void)
{
}
6956
6957 void exec_nostats(void)
6958 {
6959 for (;;) {
6960 uae_u32 opcode = GET_OPCODE;
6961 (*cpufunctbl[opcode])(opcode);
6962 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6963 return; /* We will deal with the spcflags in the caller */
6964 }
6965 }
6966 }
6967
6968 void execute_normal(void)
6969 {
6970 if (!check_for_cache_miss()) {
6971 cpu_history pc_hist[MAXRUN];
6972 int blocklen = 0;
6973 #if REAL_ADDRESSING || DIRECT_ADDRESSING
6974 start_pc_p = regs.pc_p;
6975 start_pc = get_virtual_address(regs.pc_p);
6976 #else
6977 start_pc_p = regs.pc_oldp;
6978 start_pc = regs.pc;
6979 #endif
6980 for (;;) { /* Take note: This is the do-it-normal loop */
6981 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6982 uae_u32 opcode = GET_OPCODE;
6983 #if FLIGHT_RECORDER
6984 m68k_record_step(m68k_getpc());
6985 #endif
6986 (*cpufunctbl[opcode])(opcode);
6987 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6988 compile_block(pc_hist, blocklen);
6989 return; /* We will deal with the spcflags in the caller */
6990 }
6991 /* No need to check regs.spcflags, because if they were set,
6992 we'd have ended up inside that "if" */
6993 }
6994 }
6995 }
6996
/* Signature of an entry point into generated (or trampoline) code. */
typedef void (*compiled_handler)(void);

#if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
/* On x86/x86-64 the top-level execute loop itself is generated at run
   time; this pointer is filled in elsewhere with that generated code. */
void (*m68k_compile_execute)(void) = NULL;
#else
/* Portable fallback top-level loop: repeatedly enter the translated-code
 * dispatcher, and on each return check for pending special flags.
 * Returns only when m68k_do_specialties() requests a full exit. */
void m68k_do_compile_execute(void)
{
	for (;;) {
		((compiled_handler)(pushall_call_handler))();
		/* Whenever we return from that, we should check spcflags */
		if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
			if (m68k_do_specialties ())
				return;
		}
	}
}
#endif