ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.29
Committed: 2005-01-30T21:42:16Z (19 years, 5 months ago) by gbeauche
Branch: MAIN
Changes since 1.28: +2 -2 lines
Log Message:
Happy New Year!

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.29 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 gbeauche 1.29 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
54     #include <stdlib.h>
55     #include <fcntl.h>
56     #include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86 gbeauche 1.28 #if defined(__x86_64__) && 0
87     #define RECORD_REGISTER_USAGE 1
88     #endif
89    
#ifdef WIN32
#undef write_log
#define write_log dummy_write_log
/* No console logging on Windows: swallow all log output. */
static void dummy_write_log(const char *, ...) { }
#endif
95    
96     #if JIT_DEBUG
97     #undef abort
98     #define abort() do { \
99     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100     exit(EXIT_FAILURE); \
101     } while (0)
102     #endif
103    
104 gbeauche 1.28 #if RECORD_REGISTER_USAGE
105     static uint64 reg_count[16];
106     static int reg_count_local[16];
107    
108     static int reg_count_compare(const void *ap, const void *bp)
109     {
110     const int a = *((int *)ap);
111     const int b = *((int *)bp);
112     return reg_count[b] - reg_count[a];
113     }
114     #endif
115    
116 gbeauche 1.1 #if PROFILE_COMPILE_TIME
117     #include <time.h>
118     static uae_u32 compile_count = 0;
119     static clock_t compile_time = 0;
120     static clock_t emul_start_time = 0;
121     static clock_t emul_end_time = 0;
122     #endif
123    
124 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
125     const int untranslated_top_ten = 20;
126     static uae_u32 raw_cputbl_count[65536] = { 0, };
127     static uae_u16 opcode_nums[65536];
128    
129     static int untranslated_compfn(const void *e1, const void *e2)
130     {
131     return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
132     }
133     #endif
134    
135 gbeauche 1.24 static compop_func *compfunctbl[65536];
136     static compop_func *nfcompfunctbl[65536];
137     static cpuop_func *nfcpufunctbl[65536];
138 gbeauche 1.1 uae_u8* comp_pc_p;
139    
140 gbeauche 1.26 // From main_unix.cpp
141     extern bool ThirtyThreeBitAddressing;
142    
143 gbeauche 1.6 // From newcpu.cpp
144     extern bool quit_program;
145    
146 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
147     #if JIT_DEBUG
148     static bool JITDebug = false; // Enable runtime disassemblers through mon?
149     #else
150     const bool JITDebug = false; // Don't use JIT debug mode at all
151     #endif
152    
153 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
154 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
155 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
156 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
157     static bool avoid_fpu = true; // Flag: compile FPU instructions ?
158     static bool have_cmov = false; // target has CMOV instructions ?
159     static bool have_rat_stall = true; // target has partial register stalls ?
160 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
161     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
162 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
163 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
164     static int align_jumps = 32; // Align the start of jumps
165 gbeauche 1.1 static int optcount[10] = {
166     10, // How often a block has to be executed before it is translated
167     0, // How often to use naive translation
168     0, 0, 0, 0,
169     -1, -1, -1, -1
170     };
171    
/* Static per-opcode properties, indexed by (mapped) 68k opcode.
 * Filled from the compiler tables; see also table68k. */
struct op_properties {
	uae_u8 use_flags;	/* CC flags the instruction depends on */
	uae_u8 set_flags;	/* CC flags the instruction produces */
	uae_u8 is_addx;		/* non-zero for ADDX-style instructions */
	uae_u8 cflow;		/* control-flow classification (fl_* bits) */
};
static op_properties prop[65536];
179    
180     static inline int end_block(uae_u32 opcode)
181     {
182     return (prop[opcode].cflow & fl_end_block);
183     }
184    
185 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
186     {
187     return (prop[opcode].cflow == fl_const_jump);
188     }
189    
190 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
191     {
192     return (prop[opcode].cflow & fl_trap);
193     }
194    
/* Map an opcode to the index used by the compiler function tables.
 * When opcode words are fetched byte-swapped (HAVE_GET_WORD_UNSWAPPED),
 * the two bytes must be exchanged first; otherwise it is the identity. */
static inline unsigned int cft_map (unsigned int f)
{
#ifdef HAVE_GET_WORD_UNSWAPPED
	return ((f >> 8) & 0xff) | ((f & 0xff) << 8);
#else
	return f;
#endif
}
203    
204 gbeauche 1.1 uae_u8* start_pc_p;
205     uae_u32 start_pc;
206     uae_u32 current_block_pc_p;
207 gbeauche 1.24 static uintptr current_block_start_target;
208 gbeauche 1.1 uae_u32 needed_flags;
209 gbeauche 1.24 static uintptr next_pc_p;
210     static uintptr taken_pc_p;
211 gbeauche 1.1 static int branch_cc;
212     static int redo_current_block;
213    
214     int segvcount=0;
215     int soft_flush_count=0;
216     int hard_flush_count=0;
217     int checksum_count=0;
218     static uae_u8* current_compile_p=NULL;
219     static uae_u8* max_compile_start;
220     static uae_u8* compiled_code=NULL;
221     static uae_s32 reg_alloc_run;
222 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
223     static uae_u8* popallspace=NULL;
224 gbeauche 1.1
225     void* pushall_call_handler=NULL;
226     static void* popall_do_nothing=NULL;
227     static void* popall_exec_nostats=NULL;
228     static void* popall_execute_normal=NULL;
229     static void* popall_cache_miss=NULL;
230     static void* popall_recompile_block=NULL;
231     static void* popall_check_checksum=NULL;
232    
233     /* The 68k only ever executes from even addresses. So right now, we
234     * waste half the entries in this array
235     * UPDATE: We now use those entries to store the start of the linked
236     * lists that we maintain for each hash result.
237     */
238     cacheline cache_tags[TAGSIZE];
239     int letit=0;
240     blockinfo* hold_bi[MAX_HOLD_BI];
241     blockinfo* active;
242     blockinfo* dormant;
243    
244     /* 68040 */
245     extern struct cputbl op_smalltbl_0_nf[];
246     extern struct comptbl op_smalltbl_0_comp_nf[];
247     extern struct comptbl op_smalltbl_0_comp_ff[];
248    
249     /* 68020 + 68881 */
250     extern struct cputbl op_smalltbl_1_nf[];
251    
252     /* 68020 */
253     extern struct cputbl op_smalltbl_2_nf[];
254    
255     /* 68010 */
256     extern struct cputbl op_smalltbl_3_nf[];
257    
258     /* 68000 */
259     extern struct cputbl op_smalltbl_4_nf[];
260    
261     /* 68000 slow but compatible. */
262     extern struct cputbl op_smalltbl_5_nf[];
263    
264     static void flush_icache_hard(int n);
265     static void flush_icache_lazy(int n);
266     static void flush_icache_none(int n);
267     void (*flush_icache)(int n) = flush_icache_none;
268    
269    
270    
271     bigstate live;
272     smallstate empty_ss;
273     smallstate default_ss;
274     static int optlev;
275    
276     static int writereg(int r, int size);
277     static void unlock2(int r);
278     static void setlock(int r);
279     static int readreg_specific(int r, int size, int spec);
280     static int writereg_specific(int r, int size, int spec);
281     static void prepare_for_call_1(void);
282     static void prepare_for_call_2(void);
283     static void align_target(uae_u32 a);
284    
285     static uae_s32 nextused[VREGS];
286    
287     uae_u32 m68k_pc_offset;
288    
293     /* Some arithmetic operations can be optimized away if the operands
290     * are known to be constant. But that's only a good idea when the
291     * side effects they would have on the flags are not important. This
292     * variable indicates whether we need the side effects or not
293     */
294     uae_u32 needflags=0;
295    
296     /* Flag handling is complicated.
297     *
298     * x86 instructions create flags, which quite often are exactly what we
299     * want. So at times, the "68k" flags are actually in the x86 flags.
300     *
301     * Then again, sometimes we do x86 instructions that clobber the x86
302     * flags, but don't represent a corresponding m68k instruction. In that
303     * case, we have to save them.
304     *
305     * We used to save them to the stack, but now store them back directly
306     * into the regflags.cznv of the traditional emulation. Thus some odd
307     * names.
308     *
309     * So flags can be in either of two places (used to be three; boy were
310     * things complicated back then!); And either place can contain either
311     * valid flags or invalid trash (and on the stack, there was also the
312     * option of "nothing at all", now gone). A couple of variables keep
313     * track of the respective states.
314     *
315     * To make things worse, we might or might not be interested in the flags.
316     * by default, we are, but a call to dont_care_flags can change that
317     * until the next call to live_flags. If we are not, pretty much whatever
318     * is in the register and/or the native flags is seen as valid.
319     */
320    
/* Head of the blockinfo chain for cache line CL.  cache_tags is laid
 * out in pairs: [cl] holds the dispatch handler, [cl+1] the blockinfo
 * chain (see add_to_cl_list). */
static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
	return cache_tags[cl+1].bi;
}
325    
326     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
327     {
328     blockinfo* bi=get_blockinfo(cacheline(addr));
329    
330     while (bi) {
331     if (bi->pc_p==addr)
332     return bi;
333     bi=bi->next_same_cl;
334     }
335     return NULL;
336     }
337    
338    
339     /*******************************************************************
340     * All sorts of list related functions for all of the lists *
341     *******************************************************************/
342    
/* Unlink BI from its cache-line chain and refresh the line's handler:
 * another block on the line takes over if one remains, otherwise the
 * generic execute-normal stub is restored. */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (bi->prev_same_cl_p)
		*(bi->prev_same_cl_p)=bi->next_same_cl;
	if (bi->next_same_cl)
		bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
	if (cache_tags[cl+1].bi)
		cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
	else
		cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}
356    
357     static __inline__ void remove_from_list(blockinfo* bi)
358     {
359     if (bi->prev_p)
360     *(bi->prev_p)=bi->next;
361     if (bi->next)
362     bi->next->prev_p=bi->prev_p;
363     }
364    
/* Remove BI from both the active/dormant list and its cache-line chain. */
static __inline__ void remove_from_lists(blockinfo* bi)
{
	remove_from_list(bi);
	remove_from_cl_list(bi);
}
370    
/* Push BI at the head of its cache line's chain and make its handler
 * the one dispatched for that line. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (cache_tags[cl+1].bi)
		cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
	bi->next_same_cl=cache_tags[cl+1].bi;

	cache_tags[cl+1].bi=bi;
	bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

	/* dispatch slot for the line now routes through this block */
	cache_tags[cl].handler=bi->handler_to_use;
}
384    
/* Move BI to the front of its cache-line chain (and make its handler
 * the dispatched one for the line). */
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
	remove_from_cl_list(bi);
	add_to_cl_list(bi);
}
390    
/* Push BI onto the head of the global 'active' block list. */
static __inline__ void add_to_active(blockinfo* bi)
{
	if (active)
		active->prev_p=&(bi->next);
	bi->next=active;

	active=bi;
	bi->prev_p=&active;
}
400    
/* Push BI onto the head of the global 'dormant' block list. */
static __inline__ void add_to_dormant(blockinfo* bi)
{
	if (dormant)
		dormant->prev_p=&(bi->next);
	bi->next=dormant;

	dormant=bi;
	bi->prev_p=&dormant;
}
410    
411     static __inline__ void remove_dep(dependency* d)
412     {
413     if (d->prev_p)
414     *(d->prev_p)=d->next;
415     if (d->next)
416     d->next->prev_p=d->prev_p;
417     d->prev_p=NULL;
418     d->next=NULL;
419     }
420    
421     /* This block's code is about to be thrown away, so it no longer
422     depends on anything else */
423     static __inline__ void remove_deps(blockinfo* bi)
424     {
425     remove_dep(&(bi->dep[0]));
426     remove_dep(&(bi->dep[1]));
427     }
428    
/* Re-patch the jump recorded in D so it targets handler A.
 * NOTE(review): the '+4' assumes a 4-byte rel32 displacement operand,
 * i.e. code buffers within a 32-bit address space (see the AMD64 note
 * at the top of this file). */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
	*(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
433    
434     /********************************************************************
435     * Soft flush handling support functions *
436     ********************************************************************/
437    
/* Change BI's direct handler-to-use to DH, re-patching every jump
 * that dependent blocks have emitted toward this block. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
	//write_log("bi is %p\n",bi);
	if (dh!=bi->direct_handler_to_use) {
		dependency* x=bi->deplist;
		//write_log("bi->deplist=%p\n",bi->deplist);
		while (x) {
			//write_log("x is %p\n",x);
			//write_log("x->next is %p\n",x->next);
			//write_log("x->prev_p is %p\n",x->prev_p);

			/* only dependencies that actually emitted a jump */
			if (x->jmp_off) {
				adjust_jmpdep(x,dh);
			}
			x=x->next;
		}
		bi->direct_handler_to_use=dh;
	}
}
457    
/* Reset BI to the "not compiled" state: execution counter rearmed,
 * handlers routed back through the generic execute-normal stub, and
 * both outgoing jump dependencies dropped. */
static __inline__ void invalidate_block(blockinfo* bi)
{
	int i;

	bi->optlevel=0;
	bi->count=optcount[0]-1;	/* executions before (re)translation */
	bi->handler=NULL;
	bi->handler_to_use=(cpuop_func *)popall_execute_normal;
	bi->direct_handler=NULL;
	set_dhtu(bi,bi->direct_pen);
	bi->needed_flags=0xff;
	bi->status=BI_INVALID;
	for (i=0;i<2;i++) {
		bi->dep[i].jmp_off=NULL;
		bi->dep[i].target=NULL;
	}
	remove_deps(bi);
}
476    
/* Record that BI's i-th exit jump (emitted at JMPADDR) goes to the
 * block translated from 68k address TARGET, and link the dependency
 * into the target's deplist so the jump can be re-patched when the
 * target's handler changes.  Aborts if TARGET has no blockinfo. */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
	blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

	Dif(!tbi) {
		write_log("Could not create jmpdep!\n");
		abort();
	}
	bi->dep[i].jmp_off=jmpaddr;
	bi->dep[i].source=bi;
	bi->dep[i].target=tbi;
	bi->dep[i].next=tbi->deplist;
	if (bi->dep[i].next)
		bi->dep[i].next->prev_p=&(bi->dep[i].next);
	bi->dep[i].prev_p=&(tbi->deplist);
	tbi->deplist=&(bi->dep[i]);
}
494    
/* Flag BI for recompilation: all of its handlers (direct and cached)
 * are routed back through the generic stubs until it is rebuilt. */
static __inline__ void block_need_recompile(blockinfo * bi)
{
	uae_u32 cl = cacheline(bi->pc_p);

	set_dhtu(bi, bi->direct_pen);
	bi->direct_handler = bi->direct_pen;

	bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	bi->handler = (cpuop_func *)popall_execute_normal;
	if (bi == cache_tags[cl + 1].bi)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	bi->status = BI_NEED_RECOMP;
}
508    
/* Recursively mark every block that jumps into BI for recompilation,
 * so their direct jumps are not left pointing at stale code. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
	dependency *x = bi->deplist;

	while (x) {
		dependency *next = x->next;	/* This disappears when we mark for
						 * recompilation and thus remove the
						 * blocks from the lists */
		if (x->jmp_off) {
			blockinfo *cbi = x->source;

			Dif(cbi->status == BI_INVALID) {
				// write_log("invalid block in dependency list\n"); // FIXME?
				// abort();
			}
			if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
				block_need_recompile(cbi);
				mark_callers_recompile(cbi);	/* propagate upwards */
			}
			else if (cbi->status == BI_COMPILING) {
				redo_current_block = 1;
			}
			else if (cbi->status == BI_NEED_RECOMP) {
				/* nothing */
			}
			else {
				//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
			}
		}
		x = next;
	}
}
541    
/* Look up the blockinfo for ADDR, taking one from the hold_bi[] stock
 * if none exists yet.  Aborts when the stock is exhausted, so
 * alloc_blockinfos() must have been called beforehand.
 * NOTE(review): 'setstate' is currently unused. */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
	blockinfo* bi=get_blockinfo_addr(addr);
	int i;

	if (!bi) {
		for (i=0;i<MAX_HOLD_BI && !bi;i++) {
			if (hold_bi[i]) {
				uae_u32 cl=cacheline(addr);

				bi=hold_bi[i];
				hold_bi[i]=NULL;
				bi->pc_p=(uae_u8 *)addr;
				invalidate_block(bi);
				add_to_active(bi);
				add_to_cl_list(bi);

			}
		}
	}
	if (!bi) {
		write_log("Looking for blockinfo, can't find free one\n");
		abort();
	}
	return bi;
}
568    
569     static void prepare_block(blockinfo* bi);
570    
571     /* Management of blockinfos.
572    
573     A blockinfo struct is allocated whenever a new block has to be
574     compiled. If the list of free blockinfos is empty, we allocate a new
575     pool of blockinfos and link the newly created blockinfos altogether
576     into the list of free blockinfos. Otherwise, we simply pop a structure
577 gbeauche 1.7 off the free list.
578 gbeauche 1.1
579     Blockinfo are lazily deallocated, i.e. chained altogether in the
580     list of free blockinfos whenever a translation cache flush (hard or
581     soft) request occurs.
582     */
583    
/* Lazy allocator: hands out T objects from malloc'ed pools of
 * kPoolSize chunks.  Released objects go onto a free list (mChunks)
 * and get reused; the pools themselves are only freed by the
 * destructor.  T must provide a 'next' pointer member, which is used
 * to link free chunks together. */
template< class T >
class LazyBlockAllocator
{
	enum {
		kPoolSize = 1 + 4096 / sizeof(T)	/* roughly one page worth */
	};
	struct Pool {
		T chunk[kPoolSize];
		Pool * next;
	};
	Pool * mPools;		/* every pool ever allocated */
	T * mChunks;		/* free list of available chunks */
public:
	LazyBlockAllocator() : mPools(0), mChunks(0) { }
	~LazyBlockAllocator();
	T * acquire();
	void release(T * const);
};
602    
/* Free every pool; any chunks still handed out become invalid. */
template< class T >
LazyBlockAllocator<T>::~LazyBlockAllocator()
{
	Pool * currentPool = mPools;
	while (currentPool) {
		Pool * deadPool = currentPool;
		currentPool = currentPool->next;
		free(deadPool);
	}
}
613    
614     template< class T >
615     T * LazyBlockAllocator<T>::acquire()
616     {
617     if (!mChunks) {
618     // There is no chunk left, allocate a new pool and link the
619     // chunks into the free list
620     Pool * newPool = (Pool *)malloc(sizeof(Pool));
621     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
622     chunk->next = mChunks;
623     mChunks = chunk;
624 gbeauche 1.1 }
625 gbeauche 1.7 newPool->next = mPools;
626     mPools = newPool;
627     }
628     T * chunk = mChunks;
629     mChunks = chunk->next;
630     return chunk;
631     }
632    
/* Return CHUNK to the free list for later reuse (memory is kept). */
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
	chunk->next = mChunks;
	mChunks = chunk;
}
639    
/* "Hard" allocator: carves T objects straight out of the translation
 * cache (current_compile_p).  release() is a no-op because the space
 * is reclaimed wholesale when the cache is invalidated. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
654    
655     #if USE_SEPARATE_BIA
656     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
657     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
658 gbeauche 1.1 #else
659 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
660     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
661 gbeauche 1.1 #endif
662    
/* Allocate a checksum_info record with an empty 'next' link. */
static __inline__ checksum_info *alloc_checksum_info(void)
{
	checksum_info *csi = ChecksumInfoAllocator.acquire();
	csi->next = NULL;
	return csi;
}
669    
/* Release a single checksum_info record back to its allocator. */
static __inline__ void free_checksum_info(checksum_info *csi)
{
	csi->next = NULL;
	ChecksumInfoAllocator.release(csi);
}
675    
676     static __inline__ void free_checksum_info_chain(checksum_info *csi)
677     {
678     while (csi != NULL) {
679     checksum_info *csi2 = csi->next;
680     free_checksum_info(csi);
681     csi = csi2;
682     }
683     }
684 gbeauche 1.7
/* Allocate a blockinfo; its checksum list starts out empty. */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	bi->csi = NULL;
#endif
	return bi;
}
693    
694 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
695 gbeauche 1.1 {
696 gbeauche 1.7 #if USE_CHECKSUM_INFO
697 gbeauche 1.8 free_checksum_info_chain(bi->csi);
698     bi->csi = NULL;
699 gbeauche 1.1 #endif
700 gbeauche 1.7 BlockInfoAllocator.release(bi);
701 gbeauche 1.1 }
702    
/* Top up the hold_bi[] stock of pre-prepared blockinfos; returns early
 * as soon as a slot is found already filled. */
static __inline__ void alloc_blockinfos(void)
{
	int i;
	blockinfo* bi;

	for (i=0;i<MAX_HOLD_BI;i++) {
		if (hold_bi[i])
			return;
		bi=hold_bi[i]=alloc_blockinfo();
		prepare_block(bi);
	}
}
715    
716     /********************************************************************
717     * Functions to emit data into memory, and other general support *
718     ********************************************************************/
719    
720     static uae_u8* target;
721    
/* Emitter initialization hook; currently nothing to set up. */
static void emit_init(void)
{
}
725    
/* Append one byte at the current output position and advance it. */
static __inline__ void emit_byte(uae_u8 x)
{
	*target++=x;
}
730    
/* Append a 16-bit value in host byte order.  NOTE(review): relies on
 * unaligned stores being permitted (true on the x86 targets above). */
static __inline__ void emit_word(uae_u16 x)
{
	*((uae_u16*)target)=x;
	target+=2;
}
736    
/* Append a 32-bit value in host byte order (unaligned store). */
static __inline__ void emit_long(uae_u32 x)
{
	*((uae_u32*)target)=x;
	target+=4;
}
742    
/* Append a 64-bit value in host byte order (unaligned store). */
static __inline__ void emit_quad(uae_u64 x)
{
	*((uae_u64*)target)=x;
	target+=8;
}
748    
749 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
750     {
751     memcpy((uae_u8 *)target,block,blocklen);
752     target+=blocklen;
753     }
754    
/* Byte-swap a 32-bit value (big <-> little endian). */
static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
	// gb-- We have specialized byteswapping functions, just use them
	return do_byteswap_32(v);
#else
	return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}
764    
765     /********************************************************************
766     * Getting the information about the target CPU *
767     ********************************************************************/
768    
769     #include "codegen_x86.cpp"
770    
/* Point the code emitter at output buffer T. */
void set_target(uae_u8* t)
{
	target=t;
}
775    
/* Current emitter output position (no optimization barrier). */
static __inline__ uae_u8* get_target_noopt(void)
{
	return target;
}
780    
/* Public accessor for the current emitter output position. */
__inline__ uae_u8* get_target(void)
{
	return get_target_noopt();
}
785    
786    
787     /********************************************************************
788     * Flags status handling. EMIT TIME! *
789     ********************************************************************/
790    
791     static void bt_l_ri_noclobber(R4 r, IMM i);
792    
/* Ensure the emulated flags live in the native x86 flags register,
 * reloading them from their memory copy (regflags.cznv, the "stack"
 * location) if needed.  Aborts if neither location is valid. */
static void make_flags_live_internal(void)
{
	if (live.flags_in_flags==VALID)
		return;
	Dif (live.flags_on_stack==TRASH) {
		write_log("Want flags, got something on stack, but it is TRASH\n");
		abort();
	}
	if (live.flags_on_stack==VALID) {
		int tmp;
		tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
		raw_reg_to_flags(tmp);
		unlock2(tmp);

		live.flags_in_flags=VALID;
		return;
	}
	write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
		  live.flags_in_flags,live.flags_on_stack);
	abort();
}
814    
/* Spill the native flags to their memory copy ("stack" for historical
 * reasons; see the flag-handling comment earlier in this file).  When
 * the flags are not important they are simply marked valid. */
static void flags_to_stack(void)
{
	if (live.flags_on_stack==VALID)
		return;
	if (!live.flags_are_important) {
		live.flags_on_stack=VALID;
		return;
	}
	Dif (live.flags_in_flags!=VALID)
		abort();
	else {
		int tmp;
		tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
		raw_flags_to_reg(tmp);
		unlock2(tmp);
	}
	live.flags_on_stack=VALID;
}
833    
/* About to emit code that trashes the x86 flags: save them first if
 * the flags register currently holds the only valid copy. */
static __inline__ void clobber_flags(void)
{
	if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
		flags_to_stack();
	live.flags_in_flags=TRASH;
}
840    
841     /* Prepare for leaving the compiled stuff */
842     static __inline__ void flush_flags(void)
843     {
844     flags_to_stack();
845     return;
846     }
847    
848     int touchcnt;
849    
850     /********************************************************************
851 gbeauche 1.18 * Partial register flushing for optimized calls *
852     ********************************************************************/
853    
/* Per-instruction register usage: bit N of rmask/wmask is set when
 * register N is read/written (D0-D7 = bits 0-7, A0-A7 = bits 8-15). */
struct regusage {
	uae_u16 rmask;	/* registers read */
	uae_u16 wmask;	/* registers written */
};
858    
/* Set bit REG in MASK (no-op unless USE_OPTIMIZED_CALLS is enabled). */
static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	*mask |= 1 << reg;
#endif
}
865    
/* Test bit REG in MASK; without USE_OPTIMIZED_CALLS every register is
 * conservatively reported as used. */
static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	return (*mask & (1 << reg));
#else
	/* Default: instruction reads & write to register */
	return true;
#endif
}
875    
/* Mark register REG as read by the instruction. */
static inline void ru_set_read(regusage *ru, int reg)
{
	ru_set(&ru->rmask, reg);
}
880    
/* Mark register REG as written by the instruction. */
static inline void ru_set_write(regusage *ru, int reg)
{
	ru_set(&ru->wmask, reg);
}
885    
/* Does the instruction read register REG? */
static inline bool ru_read_p(const regusage *ru, int reg)
{
	return ru_get(&ru->rmask, reg);
}
890    
/* Does the instruction write register REG? */
static inline bool ru_write_p(const regusage *ru, int reg)
{
	return ru_get(&ru->wmask, reg);
}
895    
/* Accumulate the register usage of one effective address into RU and
 * advance m68k_pc_offset past any extension words.  WRITE_MODE selects
 * whether the EA is a destination (1) or a source (0). */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
		       wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;	/* address registers occupy mask bits 8-15 */
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through */
	case PC8r: {
		/* extension word: index register number in bits 12-15 */
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100)
			m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	}
}
940    
941     /* TODO: split into a static initialization part and a dynamic one
942     (instructions depending on extension words) */
943     static void ru_fill(regusage *ru, uae_u32 opcode)
944     {
945     m68k_pc_offset += 2;
946    
947     /* Default: no register is used or written to */
948     ru->rmask = 0;
949     ru->wmask = 0;
950    
951     uae_u32 real_opcode = cft_map(opcode);
952     struct instr *dp = &table68k[real_opcode];
953    
954     bool rw_dest = true;
955     bool handled = false;
956    
957     /* Handle some instructions specifically */
958     uae_u16 reg, ext;
959     switch (dp->mnemo) {
960     case i_BFCHG:
961     case i_BFCLR:
962     case i_BFEXTS:
963     case i_BFEXTU:
964     case i_BFFFO:
965     case i_BFINS:
966     case i_BFSET:
967     case i_BFTST:
968     ext = comp_get_iword((m68k_pc_offset+=2)-2);
969     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
970     if (ext & 0x020) ru_set_read(ru, ext & 7);
971     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
972     if (dp->dmode == Dreg)
973     ru_set_read(ru, dp->dreg);
974     switch (dp->mnemo) {
975     case i_BFEXTS:
976     case i_BFEXTU:
977     case i_BFFFO:
978     ru_set_write(ru, (ext >> 12) & 7);
979     break;
980     case i_BFINS:
981     ru_set_read(ru, (ext >> 12) & 7);
982     /* fall through */
983     case i_BFCHG:
984     case i_BFCLR:
985     case i_BSET:
986     if (dp->dmode == Dreg)
987     ru_set_write(ru, dp->dreg);
988     break;
989     }
990     handled = true;
991     rw_dest = false;
992     break;
993    
994     case i_BTST:
995     rw_dest = false;
996     break;
997    
998     case i_CAS:
999     {
1000     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1001     int Du = ext & 7;
1002     ru_set_read(ru, Du);
1003     int Dc = (ext >> 6) & 7;
1004     ru_set_read(ru, Dc);
1005     ru_set_write(ru, Dc);
1006     break;
1007     }
1008     case i_CAS2:
1009     {
1010     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1011     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1012     Rn1 = (ext >> 12) & 15;
1013     Du1 = (ext >> 6) & 7;
1014     Dc1 = ext & 7;
1015     ru_set_read(ru, Rn1);
1016     ru_set_read(ru, Du1);
1017     ru_set_read(ru, Dc1);
1018     ru_set_write(ru, Dc1);
1019     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1020     Rn2 = (ext >> 12) & 15;
1021     Du2 = (ext >> 6) & 7;
1022     Dc2 = ext & 7;
1023     ru_set_read(ru, Rn2);
1024     ru_set_read(ru, Du2);
1025     ru_set_write(ru, Dc2);
1026     break;
1027     }
1028     case i_DIVL: case i_MULL:
1029     m68k_pc_offset += 2;
1030     break;
1031     case i_LEA:
1032     case i_MOVE: case i_MOVEA: case i_MOVE16:
1033     rw_dest = false;
1034     break;
1035     case i_PACK: case i_UNPK:
1036     rw_dest = false;
1037     m68k_pc_offset += 2;
1038     break;
1039     case i_TRAPcc:
1040     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1041     break;
1042     case i_RTR:
1043     /* do nothing, just for coverage debugging */
1044     break;
1045     /* TODO: handle EXG instruction */
1046     }
1047    
1048     /* Handle A-Traps better */
1049     if ((real_opcode & 0xf000) == 0xa000) {
1050     handled = true;
1051     }
1052    
1053     /* Handle EmulOps better */
1054     if ((real_opcode & 0xff00) == 0x7100) {
1055     handled = true;
1056     ru->rmask = 0xffff;
1057     ru->wmask = 0;
1058     }
1059    
1060     if (dp->suse && !handled)
1061     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1062    
1063     if (dp->duse && !handled)
1064     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1065    
1066     if (rw_dest)
1067     ru->rmask |= ru->wmask;
1068    
1069     handled = handled || dp->suse || dp->duse;
1070    
1071     /* Mark all registers as used/written if the instruction may trap */
1072     if (may_trap(opcode)) {
1073     handled = true;
1074     ru->rmask = 0xffff;
1075     ru->wmask = 0xffff;
1076     }
1077    
1078     if (!handled) {
1079     write_log("ru_fill: %04x = { %04x, %04x }\n",
1080     real_opcode, ru->rmask, ru->wmask);
1081     abort();
1082     }
1083     }
1084    
1085     /********************************************************************
1086 gbeauche 1.1 * register allocation per block logging *
1087     ********************************************************************/
1088    
/* Per-block register-allocation log.  vstate/nstate record what has been
   observed for each virtual/native register while compiling the current
   block; values are either a register number or one of the L_* codes. */
static uae_s8 vstate[VREGS];
static uae_s8 vwritten[VREGS];	/* 1 once the vreg has been written in this block */
static uae_s8 nstate[N_REGS];

#define L_UNKNOWN -127	/* nothing observed yet */
#define L_UNAVAIL -1	/* native reg used as a temp; incoming contents unusable */
#define L_NEEDED -2	/* vreg was read before being overwritten */
#define L_UNNEEDED -3	/* vreg was overwritten before any read */
1097    
1098     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1099     {
1100     int i;
1101    
1102     for (i = 0; i < VREGS; i++)
1103     s->virt[i] = vstate[i];
1104     for (i = 0; i < N_REGS; i++)
1105     s->nat[i] = nstate[i];
1106     }
1107    
1108     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1109     {
1110     int i;
1111     int reverse = 0;
1112    
1113     for (i = 0; i < VREGS; i++) {
1114     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1115     return 1;
1116     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1117     reverse++;
1118     }
1119     for (i = 0; i < N_REGS; i++) {
1120     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1121     return 1;
1122     if (nstate[i] < 0 && s->nat[i] >= 0)
1123     reverse++;
1124     }
1125     if (reverse >= 2 && USE_MATCH)
1126     return 1; /* In this case, it might be worth recompiling the
1127     * callers */
1128     return 0;
1129     }
1130    
1131     static __inline__ void log_startblock(void)
1132     {
1133     int i;
1134    
1135     for (i = 0; i < VREGS; i++) {
1136     vstate[i] = L_UNKNOWN;
1137     vwritten[i] = 0;
1138     }
1139     for (i = 0; i < N_REGS; i++)
1140     nstate[i] = L_UNKNOWN;
1141     }
1142    
1143     /* Using an n-reg for a temp variable */
1144     static __inline__ void log_isused(int n)
1145     {
1146     if (nstate[n] == L_UNKNOWN)
1147     nstate[n] = L_UNAVAIL;
1148     }
1149    
1150     static __inline__ void log_visused(int r)
1151     {
1152     if (vstate[r] == L_UNKNOWN)
1153     vstate[r] = L_NEEDED;
1154     }
1155    
1156     static __inline__ void do_load_reg(int n, int r)
1157     {
1158     if (r == FLAGTMP)
1159     raw_load_flagreg(n, r);
1160     else if (r == FLAGX)
1161     raw_load_flagx(n, r);
1162     else
1163 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1164 gbeauche 1.1 }
1165    
/* Plain load of vreg r's memory slot into native reg n, without the
   FLAGTMP/FLAGX special-casing that do_load_reg() performs. */
static __inline__ void check_load_reg(int n, int r)
{
	raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}
1170    
/* Record that vreg r has been written during this block. */
static __inline__ void log_vwrite(int r)
{
	vwritten[r] = 1;
}
1175    
1176     /* Using an n-reg to hold a v-reg */
1177     static __inline__ void log_isreg(int n, int r)
1178     {
1179     static int count = 0;
1180    
1181     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1182     nstate[n] = r;
1183     else {
1184     do_load_reg(n, r);
1185     if (nstate[n] == L_UNKNOWN)
1186     nstate[n] = L_UNAVAIL;
1187     }
1188     if (vstate[r] == L_UNKNOWN)
1189     vstate[r] = L_NEEDED;
1190     }
1191    
/* Vreg r is overwritten; if it was never read first, callers of this
   block do not need to supply it. */
static __inline__ void log_clobberreg(int r)
{
	if (vstate[r] == L_UNKNOWN)
		vstate[r] = L_UNNEEDED;
}
1197    
1198     /* This ends all possibility of clever register allocation */
1199    
1200     static __inline__ void log_flush(void)
1201     {
1202     int i;
1203    
1204     for (i = 0; i < VREGS; i++)
1205     if (vstate[i] == L_UNKNOWN)
1206     vstate[i] = L_NEEDED;
1207     for (i = 0; i < N_REGS; i++)
1208     if (nstate[i] == L_UNKNOWN)
1209     nstate[i] = L_UNAVAIL;
1210     }
1211    
/* Debug dump of the logging state.  Deliberately disabled by the early
   return below; remove it to get the per-block dump. */
static __inline__ void log_dump(void)
{
	int i;

	return;		/* dumping disabled -- everything below is dead code */

	write_log("----------------------\n");
	for (i = 0; i < N_REGS; i++) {
		switch (nstate[i]) {
		case L_UNKNOWN:
			write_log("Nat %d : UNKNOWN\n", i);
			break;
		case L_UNAVAIL:
			write_log("Nat %d : UNAVAIL\n", i);
			break;
		default:
			write_log("Nat %d : %d\n", i, nstate[i]);
			break;
		}
	}
	for (i = 0; i < VREGS; i++) {
		if (vstate[i] == L_UNNEEDED)
			write_log("Virt %d: UNNEEDED\n", i);
	}
}
1237    
1238     /********************************************************************
1239     * register status handling. EMIT TIME! *
1240     ********************************************************************/
1241    
/* Set vreg r's status.  Becoming ISCONST counts as a clobber for the
   block-logging pass (the old value is being replaced wholesale). */
static __inline__ void set_status(int r, int status)
{
	if (status == ISCONST)
		log_clobberreg(r);
	live.state[r].status=status;
}
1248    
/* A vreg is "in a native register" iff its status is CLEAN or DIRTY. */
static __inline__ int isinreg(int r)
{
	return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
}
1253    
1254     static __inline__ void adjust_nreg(int r, uae_u32 val)
1255     {
1256     if (!val)
1257     return;
1258     raw_lea_l_brr(r,r,val);
1259     }
1260    
/* Write vreg r back to its memory slot.  First materializes any pending
   constant offset (only when this nreg holds just r and is not locked),
   then stores the dirty bytes (1/2/4) and marks the vreg CLEAN. */
static void tomem(int r)
{
	int rr=live.state[r].realreg;

	if (isinreg(r)) {
		if (live.state[r].val && live.nat[rr].nholds==1
			&& !live.nat[rr].locked) {
			// write_log("RemovingA offset %x from reg %d (%d) at %p\n",
			//   live.state[r].val,r,rr,target);
			adjust_nreg(rr,live.state[r].val);
			live.state[r].val=0;
			live.state[r].dirtysize=4;	/* offset fold dirtied the whole reg */
			set_status(r,DIRTY);
		}
	}

	if (live.state[r].status==DIRTY) {
		/* store only as many bytes as are actually dirty */
		switch (live.state[r].dirtysize) {
		case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
		case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
		case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
		default: abort();
		}
		log_vwrite(r);
		set_status(r,CLEAN);
		live.state[r].dirtysize=0;
	}
}
1289    
/* True iff vreg r currently holds a compile-time constant. */
static __inline__ int isconst(int r)
{
	return live.state[r].status==ISCONST;
}
1294    
/* Non-inline, externally visible wrapper around isconst(). */
int is_const(int r)
{
	return isconst(r);
}
1299    
/* Store a constant vreg's value directly to its memory slot and mark it
   INMEM.  No-op unless the vreg is ISCONST; NF_HANDLER vregs must never
   be constant. */
static __inline__ void writeback_const(int r)
{
	if (!isconst(r))
		return;
	Dif (live.state[r].needflush==NF_HANDLER) {
		write_log("Trying to write back constant NF_HANDLER!\n");
		abort();
	}

	raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
	log_vwrite(r);
	live.state[r].val=0;
	set_status(r,INMEM);
}
1314    
1315     static __inline__ void tomem_c(int r)
1316     {
1317     if (isconst(r)) {
1318     writeback_const(r);
1319     }
1320     else
1321     tomem(r);
1322     }
1323    
/* Remove vreg r from its native register: write it back first, then
   unlink it from the nreg's holds[] stack (swapping the top entry into
   r's slot when r was not last) and mark it INMEM. */
static void evict(int r)
{
	int rr;

	if (!isinreg(r))
		return;
	tomem(r);
	rr=live.state[r].realreg;

	Dif (live.nat[rr].locked &&
		 live.nat[rr].nholds==1) {
		write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
		abort();
	}

	live.nat[rr].nholds--;
	if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
		int topreg=live.nat[rr].holds[live.nat[rr].nholds];
		int thisind=live.state[r].realind;

		/* move the former top entry into the vacated slot */
		live.nat[rr].holds[thisind]=topreg;
		live.state[topreg].realind=thisind;
	}
	live.state[r].realreg=-1;
	set_status(r,INMEM);
}
1350    
1351     static __inline__ void free_nreg(int r)
1352     {
1353     int i=live.nat[r].nholds;
1354    
1355     while (i) {
1356     int vr;
1357    
1358     --i;
1359     vr=live.nat[r].holds[i];
1360     evict(vr);
1361     }
1362     Dif (live.nat[r].nholds!=0) {
1363     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1364     abort();
1365     }
1366     }
1367    
/* Use with care!  Marks the cached value as fully valid and clean WITHOUT
   writing anything back -- pending dirty data and constant offsets are
   simply discarded. */
static __inline__ void isclean(int r)
{
	if (!isinreg(r))
		return;
	live.state[r].validsize=4;
	live.state[r].dirtysize=0;
	live.state[r].val=0;
	set_status(r,CLEAN);
}
1378    
/* Detach vreg r from its native register without writing back: isclean()
   drops any dirty state first, so the subsequent evict() emits no store. */
static __inline__ void disassociate(int r)
{
	isclean(r);
	evict(r);
}
1384    
/* Make vreg r a compile-time constant with value val, discarding any
   cached register copy. */
static __inline__ void set_const(int r, uae_u32 val)
{
	disassociate(r);
	live.state[r].val=val;
	set_status(r,ISCONST);
}
1391    
/* Return the pending offset (or constant value, for ISCONST) of vreg r. */
static __inline__ uae_u32 get_offset(int r)
{
	return live.state[r].val;
}
1396    
/* Allocate a native register for vreg r.
 *
 * r          - virtual register
 * size       - access size needed (1/2/4); byte/word require a capable nreg
 * willclobber- nonzero if the caller will overwrite the whole value, so
 *              no load is needed
 * hint       - preferred native register, or -1 for none
 *
 * Chooses the least-recently-touched unlocked candidate (free regs and the
 * hint get a large bonus), frees it if occupied, loads/initializes the
 * value as required, and links r into the nreg's holds[] stack.
 * Returns the chosen native register. */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
	int bestreg;
	uae_s32 when;
	int i;
	uae_s32 badness=0; /* to shut up gcc */
	bestreg=-1;
	when=2000000000;

	for (i=N_REGS;i--;) {
		badness=live.nat[i].touched;
		if (live.nat[i].nholds==0)
			badness=0;	/* free registers are ideal */
		if (i==hint)
			badness-=200000000;	/* strongly prefer the hint */
		if (!live.nat[i].locked && badness<when) {
			if ((size==1 && live.nat[i].canbyte) ||
				(size==2 && live.nat[i].canword) ||
				(size==4)) {
				bestreg=i;
				when=badness;
				if (live.nat[i].nholds==0 && hint<0)
					break;
				if (i==hint)
					break;
			}
		}
	}
	Dif (bestreg==-1)
		abort();

	if (live.nat[bestreg].nholds>0) {
		free_nreg(bestreg);
	}
	if (isinreg(r)) {
		int rr=live.state[r].realreg;
		/* This will happen if we read a partially dirty register at a
		   bigger size */
		Dif (willclobber || live.state[r].validsize>=size)
			abort();
		Dif (live.nat[rr].nholds!=1)
			abort();
		if (size==4 && live.state[r].validsize==2) {
			/* Combine the valid low word in rr with the high word from
			   memory: load full value, zero low words of both, add. */
			log_isused(bestreg);
			log_visused(r);
			raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
			raw_bswap_32(bestreg);
			raw_zero_extend_16_rr(rr,rr);
			raw_zero_extend_16_rr(bestreg,bestreg);
			raw_bswap_32(bestreg);
			raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
			live.state[r].validsize=4;
			live.nat[rr].touched=touchcnt++;
			return rr;
		}
		if (live.state[r].validsize==1) {
			/* Nothing yet */
		}
		evict(r);
	}

	if (!willclobber) {
		if (live.state[r].status!=UNDEF) {
			if (isconst(r)) {
				/* materialize the constant in the register */
				raw_mov_l_ri(bestreg,live.state[r].val);
				live.state[r].val=0;
				live.state[r].dirtysize=4;
				set_status(r,DIRTY);
				log_isused(bestreg);
			}
			else {
				log_isreg(bestreg, r); /* This will also load it! */
				live.state[r].dirtysize=0;
				set_status(r,CLEAN);
			}
		}
		else {
			live.state[r].val=0;
			live.state[r].dirtysize=0;
			set_status(r,CLEAN);
			log_isused(bestreg);
		}
		live.state[r].validsize=4;
	}
	else { /* this is the easiest way, but not optimal. FIXME! */
		/* Now it's trickier, but hopefully still OK */
		if (!isconst(r) || size==4) {
			live.state[r].validsize=size;
			live.state[r].dirtysize=size;
			live.state[r].val=0;
			set_status(r,DIRTY);
			if (size == 4) {
				log_clobberreg(r);
				log_isused(bestreg);
			}
			else {
				log_visused(r);
				log_isused(bestreg);
			}
		}
		else {
			/* partial clobber of a constant: load it first */
			if (live.state[r].status!=UNDEF)
				raw_mov_l_ri(bestreg,live.state[r].val);
			live.state[r].val=0;
			live.state[r].validsize=4;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
			log_isused(bestreg);
		}
	}
	live.state[r].realreg=bestreg;
	live.state[r].realind=live.nat[bestreg].nholds;
	live.nat[bestreg].touched=touchcnt++;
	live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
	live.nat[bestreg].nholds++;

	return bestreg;
}
1515    
/* alloc_reg_hinted() without a hint. */
static int alloc_reg(int r, int size, int willclobber)
{
	return alloc_reg_hinted(r,size,willclobber,-1);
}
1520    
/* Drop one lock on native register r; aborts if it was not locked. */
static void unlock2(int r)
{
	Dif (!live.nat[r].locked)
		abort();
	live.nat[r].locked--;
}
1527    
/* Take a lock on native register r so it cannot be reallocated. */
static void setlock(int r)
{
	live.nat[r].locked++;
}
1532    
1533    
/* Move the contents of native register s into native register d, evicting
   whatever d held, and transfer the whole holds[] association from s to d.
   Afterwards s holds nothing. */
static void mov_nregs(int d, int s)
{
	int ns=live.nat[s].nholds;
	int nd=live.nat[d].nholds;
	int i;

	if (s==d)
		return;

	if (nd>0)
		free_nreg(d);

	log_isused(d);
	raw_mov_l_rr(d,s);

	/* re-point every vreg that lived in s at d */
	for (i=0;i<live.nat[s].nholds;i++) {
		int vs=live.nat[s].holds[i];

		live.state[vs].realreg=d;
		live.state[vs].realind=i;
		live.nat[d].holds[i]=vs;
	}
	live.nat[d].nholds=live.nat[s].nholds;

	live.nat[s].nholds=0;
}
1560    
1561    
/* Ensure vreg r is the only vreg held by its native register, so it can be
 * modified without corrupting co-resident vregs.
 *
 * r    - virtual register
 * size - access size the caller intends; if everything else in the nreg is
 *        clean and r has more valid bytes than needed, the others are just
 *        evicted and the register kept
 * spec - hint/required nreg passed through to alloc_reg_hinted, or -1
 *
 * Otherwise the register is split: r is detached and copied into a freshly
 * allocated nreg (compensating any pending offset on the way). */
static __inline__ void make_exclusive(int r, int size, int spec)
{
	int clobber;
	reg_status oldstate;
	int rr=live.state[r].realreg;
	int nr;
	int nind;
	int ndirt=0;
	int i;

	if (!isinreg(r))
		return;
	if (live.nat[rr].nholds==1)
		return;	/* already exclusive */
	for (i=0;i<live.nat[rr].nholds;i++) {
		int vr=live.nat[rr].holds[i];
		if (vr!=r &&
			(live.state[vr].status==DIRTY || live.state[vr].val))
			ndirt++;
	}
	if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
		/* Everything else is clean, so let's keep this register */
		for (i=0;i<live.nat[rr].nholds;i++) {
			int vr=live.nat[rr].holds[i];
			if (vr!=r) {
				evict(vr);
				i--; /* Try that index again! */
			}
		}
		Dif (live.nat[rr].nholds!=1) {
			write_log("natreg %d holds %d vregs, %d not exclusive\n",
					  rr,live.nat[rr].nholds,r);
			abort();
		}
		return;
	}

	/* We have to split the register */
	oldstate=live.state[r];

	setlock(rr); /* Make sure this doesn't go away */
	/* Forget about r being in the register rr */
	disassociate(r);
	/* Get a new register, that we will clobber completely */
	if (oldstate.status==DIRTY) {
		/* If dirtysize is <4, we need a register that can handle the
		   eventual smaller memory store! Thanks to Quake68k for exposing
		   this detail ;-) */
		nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
	}
	else {
		nr=alloc_reg_hinted(r,4,1,spec);
	}
	nind=live.state[r].realind;
	live.state[r]=oldstate; /* Keep all the old state info */
	live.state[r].realreg=nr;
	live.state[r].realind=nind;

	if (size<live.state[r].validsize) {
		if (live.state[r].val) {
			/* Might as well compensate for the offset now */
			raw_lea_l_brr(nr,rr,oldstate.val);
			live.state[r].val=0;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
		}
		else
			raw_mov_l_rr(nr,rr); /* Make another copy */
	}
	unlock2(rr);
}
1633    
/* Accumulate a pending constant offset on vreg r (materialized later by
   adjust_nreg/remove_offset). */
static __inline__ void add_offset(int r, uae_u32 off)
{
	live.state[r].val+=off;
}
1638    
/* Materialize vreg r's pending offset into its (exclusive) native register
   so that val becomes 0.  No-op for constants or a zero offset.  spec is a
   register hint forwarded to alloc_reg_hinted.  Aborts if exclusivity
   cannot be established. */
static __inline__ void remove_offset(int r, int spec)
{
	reg_status oldstate;
	int rr;

	if (isconst(r))
		return;
	if (live.state[r].val==0)
		return;
	if (isinreg(r) && live.state[r].validsize<4)
		evict(r);	/* need the full 32-bit value to add the offset */

	if (!isinreg(r))
		alloc_reg_hinted(r,4,0,spec);

	Dif (live.state[r].validsize!=4) {
		write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
		abort();
	}
	make_exclusive(r,0,-1);
	/* make_exclusive might have done the job already */
	if (live.state[r].val==0)
		return;

	rr=live.state[r].realreg;

	if (live.nat[rr].nholds==1) {
		//write_log("RemovingB offset %x from reg %d (%d) at %p\n",
		//   live.state[r].val,r,rr,target);
		adjust_nreg(rr,live.state[r].val);
		live.state[r].dirtysize=4;
		live.state[r].val=0;
		set_status(r,DIRTY);
		return;
	}
	write_log("Failed in remove_offset\n");
	abort();
}
1677    
1678     static __inline__ void remove_all_offsets(void)
1679     {
1680     int i;
1681    
1682     for (i=0;i<VREGS;i++)
1683     remove_offset(i,-1);
1684     }
1685    
/* Emit code adding the per-block register-usage counters into the global
   reg_count table (entries presumably 8 bytes wide, given the 8*r scaling
   -- see RECORD_REGISTER_USAGE).  No-op unless profiling is compiled in. */
static inline void flush_reg_count(void)
{
#if RECORD_REGISTER_USAGE
	for (int r = 0; r < 16; r++)
		if (reg_count_local[r])
			ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
#endif
}
1694    
/* Count one access to m68k register r (r < 16) for usage profiling;
   compiled out unless RECORD_REGISTER_USAGE is set. */
static inline void record_register(int r)
{
#if RECORD_REGISTER_USAGE
	if (r < 16)
		reg_count_local[r]++;
#endif
}
1702    
/* Get vreg r into a native register for reading.
 *
 * r          - virtual register
 * size       - bytes needed valid (1/2/4)
 * spec       - required native register, or -1 for any
 * can_offset - if 0, any pending constant offset is materialized first
 *
 * Returns the locked native register holding (at least size bytes of) r;
 * the caller must unlock2() it. */
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
	int n;
	int answer=-1;

	record_register(r);
	if (live.state[r].status==UNDEF) {
		write_log("WARNING: Unexpected read of undefined register %d\n",r);
	}
	if (!can_offset)
		remove_offset(r,spec);

	if (isinreg(r) && live.state[r].validsize>=size) {
		n=live.state[r].realreg;
		/* the cached nreg is only usable if it supports the access size */
		switch(size) {
		case 1:
			if (live.nat[n].canbyte || spec>=0) {
				answer=n;
			}
			break;
		case 2:
			if (live.nat[n].canword || spec>=0) {
				answer=n;
			}
			break;
		case 4:
			answer=n;
			break;
		default: abort();
		}
		if (answer<0)
			evict(r);
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0) {
		answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
	}

	if (spec>=0 && spec!=answer) {
		/* Too bad */
		mov_nregs(spec,answer);
		answer=spec;
	}
	live.nat[answer].locked++;
	live.nat[answer].touched=touchcnt++;
	return answer;
}
1751    
1752    
1753    
/* Read access, any native register, offsets materialized. */
static int readreg(int r, int size)
{
	return readreg_general(r,size,-1,0);
}
1758    
/* Read access in a specific native register. */
static int readreg_specific(int r, int size, int spec)
{
	return readreg_general(r,size,spec,0);
}
1763    
/* Read access that tolerates a pending constant offset on the vreg. */
static int readreg_offset(int r, int size)
{
	return readreg_general(r,size,-1,1);
}
1768    
1769     /* writereg_general(r, size, spec)
1770     *
1771     * INPUT
1772     * - r : mid-layer register
1773     * - size : requested size (1/2/4)
1774     * - spec : -1 if find or make a register free, otherwise specifies
1775     * the physical register to use in any case
1776     *
1777     * OUTPUT
1778     * - hard (physical, x86 here) register allocated to virtual register r
1779     */
/* Get vreg r into an exclusive native register for writing `size` bytes;
   see the comment block above for the parameter contract.  The register
   is returned locked (caller must unlock2()) and the vreg marked DIRTY. */
static __inline__ int writereg_general(int r, int size, int spec)
{
	int n;
	int answer=-1;

	record_register(r);
	if (size<4) {
		/* a partial write cannot absorb a pending offset */
		remove_offset(r,spec);
	}

	make_exclusive(r,size,spec);
	if (isinreg(r)) {
		int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
		int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
		n=live.state[r].realreg;

		Dif (live.nat[n].nholds!=1)
			abort();
		switch(size) {
		case 1:
			if (live.nat[n].canbyte || spec>=0) {
				live.state[r].dirtysize=ndsize;
				live.state[r].validsize=nvsize;
				answer=n;
			}
			break;
		case 2:
			if (live.nat[n].canword || spec>=0) {
				live.state[r].dirtysize=ndsize;
				live.state[r].validsize=nvsize;
				answer=n;
			}
			break;
		case 4:
			live.state[r].dirtysize=ndsize;
			live.state[r].validsize=nvsize;
			answer=n;
			break;
		default: abort();
		}
		if (answer<0)
			evict(r);
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0) {
		answer=alloc_reg_hinted(r,size,1,spec);
	}
	if (spec>=0 && spec!=answer) {
		mov_nregs(spec,answer);
		answer=spec;
	}
	if (live.state[r].status==UNDEF)
		live.state[r].validsize=4;
	live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

	live.nat[answer].locked++;
	live.nat[answer].touched=touchcnt++;
	if (size==4) {
		live.state[r].val=0;	/* full overwrite kills any pending offset */
	}
	else {
		Dif (live.state[r].val) {
			write_log("Problem with val\n");
			abort();
		}
	}
	set_status(r,DIRTY);
	return answer;
}
1851    
/* Write access, any native register. */
static int writereg(int r, int size)
{
	return writereg_general(r,size,-1);
}
1856    
/* Write access in a specific native register. */
static int writereg_specific(int r, int size, int spec)
{
	return writereg_general(r,size,spec);
}
1861    
/* Get vreg r into an exclusive native register for read-modify-write:
   rsize bytes must be valid on entry, wsize bytes (>= rsize) become dirty.
   spec works as in readreg_general.  Returns the locked native register;
   the caller must unlock2() it. */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
	int n;
	int answer=-1;

	record_register(r);
	if (live.state[r].status==UNDEF) {
		write_log("WARNING: Unexpected read of undefined register %d\n",r);
	}
	remove_offset(r,spec);
	make_exclusive(r,0,spec);

	Dif (wsize<rsize) {
		write_log("Cannot handle wsize<rsize in rmw_general()\n");
		abort();
	}
	if (isinreg(r) && live.state[r].validsize>=rsize) {
		n=live.state[r].realreg;
		Dif (live.nat[n].nholds!=1)
			abort();

		switch(rsize) {
		case 1:
			if (live.nat[n].canbyte || spec>=0) {
				answer=n;
			}
			break;
		case 2:
			if (live.nat[n].canword || spec>=0) {
				answer=n;
			}
			break;
		case 4:
			answer=n;
			break;
		default: abort();
		}
		if (answer<0)
			evict(r);
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0) {
		answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
	}

	if (spec>=0 && spec!=answer) {
		/* Too bad */
		mov_nregs(spec,answer);
		answer=spec;
	}
	if (wsize>live.state[r].dirtysize)
		live.state[r].dirtysize=wsize;
	if (wsize>live.state[r].validsize)
		live.state[r].validsize=wsize;
	set_status(r,DIRTY);

	live.nat[answer].locked++;
	live.nat[answer].touched=touchcnt++;

	Dif (live.state[r].val) {
		write_log("Problem with val(rmw)\n");
		abort();
	}
	return answer;
}
1928    
/* Read-modify-write access, any native register. */
static int rmw(int r, int wsize, int rsize)
{
	return rmw_general(r,wsize,rsize,-1);
}
1933    
/* Read-modify-write access in a specific native register. */
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
	return rmw_general(r,wsize,rsize,spec);
}
1938    
1939    
1940     /* needed for restoring the carry flag on non-P6 cores */
1941     static void bt_l_ri_noclobber(R4 r, IMM i)
1942     {
1943     int size=4;
1944     if (i<16)
1945     size=2;
1946     r=readreg(r,size);
1947     raw_bt_l_ri(r,i);
1948     unlock2(r);
1949     }
1950    
1951     /********************************************************************
1952     * FPU register status handling. EMIT TIME! *
1953     ********************************************************************/
1954    
/* Write FPU vreg r back to its memory slot (extended or double precision
   depending on USE_LONG_DOUBLE), keeping the register copy (CLEAN). */
static void f_tomem(int r)
{
	if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
		raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
		raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
		live.fate[r].status=CLEAN;
	}
}
1966    
/* Write FPU vreg r back using the "drop" store variant and mark it INMEM
   (register copy given up). */
static void f_tomem_drop(int r)
{
	if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
		raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
		raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
		live.fate[r].status=INMEM;
	}
}
1978    
1979    
/* An FPU vreg is "in a register" iff its status is CLEAN or DIRTY. */
static __inline__ int f_isinreg(int r)
{
	return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
}
1984    
/* Remove FPU vreg r from its native register: write back (dropping the
   register copy when r is the sole holder), then unlink it from the
   holds[] stack, swapping the top entry into r's slot if r was not last. */
static void f_evict(int r)
{
	int rr;

	if (!f_isinreg(r))
		return;
	rr=live.fate[r].realreg;
	if (live.fat[rr].nholds==1)
		f_tomem_drop(r);
	else
		f_tomem(r);

	Dif (live.fat[rr].locked &&
		 live.fat[rr].nholds==1) {
		write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
		abort();
	}

	live.fat[rr].nholds--;
	if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
		int topreg=live.fat[rr].holds[live.fat[rr].nholds];
		int thisind=live.fate[r].realind;
		live.fat[rr].holds[thisind]=topreg;
		live.fate[topreg].realind=thisind;
	}
	live.fate[r].status=INMEM;
	live.fate[r].realreg=-1;
}
2013    
2014     static __inline__ void f_free_nreg(int r)
2015     {
2016     int i=live.fat[r].nholds;
2017    
2018     while (i) {
2019     int vr;
2020    
2021     --i;
2022     vr=live.fat[r].holds[i];
2023     f_evict(vr);
2024     }
2025     Dif (live.fat[r].nholds!=0) {
2026     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2027     abort();
2028     }
2029     }
2030    
2031    
2032     /* Use with care! */
/* Use with care: marks the cached FPU value clean WITHOUT writing it back,
   discarding any dirty data. */
static __inline__ void f_isclean(int r)
{
	if (!f_isinreg(r))
		return;
	live.fate[r].status=CLEAN;
}
2039    
/* Detach FPU vreg r from its native register without writing back
   (f_isclean drops the dirty state first). */
static __inline__ void f_disassociate(int r)
{
	f_isclean(r);
	f_evict(r);
}
2045    
2046    
2047    
/* Allocate an FPU native register for vreg r, least-recently-touched
   first (free registers preferred).  Loads the value from memory unless
   willclobber is set, then links r into the register's holds[] stack.
   Returns the chosen native FPU register. */
static int f_alloc_reg(int r, int willclobber)
{
	int bestreg;
	uae_s32 when;
	int i;
	uae_s32 badness;
	bestreg=-1;
	when=2000000000;
	for (i=N_FREGS;i--;) {
		badness=live.fat[i].touched;
		if (live.fat[i].nholds==0)
			badness=0;

		if (!live.fat[i].locked && badness<when) {
			bestreg=i;
			when=badness;
			if (live.fat[i].nholds==0)
				break;
		}
	}
	Dif (bestreg==-1)
		abort();

	if (live.fat[bestreg].nholds>0) {
		f_free_nreg(bestreg);
	}
	if (f_isinreg(r)) {
		f_evict(r);
	}

	if (!willclobber) {
		if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
			raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else
			raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif
		}
		live.fate[r].status=CLEAN;
	}
	else {
		live.fate[r].status=DIRTY;
	}
	live.fate[r].realreg=bestreg;
	live.fate[r].realind=live.fat[bestreg].nholds;
	live.fat[bestreg].touched=touchcnt++;
	live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
	live.fat[bestreg].nholds++;

	return bestreg;
}
2099    
/* Drop one lock on FPU native register r; aborts if it was not locked. */
static void f_unlock(int r)
{
	Dif (!live.fat[r].locked)
		abort();
	live.fat[r].locked--;
}
2106    
/* Take a lock on FPU native register r so it cannot be reallocated. */
static void f_setlock(int r)
{
	live.fat[r].locked++;
}
2111    
/* Get FPU vreg r into a native register for reading.  Returns the locked
   native register; the caller must f_unlock() it. */
static __inline__ int f_readreg(int r)
{
	int n;
	int answer=-1;

	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
		answer=n;
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0)
		answer=f_alloc_reg(r,0);

	live.fat[answer].locked++;
	live.fat[answer].touched=touchcnt++;
	return answer;
}
2130    
/* Ensure emulated FPU register r is the ONLY value held by its host
   register, so the host register can be written without corrupting
   other emulated registers that currently share it.
   If clobber is non-zero the current value need not be copied over
   (the caller is about to overwrite it completely).
   No-op if r is not in a register or already has exclusive use. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
	freg_status oldstate;
	int rr=live.fate[r].realreg;
	int nr;
	int nind;
	int ndirt=0;
	int i;

	if (!f_isinreg(r))
		return;
	if (live.fat[rr].nholds==1)
		return;
	/* Count how many OTHER emulated registers sharing rr are dirty
	   (would need a writeback if evicted). */
	for (i=0;i<live.fat[rr].nholds;i++) {
		int vr=live.fat[rr].holds[i];
		if (vr!=r && live.fate[vr].status==DIRTY)
			ndirt++;
	}
	if (!ndirt && !live.fat[rr].locked) {
		/* Everything else is clean, so let's keep this register */
		for (i=0;i<live.fat[rr].nholds;i++) {
			int vr=live.fat[rr].holds[i];
			if (vr!=r) {
				f_evict(vr);
				i--; /* Try that index again! */
			}
		}
		/* Debug-build sanity check: after evicting the others, rr
		   must hold exactly one value. */
		Dif (live.fat[rr].nholds!=1) {
			write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
			for (i=0;i<live.fat[rr].nholds;i++) {
				write_log(" %d(%d,%d)",live.fat[rr].holds[i],
					live.fate[live.fat[rr].holds[i]].realreg,
					live.fate[live.fat[rr].holds[i]].realind);
			}
			write_log("\n");
			abort();
		}
		return;
	}

	/* We have to split the register */
	oldstate=live.fate[r];

	f_setlock(rr); /* Make sure this doesn't go away */
	/* Forget about r being in the register rr */
	f_disassociate(r);
	/* Get a new register, that we will clobber completely */
	nr=f_alloc_reg(r,1);
	nind=live.fate[r].realind;
	if (!clobber)
		raw_fmov_rr(nr,rr); /* Make another copy */
	live.fate[r]=oldstate; /* Keep all the old state info */
	live.fate[r].realreg=nr;
	live.fate[r].realind=nind;
	f_unlock(rr);
}
2187    
2188    
/* Make emulated FPU register r available for writing (its old value is
   discarded).  Returns a locked host register; marks r DIRTY so it will
   be written back eventually. */
static __inline__ int f_writereg(int r)
{
	int n;
	int answer=-1;

	/* Writing clobbers the value, so r must not share a host register. */
	f_make_exclusive(r,1);
	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
		answer=n;
	}
	if (answer<0) {
		answer=f_alloc_reg(r,1);
	}
	live.fate[r].status=DIRTY;
	live.fat[answer].locked++;
	live.fat[answer].touched=touchcnt++;
	return answer;
}

/* Read-modify-write access to emulated FPU register r: current value is
   loaded (not clobbered), result is marked DIRTY.  Returns a locked
   host register. */
static int f_rmw(int r)
{
	int n;

	f_make_exclusive(r,0);
	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
	}
	else
		n=f_alloc_reg(r,0);
	live.fate[r].status=DIRTY;
	live.fat[n].locked++;
	live.fat[n].touched=touchcnt++;
	return n;
}

/* Transfer the FPU condition codes held in FP_RESULT into the native
   flags register.  tmp is a scratch emulated register used only when
   the target register needed by raw_fflags_into_flags would otherwise
   be clobbered (FFLAG_NREG_CLOBBER_CONDITION). */
static void fflags_into_flags_internal(uae_u32 tmp)
{
	int r;

	clobber_flags();
	r=f_readreg(FP_RESULT);
	if (FFLAG_NREG_CLOBBER_CONDITION) {
		int tmp2=tmp;
		/* Pin tmp into the specific host register the raw op needs. */
		tmp=writereg_specific(tmp,4,FFLAG_NREG);
		raw_fflags_into_flags(r);
		unlock2(tmp);
		forget_about(tmp2);
	}
	else
		raw_fflags_into_flags(r);
	f_unlock(r);
	live_flags();
}
2242    
2243    
2244    
2245    
2246     /********************************************************************
2247     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2248     ********************************************************************/
2249    
2250     /*
2251     * RULES FOR HANDLING REGISTERS:
2252     *
2253     * * In the function headers, order the parameters
2254     * - 1st registers written to
2255     * - 2nd read/modify/write registers
2256     * - 3rd registers read from
2257     * * Before calling raw_*, you must call readreg, writereg or rmw for
2258     * each register
2259     * * The order for this is
2260     * - 1st call remove_offset for all registers written to with size<4
2261     * - 2nd call readreg for all registers read without offset
2262     * - 3rd call rmw for all rmw registers
2263     * - 4th call readreg_offset for all registers that can handle offsets
2264     * - 5th call get_offset for all the registers from the previous step
2265     * - 6th call writereg for all written-to registers
2266     * - 7th call raw_*
2267     * - 8th unlock2 all registers that were locked
2268     */
2269    
/* Record that the emulated flags now live in the native flags register;
   the stacked copy is stale. */
MIDFUNC(0,live_flags,(void))
{
	live.flags_on_stack=TRASH;
	live.flags_in_flags=VALID;
	live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))

/* Mark the current flag state as dead: nothing downstream reads it. */
MIDFUNC(0,dont_care_flags,(void))
{
	live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))


/* Copy the native carry flag into the emulated X flag (FLAGX) by
   storing setcc(cc=2, i.e. "carry set") to FLAGX's memory slot. */
MIDFUNC(0,duplicate_carry,(void))
{
	evict(FLAGX);
	make_flags_live_internal();
	COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
	log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))

/* Load the emulated X flag back into the native carry flag. */
MIDFUNC(0,restore_carry,(void))
{
	if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
		bt_l_ri_noclobber(FLAGX,0);
	}
	else { /* Avoid the stall the above creates.
		  This is slow on non-P6, though.
		*/
		/* rol b,8 moves bit 0 into CF without a partial-register read. */
		COMPCALL(rol_b_ri(FLAGX,8));
		isclean(FLAGX);
	}
}
MENDFUNC(0,restore_carry,(void))

/* Bracket a sequence whose native flag results must be preserved. */
MIDFUNC(0,start_needflags,(void))
{
	needflags=1;
}
MENDFUNC(0,start_needflags,(void))

MIDFUNC(0,end_needflags,(void))
{
	needflags=0;
}
MENDFUNC(0,end_needflags,(void))

/* Force the emulated flags into the native flags register. */
MIDFUNC(0,make_flags_live,(void))
{
	make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))

/* Move FPU condition codes into the native flags.  NOTE(review):
   clobber_flags() is also called inside fflags_into_flags_internal,
   so this call appears redundant — kept as-is. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
	clobber_flags();
	fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2332    
2333    
/* Bit test: CF := bit i of r.  For i<16 a 2-byte access suffices. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
	int size=4;
	if (i<16)
		size=2;
	CLOBBER_BT;
	r=readreg(r,size);
	raw_bt_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */

/* Bit test with register bit index: CF := bit b of r. */
MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
	CLOBBER_BT;
	r=readreg(r,4);
	b=readreg(b,4);
	raw_bt_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */

/* Bit test-and-complement: CF := bit i of r, then flip that bit. */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
	int size=4;
	if (i<16)
		size=2;
	CLOBBER_BT;
	r=rmw(r,size,size);
	raw_btc_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	b=readreg(b,4);
	r=rmw(r,4,4);
	raw_btc_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))


/* Bit test-and-reset: CF := bit i of r, then clear that bit. */
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
	int size=4;
	if (i<16)
		size=2;
	CLOBBER_BT;
	r=rmw(r,size,size);
	raw_btr_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	b=readreg(b,4);
	r=rmw(r,4,4);
	raw_btr_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))


/* Bit test-and-set: CF := bit i of r, then set that bit. */
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
	int size=4;
	if (i<16)
		size=2;
	CLOBBER_BT;
	r=rmw(r,size,size);
	raw_bts_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))

MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	b=readreg(b,4);
	r=rmw(r,4,4);
	raw_bts_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2427    
/* Load a long from absolute memory address s into register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,4);
	raw_mov_l_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))


/* Indirect call through register r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
	r=readreg(r,4);
	raw_call_r(r);
	unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */

/* Subtract immediate s from the long at absolute address d. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
	CLOBBER_SUB;
	raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))

/* Store immediate s (long/word/byte) to absolute address d. */
MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))

MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))

MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2473    
/* Rotate left by immediate count (byte/word/long).  A zero count is a
   no-op unless the flags result is needed. */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROL;
	r=rmw(r,1,1);
	raw_rol_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))

MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROL;
	r=rmw(r,2,2);
	raw_rol_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))

MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROL;
	r=rmw(r,4,4);
	raw_rol_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2506    
2507     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2508     {
2509     if (isconst(r)) {
2510     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2511     return;
2512     }
2513     CLOBBER_ROL;
2514     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2515     d=rmw(d,4,4);
2516     Dif (r!=1) {
2517     write_log("Illegal register %d in raw_rol_b\n",r);
2518     abort();
2519     }
2520     raw_rol_l_rr(d,r) ;
2521     unlock2(r);
2522     unlock2(d);
2523     }
2524     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2525    
2526     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2527     { /* Can only do this with r==1, i.e. cl */
2528    
2529     if (isconst(r)) {
2530     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2531     return;
2532     }
2533     CLOBBER_ROL;
2534     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2535     d=rmw(d,2,2);
2536     Dif (r!=1) {
2537     write_log("Illegal register %d in raw_rol_b\n",r);
2538     abort();
2539     }
2540     raw_rol_w_rr(d,r) ;
2541     unlock2(r);
2542     unlock2(d);
2543     }
2544     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2545    
/* Rotate byte d left by the count in r.  Count must be in CL (host
   register 1); constant counts use the immediate form instead. */
MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_ROL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,1,1);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_rol_b\n",r);
		abort();
	}
	raw_rol_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2566    
2567    
2568     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2569     {
2570     if (isconst(r)) {
2571     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2572     return;
2573     }
2574     CLOBBER_SHLL;
2575     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2576     d=rmw(d,4,4);
2577     Dif (r!=1) {
2578     write_log("Illegal register %d in raw_rol_b\n",r);
2579     abort();
2580     }
2581     raw_shll_l_rr(d,r) ;
2582     unlock2(r);
2583     unlock2(d);
2584     }
2585     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2586    
2587     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2588     { /* Can only do this with r==1, i.e. cl */
2589    
2590     if (isconst(r)) {
2591     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2592     return;
2593     }
2594     CLOBBER_SHLL;
2595     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2596     d=rmw(d,2,2);
2597     Dif (r!=1) {
2598     write_log("Illegal register %d in raw_shll_b\n",r);
2599     abort();
2600     }
2601     raw_shll_w_rr(d,r) ;
2602     unlock2(r);
2603     unlock2(d);
2604     }
2605     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2606    
/* Shift byte d left (logical) by the count in r.  Count must be in CL
   (host register 1); constant counts use the immediate form instead. */
MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHLL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,1,1);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shll_b\n",r);
		abort();
	}
	raw_shll_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2627    
2628    
/* Rotate right by immediate count (byte/word/long).  A zero count is a
   no-op unless the flags result is needed.
   NOTE(review): parameters are declared R1/R2/R4 (read-only) but the
   bodies use rmw() — they do modify r.  Annotation looks stale; kept
   unchanged since gencomp consumes these signatures. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,1,1);
	raw_ror_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))

MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,2,2);
	raw_ror_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))

MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,4,4);
	raw_ror_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))

/* Rotate right by the count in r (pinned to CL via SHIFTCOUNT_NREG).
   NOTE(review): unlike the rol/shll/shrl/shra _rr variants, these lack
   the Dif(r!=1) sanity check — presumably an omission, left as-is. */
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,4,4);
	raw_ror_l_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))

MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,2,2);
	raw_ror_w_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))

MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,1,1);
	raw_ror_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2707    
2708     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2709     {
2710     if (isconst(r)) {
2711     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2712     return;
2713     }
2714     CLOBBER_SHRL;
2715     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2716     d=rmw(d,4,4);
2717     Dif (r!=1) {
2718     write_log("Illegal register %d in raw_rol_b\n",r);
2719     abort();
2720     }
2721     raw_shrl_l_rr(d,r) ;
2722     unlock2(r);
2723     unlock2(d);
2724     }
2725     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2726    
2727     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2728     { /* Can only do this with r==1, i.e. cl */
2729    
2730     if (isconst(r)) {
2731     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2732     return;
2733     }
2734     CLOBBER_SHRL;
2735     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2736     d=rmw(d,2,2);
2737     Dif (r!=1) {
2738     write_log("Illegal register %d in raw_shrl_b\n",r);
2739     abort();
2740     }
2741     raw_shrl_w_rr(d,r) ;
2742     unlock2(r);
2743     unlock2(d);
2744     }
2745     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2746    
/* Shift byte d right (logical) by the count in r.  Count must be in CL
   (host register 1); constant counts use the immediate form instead. */
MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHRL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,1,1);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shrl_b\n",r);
		abort();
	}
	raw_shrl_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2767    
2768    
2769    
/* Shift left (logical) by immediate.  Zero count without a flags
   consumer is a no-op; the long form also folds constants directly
   into the register's known value when flags are not needed. */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val<<=i;
		return;
	}
	CLOBBER_SHLL;
	r=rmw(r,4,4);
	raw_shll_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHLL;
	r=rmw(r,2,2);
	raw_shll_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHLL;
	r=rmw(r,1,1);
	raw_shll_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))

/* Shift right (logical) by immediate; same structure as shll above,
   with constant folding only for the long form. */
MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val>>=i;
		return;
	}
	CLOBBER_SHRL;
	r=rmw(r,4,4);
	raw_shrl_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRL;
	r=rmw(r,2,2);
	raw_shrl_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRL;
	r=rmw(r,1,1);
	raw_shrl_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2843    
/* Shift right (arithmetic, sign-preserving) by immediate.  Zero count
   without a flags consumer is a no-op. */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,4,4);
	raw_shra_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,2,2);
	raw_shra_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,1,1);
	raw_shra_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2876    
2877     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2878     {
2879     if (isconst(r)) {
2880     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2881     return;
2882     }
2883     CLOBBER_SHRA;
2884     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2885     d=rmw(d,4,4);
2886     Dif (r!=1) {
2887     write_log("Illegal register %d in raw_rol_b\n",r);
2888     abort();
2889     }
2890     raw_shra_l_rr(d,r) ;
2891     unlock2(r);
2892     unlock2(d);
2893     }
2894     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2895    
2896     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2897     { /* Can only do this with r==1, i.e. cl */
2898    
2899     if (isconst(r)) {
2900     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2901     return;
2902     }
2903     CLOBBER_SHRA;
2904     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2905     d=rmw(d,2,2);
2906     Dif (r!=1) {
2907     write_log("Illegal register %d in raw_shra_b\n",r);
2908     abort();
2909     }
2910     raw_shra_w_rr(d,r) ;
2911     unlock2(r);
2912     unlock2(d);
2913     }
2914     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2915    
/* Shift byte d right (arithmetic) by the count in r.  Count must be in
   CL (host register 1); constant counts use the immediate form. */
MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHRA;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,1,1);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shra_b\n",r);
		abort();
	}
	raw_shra_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2936    
2937    
/* setcc: d := 1 if condition cc holds, else 0 (byte result). */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
	CLOBBER_SETCC;
	d=writereg(d,1);
	raw_setcc(d,cc);
	unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))

/* setcc to absolute memory address d. */
MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
	CLOBBER_SETCC;
	raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))

/* Conditional move: d := s if condition cc holds. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
	if (d==s)
		return;
	CLOBBER_CMOV;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_cmov_l_rr(d,s,cc);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))

/* Conditional move from absolute memory address s. */
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
	CLOBBER_CMOV;
	d=rmw(d,4,4);
	raw_cmov_l_rm(d,s,cc);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))

/* Bit scan forward: d := index of lowest set bit in s.
   NOTE(review): s is declared W4 but only read here — presumably the
   annotation reflects flag/scratch semantics at gencomp level. */
MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
{
	CLOBBER_BSF;
	s = readreg(s, 4);
	d = writereg(d, 4);
	raw_bsf_l_rr(d, s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2986    
/* Set the Z flag depending on the value in s. Note that the
   value has to be 0 or -1 (or, more precisely, for non-zero
   values, bit 14 must be set)! */
MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
{
	CLOBBER_BSF;
	/* Pin s into the host register the flag-setting helper requires. */
	s=rmw_specific(s,4,4,FLAG_NREG3);
	tmp=writereg(tmp,4);
	raw_flags_set_zero(s, tmp);
	unlock2(tmp);
	unlock2(s);
}
MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))

/* Signed 32x32 -> 32 multiply: d *= s. */
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_imul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))

/* Signed 32x32 -> 64 multiply.  Operands are pinned to the specific
   host register pair the widening multiply requires (MUL_NREG1/2,
   i.e. EDX:EAX on x86). */
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_imul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))

/* Unsigned 32x32 -> 64 multiply; same register pinning as above. */
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_mul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))

/* Unsigned 32x32 -> 32 multiply: d *= s. */
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_mul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3044    
#if SIZEOF_VOID_P == 8
/* 64-bit hosts only: sign-extend a 32-bit value into d.  Constants are
   folded directly; s==d must be handled with a single rmw lock because
   locking the same register twice with different sizes is invalid. */
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)live.state[s].val);
		return;
	}

	CLOBBER_SE32;
	isrmw=(s==d);
	if (!isrmw) {
		s=readreg(s,4);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are int trouble! */
		s=d=rmw(s,4,4);
	}
	raw_sign_extend_32_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
#endif
3076    
/* Sign-extend the low 16 bits of s into d.  Constants are folded;
   s==d uses a single rmw lock (double-locking with different sizes
   is invalid). */
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s16)live.state[s].val);
		return;
	}

	CLOBBER_SE16;
	isrmw=(s==d);
	if (!isrmw) {
		s=readreg(s,2);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are int trouble! */
		s=d=rmw(s,4,2);
	}
	raw_sign_extend_16_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))

/* Sign-extend the low 8 bits of s into d; same structure as above. */
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s8)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_SE8;
	if (!isrmw) {
		s=readreg(s,1);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are int trouble! */
		s=d=rmw(s,4,1);
	}

	raw_sign_extend_8_rr(d,s);

	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3138    
3139    
/* Zero-extend the low 16 bits of s into d.  Constants are folded;
   s==d uses a single rmw lock (double-locking with different sizes
   is invalid). */
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u16)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_ZE16;
	if (!isrmw) {
		s=readreg(s,2);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are int trouble! */
		s=d=rmw(s,4,2);
	}
	raw_zero_extend_16_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))

/* Zero-extend the low 8 bits of s into d; same structure as above. */
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;
	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u8)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_ZE8;
	if (!isrmw) {
		s=readreg(s,1);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are int trouble! */
		s=d=rmw(s,4,1);
	}

	raw_zero_extend_8_rr(d,s);

	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3200    
/* Byte register-to-register move; constant sources become immediates. */
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=writereg(d,1);
	raw_mov_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))

/* Word register-to-register move; constant sources become immediates. */
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=writereg(d,2);
	raw_mov_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3236    
3237    
/* Load d (long/word/byte) from memory at baser + index*factor. */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_mov_l_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,2);

	raw_mov_w_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,1);

	raw_mov_b_rrm_indexed(d,baser,index,factor);

	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3280    
3281    
/* Store s (long/word/byte) to memory at baser + index*factor. */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,4);

	/* Debug check: s must not have been allocated into the same host
	   register as the address operands. */
	Dif (baser==s || index==s)
	abort();


	raw_mov_l_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,2);

	raw_mov_w_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg(baser,4);
	index=readreg(index,4);

	raw_mov_b_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3327    
3328    
/* Store s (long/word/byte) to memory at base + baser + index*factor.
   readreg_offset/get_offset fold any known constant offsets of the
   address registers into the immediate base displacement. */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,4);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	/* Fold pending constant offsets into the displacement. */
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,2);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3388    
3389    
3390    
/* Read a long from base+baser+factor*index into d.  Pending constant
   offsets on baser/index are folded into the immediate base. */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
int basereg=baser;   /* original mid-layer ids, for get_offset() below */
int indexreg=index;

CLOBBER_MOV;
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
d=writereg(d,4);
raw_mov_l_brrm_indexed(d,base,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a word from base+baser+factor*index into d (partial write, so
   d's pending offset must be flushed first via remove_offset). */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
int basereg=baser;
int indexreg=index;

CLOBBER_MOV;
remove_offset(d,-1);
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
d=writereg(d,2);
raw_mov_w_brrm_indexed(d,base,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a byte from base+baser+factor*index into d. */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
int basereg=baser;
int indexreg=index;

CLOBBER_MOV;
remove_offset(d,-1);
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
d=writereg(d,1);
raw_mov_b_brrm_indexed(d,base,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3449    
/* Read a long from base+factor*index into d.  If index is a known
   constant the whole address is folded and this degenerates to a plain
   absolute load (mov_l_rm). */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
int indexreg=index;   /* original id, for get_offset() after remap */

if (isconst(index)) {
COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
return;
}

CLOBBER_MOV;
index=readreg_offset(index,4);
base+=get_offset(indexreg)*factor;   /* fold pending offset into base */
d=writereg(d,4);

raw_mov_l_rm_indexed(d,base,index,factor);
unlock2(index);
unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3470    
3471    
/* read the long at the address contained in s+offset and store in d;
   a constant s turns this into an absolute load */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=writereg(d,4);

raw_mov_l_rR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=writereg(d,2);

raw_mov_w_rR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=writereg(d,1);

raw_mov_b_rR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3522    
/* read the long at the address contained in s+offset and store in d;
   any pending constant offset on s is folded into the displacement */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
int sreg=s;   /* original mid-layer id, for get_offset() after remap */
if (isconst(s)) {
COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg_offset(s,4);
offset+=get_offset(sreg);
d=writereg(d,4);

raw_mov_l_brR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
int sreg=s;
if (isconst(s)) {
COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
remove_offset(d,-1);   /* partial write: flush d's pending offset first */
s=readreg_offset(s,4);
offset+=get_offset(sreg);
d=writereg(d,2);

raw_mov_w_brR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
int sreg=s;
if (isconst(s)) {
COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
remove_offset(d,-1);
s=readreg_offset(s,4);
offset+=get_offset(sreg);
d=writereg(d,1);

raw_mov_b_brR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3581    
/* Store immediate i (long) to memory at d+offset; if d is a known
   constant this becomes an absolute immediate store. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
int dreg=d;   /* original id, for get_offset() after remap */
if (isconst(d)) {
COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
return;
}

CLOBBER_MOV;
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_l_Ri(d,i,offset);
unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))

/* Store immediate i (word) to memory at d+offset. */
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
return;
}

CLOBBER_MOV;
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_w_Ri(d,i,offset);
unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))

/* Store immediate i (byte) to memory at d+offset. */
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
return;
}

CLOBBER_MOV;
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_b_Ri(d,i,offset);
unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3629    
/* Warning! OFFSET is byte sized only! */
/* Store s (long) to memory at d+offset.  Constant d or s degrade to
   the corresponding memory/immediate forms. */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
if (isconst(d)) {
COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
return;
}
if (isconst(s)) {
COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
return;
}

CLOBBER_MOV;
s=readreg(s,4);
d=readreg(d,4);

raw_mov_l_Rr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))

/* Store s (word) to memory at d+offset.
   NOTE(review): the byte-sized-offset warning above presumably applies
   here as well — confirm against the raw_ emitter. */
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
if (isconst(d)) {
COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
return;
}
if (isconst(s)) {
COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
return;
}

CLOBBER_MOV;
s=readreg(s,2);
d=readreg(d,4);
raw_mov_w_Rr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))

/* Store s (byte) to memory at d+offset. */
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
if (isconst(d)) {
COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
return;
}
if (isconst(s)) {
COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
return;
}

CLOBBER_MOV;
s=readreg(s,1);
d=readreg(d,4);
raw_mov_b_Rr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3691    
/* d = s + offset (address computation, no memory access).  Constant s
   folds completely; when d==s and USE_OFFSET is on, the add is deferred
   by just recording a pending offset on the register. */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
return;
}
#if USE_OFFSET
if (d==s) {
add_offset(d,offset);
return;
}
#endif
CLOBBER_LEA;
s=readreg(s,4);
d=writereg(d,4);
raw_lea_l_brr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))

/* d = s + factor*index + offset; a zero offset delegates to the
   cheaper two-operand indexed form. */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
if (!offset) {
COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
return;
}
CLOBBER_LEA;
s=readreg(s,4);
index=readreg(index,4);
d=writereg(d,4);

raw_lea_l_brr_indexed(d,s,index,factor,offset);
unlock2(d);
unlock2(index);
unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

/* d = s + factor*index */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
CLOBBER_LEA;
s=readreg(s,4);
index=readreg(index,4);
d=writereg(d,4);

raw_lea_l_rr_indexed(d,s,index,factor);
unlock2(d);
unlock2(index);
unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3744    
/* write s to the long at the address contained in d+offset
   (the original comment had the operands backwards: d holds the
   address, s is the value stored).  Pending constant offsets on d are
   folded into the displacement. */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
int dreg=d;   /* original id, for get_offset() after remap */
if (isconst(d)) {
COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
return;
}

CLOBBER_MOV;
s=readreg(s,4);
d=readreg_offset(d,4);
offset+=get_offset(dreg);

raw_mov_l_bRr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))

/* write s to the word at the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
int dreg=d;

if (isconst(d)) {
COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
return;
}

CLOBBER_MOV;
s=readreg(s,2);
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_w_bRr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))

/* write s to the byte at the address contained in d+offset */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
return;
}

CLOBBER_MOV;
s=readreg(s,1);
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_b_bRr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3802    
/* Byte-swap all 32 bits of r.  A constant register is swapped at
   compile time via reverse32() with no code emitted. */
MIDFUNC(1,bswap_32,(RW4 r))
{
int reg=r;

if (isconst(r)) {
uae_u32 oldv=live.state[r].val;
live.state[r].val=reverse32(oldv);
return;
}

CLOBBER_SW32;
r=rmw(r,4,4);
raw_bswap_32(r);
unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))

/* Byte-swap only the low 16 bits of r; the upper 16 bits are
   preserved (see the explicit 0xffff0000 mask in the constant path). */
MIDFUNC(1,bswap_16,(RW2 r))
{
if (isconst(r)) {
uae_u32 oldv=live.state[r].val;
live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
(oldv&0xffff0000);
return;
}

CLOBBER_SW16;
r=rmw(r,2,2);

raw_bswap_16(r);
unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3836    
3837    
3838    
/* Register-to-register long move.  No code is emitted: instead d is
   aliased onto s's native register by updating the live-state maps, so
   the actual copy only happens if/when one of them is later spilled or
   written.  The bookkeeping below mirrors what readreg/writereg do and
   is order-sensitive — do not reorder. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
int olds;

if (d==s) { /* How pointless! */
return;
}
if (isconst(s)) {
COMPCALL(mov_l_ri)(d,live.state[s].val);
return;
}
olds=s;   /* keep the mid-layer id; readreg_offset remaps s below */
disassociate(d);
s=readreg_offset(s,4);
live.state[d].realreg=s;
live.state[d].realind=live.nat[s].nholds;
live.state[d].val=live.state[olds].val;
live.state[d].validsize=4;
live.state[d].dirtysize=4;
set_status(d,DIRTY);

/* Register d as one more holder of native register s. */
live.nat[s].holds[live.nat[s].nholds]=d;
live.nat[s].nholds++;
log_clobberreg(d);
/* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
d,s,live.state[d].realind,live.nat[s].nholds); */
unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3868    
/* Store register s (long) to absolute address d; a constant s becomes
   an immediate store. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
if (isconst(s)) {
COMPCALL(mov_l_mi)(d,live.state[s].val);
return;
}
CLOBBER_MOV;
s=readreg(s,4);

raw_mov_l_mr(d,s);
unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))


/* Store register s (word) to absolute address d. */
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
if (isconst(s)) {
COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
return;
}
CLOBBER_MOV;
s=readreg(s,2);

raw_mov_w_mr(d,s);
unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

/* Load word from absolute address s into d. */
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,2);

raw_mov_w_rm(d,s);
unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

/* Store register s (byte) to absolute address d. */
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
if (isconst(s)) {
COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
return;
}

CLOBBER_MOV;
s=readreg(s,1);

raw_mov_b_mr(d,s);
unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

/* Load byte from absolute address s into d. */
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,1);

raw_mov_b_rm(d,s);
unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))

/* Load immediate into d.  Pure constant propagation — no code is
   emitted; the value is materialized lazily when actually needed. */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
set_const(d,s);
return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

/* Load 16-bit immediate into d (partial write, so a real move). */
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,2);

raw_mov_w_ri(d,s);
unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

/* Load 8-bit immediate into d. */
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,1);

raw_mov_b_ri(d,s);
unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3959    
3960    
/* Add immediate s to the long at absolute address d. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
CLOBBER_ADD;
raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

/* Add immediate s to the word at absolute address d. */
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
CLOBBER_ADD;
raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

/* Add immediate s to the byte at absolute address d. */
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
CLOBBER_ADD;
raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3981    
3982    
/* Set flags from d & i (no destination written). */
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
CLOBBER_TEST;
d=readreg(d,4);

raw_test_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3992    
3993     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3994     {
3995     CLOBBER_TEST;
3996     d=readreg(d,4);
3997     s=readreg(s,4);
3998    
3999     raw_test_l_rr(d,s);;
4000     unlock2(d);
4001     unlock2(s);
4002     }
4003     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4004    
/* Set flags from d & s (word). */
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
CLOBBER_TEST;
d=readreg(d,2);
s=readreg(s,2);

raw_test_w_rr(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

/* Set flags from d & s (byte). */
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
CLOBBER_TEST;
d=readreg(d,1);
s=readreg(s,1);

raw_test_b_rr(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4028    
4029    
/* d &= i.  Folded at compile time when d is constant and the emitted
   flags are not needed (needflags). */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
if (isconst(d) && !needflags) {
live.state[d].val &= i;
return;
}

CLOBBER_AND;
d=rmw(d,4,4);

raw_and_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

/* d &= s (long) */
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
CLOBBER_AND;
s=readreg(s,4);
d=rmw(d,4,4);

raw_and_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

/* d &= s (word) */
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
CLOBBER_AND;
s=readreg(s,2);
d=rmw(d,2,2);

raw_and_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

/* d &= s (byte) */
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
CLOBBER_AND;
s=readreg(s,1);
d=rmw(d,1,1);

raw_and_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
4080    
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* d |= memory at absolute address s */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
CLOBBER_OR;
d=rmw(d,4,4);

raw_or_l_rm(d,s);
unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))

/* d |= i; constant-folded when d is constant and flags are unneeded */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
if (isconst(d) && !needflags) {
live.state[d].val|=i;
return;
}
CLOBBER_OR;
d=rmw(d,4,4);

raw_or_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))

/* d |= s (long); folded when both are constants and flags unneeded */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
if (isconst(d) && isconst(s) && !needflags) {
live.state[d].val|=live.state[s].val;
return;
}
CLOBBER_OR;
s=readreg(s,4);
d=rmw(d,4,4);

raw_or_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))

/* d |= s (word) */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
CLOBBER_OR;
s=readreg(s,2);
d=rmw(d,2,2);

raw_or_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

/* d |= s (byte) */
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
CLOBBER_OR;
s=readreg(s,1);
d=rmw(d,1,1);

raw_or_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4145    
/* d += s + carry (long); consumes the live carry flag, so no
   constant-folding shortcut is possible here. */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
CLOBBER_ADC;
s=readreg(s,4);
d=rmw(d,4,4);

raw_adc_l(d,s);

unlock2(d);
unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

/* d += s + carry (word) */
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
CLOBBER_ADC;
s=readreg(s,2);
d=rmw(d,2,2);

raw_adc_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

/* d += s + carry (byte) */
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
CLOBBER_ADC;
s=readreg(s,1);
d=rmw(d,1,1);

raw_adc_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
4182    
/* d += s (long); a constant s delegates to the immediate form, which
   can fold further or use the pending-offset mechanism. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
if (isconst(s)) {
COMPCALL(add_l_ri)(d,live.state[s].val);
return;
}

CLOBBER_ADD;
s=readreg(s,4);
d=rmw(d,4,4);

raw_add_l(d,s);

unlock2(d);
unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

/* d += s (word) */
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
if (isconst(s)) {
COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
return;
}

CLOBBER_ADD;
s=readreg(s,2);
d=rmw(d,2,2);

raw_add_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

/* d += s (byte) */
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
if (isconst(s)) {
COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
return;
}

CLOBBER_ADD;
s=readreg(s,1);
d=rmw(d,1,1);

raw_add_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4234    
/* d -= i (long).  When flags are not needed the subtraction is either
   folded into a constant or deferred as a pending offset (USE_OFFSET),
   emitting no code at all. */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
if (!i && !needflags)
return;
if (isconst(d) && !needflags) {
live.state[d].val-=i;
return;
}
#if USE_OFFSET
if (!needflags) {
add_offset(d,-i);
return;
}
#endif

CLOBBER_SUB;
d=rmw(d,4,4);

raw_sub_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

/* d -= i (word); only the no-op case is shortcut. */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
if (!i && !needflags)
return;

CLOBBER_SUB;
d=rmw(d,2,2);

raw_sub_w_ri(d,i);
unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

/* d -= i (byte) */
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
if (!i && !needflags)
return;

CLOBBER_SUB;
d=rmw(d,1,1);

raw_sub_b_ri(d,i);

unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4284    
/* d += i (long); mirrors sub_l_ri — constant fold or pending offset
   when flags are not needed. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
if (!i && !needflags)
return;
if (isconst(d) && !needflags) {
live.state[d].val+=i;
return;
}
#if USE_OFFSET
if (!needflags) {
add_offset(d,i);
return;
}
#endif
CLOBBER_ADD;
d=rmw(d,4,4);
raw_add_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

/* d += i (word) */
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
if (!i && !needflags)
return;

CLOBBER_ADD;
d=rmw(d,2,2);

raw_add_w_ri(d,i);
unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

/* d += i (byte) */
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
if (!i && !needflags)
return;

CLOBBER_ADD;
d=rmw(d,1,1);

raw_add_b_ri(d,i);

unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4332    
/* d -= s + borrow (long); consumes the live carry flag. */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
CLOBBER_SBB;
s=readreg(s,4);
d=rmw(d,4,4);

raw_sbb_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

/* d -= s + borrow (word) */
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
CLOBBER_SBB;
s=readreg(s,2);
d=rmw(d,2,2);

raw_sbb_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

/* d -= s + borrow (byte) */
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
CLOBBER_SBB;
s=readreg(s,1);
d=rmw(d,1,1);

raw_sbb_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4368    
/* d -= s (long); constant s delegates to the immediate form. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
if (isconst(s)) {
COMPCALL(sub_l_ri)(d,live.state[s].val);
return;
}

CLOBBER_SUB;
s=readreg(s,4);
d=rmw(d,4,4);

raw_sub_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

/* d -= s (word) */
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
if (isconst(s)) {
COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
return;
}

CLOBBER_SUB;
s=readreg(s,2);
d=rmw(d,2,2);

raw_sub_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

/* d -= s (byte) */
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
if (isconst(s)) {
COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
return;
}

CLOBBER_SUB;
s=readreg(s,1);
d=rmw(d,1,1);

raw_sub_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4419    
/* Set flags from d - s (long); neither operand is written. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
CLOBBER_CMP;
s=readreg(s,4);
d=readreg(d,4);

raw_cmp_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

/* Set flags from r - i. */
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
CLOBBER_CMP;
r=readreg(r,4);

raw_cmp_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

/* Set flags from d - s (word). */
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
CLOBBER_CMP;
s=readreg(s,2);
d=readreg(d,2);

raw_cmp_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

/* Set flags from d - s (byte). */
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
CLOBBER_CMP;
s=readreg(s,1);
d=readreg(d,1);

raw_cmp_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4465    
4466    
/* d ^= s (long) */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
CLOBBER_XOR;
s=readreg(s,4);
d=rmw(d,4,4);

raw_xor_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

/* d ^= s (word) */
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
CLOBBER_XOR;
s=readreg(s,2);
d=rmw(d,2,2);

raw_xor_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

/* d ^= s (byte) */
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
CLOBBER_XOR;
s=readreg(s,1);
d=rmw(d,1,1);

raw_xor_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4502    
/* Indirect call through register r with one input (in1, isize bytes,
   passed in REG_PAR1) and one output (out1, osize bytes, returned in
   REG_RESULT).  The sequence of flag/offset flushes, register pinning
   and prepare_for_call_1/2 is order-critical — do not reorder. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
clobber_flags();
remove_all_offsets();
if (osize==4) {
/* Full-width result: the old value of out1 is dead unless it
   aliases an input, so drop it. */
if (out1!=in1 && out1!=r) {
COMPCALL(forget_about)(out1);
}
}
else {
/* Partial-width result: preserve the untouched bytes in memory. */
tomem_c(out1);
}

in1=readreg_specific(in1,isize,REG_PAR1);
r=readreg(r,4);
prepare_for_call_1(); /* This should ensure that there won't be
any need for swapping nregs in prepare_for_call_2
*/
#if USE_NORMAL_CALLING_CONVENTION
raw_push_l_r(in1);
#endif
unlock2(in1);
unlock2(r);

prepare_for_call_2();
raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
raw_inc_sp(4);   /* pop the stacked argument */
#endif


/* Bind out1 to the native result register by hand (mirrors what
   writereg_specific would do). */
live.nat[REG_RESULT].holds[0]=out1;
live.nat[REG_RESULT].nholds=1;
live.nat[REG_RESULT].touched=touchcnt++;

live.state[out1].realreg=REG_RESULT;
live.state[out1].realind=0;
live.state[out1].val=0;
live.state[out1].validsize=osize;
live.state[out1].dirtysize=osize;
set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4547    
/* Indirect call through register r with two inputs (REG_PAR1/REG_PAR2)
   and no tracked result. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
clobber_flags();
remove_all_offsets();
in1=readreg_specific(in1,isize1,REG_PAR1);
in2=readreg_specific(in2,isize2,REG_PAR2);
r=readreg(r,4);
prepare_for_call_1(); /* This should ensure that there won't be
any need for swapping nregs in prepare_for_call_2
*/
#if USE_NORMAL_CALLING_CONVENTION
raw_push_l_r(in2);   /* push right-to-left */
raw_push_l_r(in1);
#endif
unlock2(r);
unlock2(in1);
unlock2(in2);
prepare_for_call_2();
raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
raw_inc_sp(8);   /* pop both stacked arguments */
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4572    
/* forget_about() takes a mid-layer register */
/* Invalidate r: detach it from any native register and mark its
   contents undefined, so nothing is spilled for it later. */
MIDFUNC(1,forget_about,(W4 r))
{
if (isinreg(r))
disassociate(r);
live.state[r].val=0;
set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))

/* Emit a single native no-op instruction. */
MIDFUNC(0,nop,(void))
{
raw_nop();
}
MENDFUNC(0,nop,(void))
4588    
4589    
4590     MIDFUNC(1,f_forget_about,(FW r))
4591     {
4592     if (f_isinreg(r))
4593     f_disassociate(r);
4594     live.fate[r].status=UNDEF;
4595     }
4596     MENDFUNC(1,f_forget_about,(FW r))
4597    
/* The following mid-level functions load a well-known FPU constant
   into mid-layer FP register r.  Each allocates a hardware register
   for r, emits the raw constant load, and unlocks the register.  The
   exact constants come from the raw_fmov_* implementations. */

MIDFUNC(1,fmov_pi,(FW r))	/* r = pi */
{
	r=f_writereg(r);
	raw_fmov_pi(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

MIDFUNC(1,fmov_log10_2,(FW r))	/* r = log10(2) */
{
	r=f_writereg(r);
	raw_fmov_log10_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

MIDFUNC(1,fmov_log2_e,(FW r))	/* r = log2(e) */
{
	r=f_writereg(r);
	raw_fmov_log2_e(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

MIDFUNC(1,fmov_loge_2,(FW r))	/* r = ln(2) */
{
	r=f_writereg(r);
	raw_fmov_loge_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

MIDFUNC(1,fmov_1,(FW r))	/* r = 1.0 */
{
	r=f_writereg(r);
	raw_fmov_1(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

MIDFUNC(1,fmov_0,(FW r))	/* r = 0.0 */
{
	r=f_writereg(r);
	raw_fmov_0(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))
4645    
/* FP moves between mid-layer FP registers and host memory.  Naming
   convention (presumably, per the raw_* layer): plain = double,
   'i' = integer, 's' = single, 'ext' = extended precision; '_rm' loads
   register from memory, '_mr' stores register to memory. */

MIDFUNC(2,fmov_rm,(FW r, MEMR m))	/* load double: r = [m] */
{
	r=f_writereg(r);
	raw_fmov_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

MIDFUNC(2,fmovi_rm,(FW r, MEMR m))	/* load integer: r = [m] */
{
	r=f_writereg(r);
	raw_fmovi_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

MIDFUNC(2,fmovi_mr,(MEMW m, FR r))	/* store integer: [m] = r */
{
	r=f_readreg(r);
	raw_fmovi_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

MIDFUNC(2,fmovs_rm,(FW r, MEMR m))	/* load single: r = [m] */
{
	r=f_writereg(r);
	raw_fmovs_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

MIDFUNC(2,fmovs_mr,(MEMW m, FR r))	/* store single: [m] = r */
{
	r=f_readreg(r);
	raw_fmovs_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))	/* store extended: [m] = r */
{
	r=f_readreg(r);
	raw_fmov_ext_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

MIDFUNC(2,fmov_mr,(MEMW m, FR r))	/* store double: [m] = r */
{
	r=f_readreg(r);
	raw_fmov_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))	/* load extended: r = [m] */
{
	r=f_writereg(r);
	raw_fmov_ext_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4709    
/* FP register-to-register move, d = s.  With USE_F_ALIAS no code is
   emitted: d simply becomes another alias of the hardware register
   that holds s (marked DIRTY so it eventually gets written back).
   Without aliasing, an actual fmov instruction is generated. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	if (d==s) { /* How pointless! */
		return;
	}
#if USE_F_ALIAS
	f_disassociate(d);	/* drop d's old mapping before aliasing */
	s=f_readreg(s);
	/* Record d as one more holder of hardware register s. */
	live.fate[d].realreg=s;
	live.fate[d].realind=live.fat[s].nholds;
	live.fate[d].status=DIRTY;
	live.fat[s].holds[live.fat[s].nholds]=d;
	live.fat[s].nholds++;
	f_unlock(s);
#else
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fmov_rr(d,s);
	f_unlock(s);
	f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4733    
/* Load the x87 FPU control word from memory at base+index
   (index is a mid-layer register, base an immediate address). */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	index=readreg(index,4);

	raw_fldcw_m_indexed(index,base);
	unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4742    
/* Test FP register r (emits raw_ftst_r; result presumably lands in
   FP_RESULT / the FPU condition state — see the raw layer). */
MIDFUNC(1,ftst_r,(FR r))
{
	r=f_readreg(r);
	raw_ftst_r(r);
	f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))

/* Declare the FPU condition result dead so it need not be preserved. */
MIDFUNC(0,dont_care_fflags,(void))
{
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4756    
/* Unary FP operations, d = op(s).  Pattern: read s, allocate a
   hardware register for d, emit the raw op, unlock both.  The source
   must be read before the destination is allocated, in case d==s. */

MIDFUNC(2,fsqrt_rr,(FW d, FR s))	/* d = sqrt(s) */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsqrt_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

MIDFUNC(2,fabs_rr,(FW d, FR s))	/* d = |s| */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fabs_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))

MIDFUNC(2,fsin_rr,(FW d, FR s))	/* d = sin(s) */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsin_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))

MIDFUNC(2,fcos_rr,(FW d, FR s))	/* d = cos(s) */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fcos_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))

MIDFUNC(2,ftwotox_rr,(FW d, FR s))	/* d = 2^s */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_ftwotox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

MIDFUNC(2,fetox_rr,(FW d, FR s))	/* d = e^s */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fetox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))

MIDFUNC(2,frndint_rr,(FW d, FR s))	/* d = round-to-integer(s) */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_frndint_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))

MIDFUNC(2,flog2_rr,(FW d, FR s))	/* d = log2(s) */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_flog2_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))

MIDFUNC(2,fneg_rr,(FW d, FR s))	/* d = -s */
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fneg_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4846    
/* Binary FP operations, d op= s.  The destination is read-modify-write
   (f_rmw), so it is read first and written in place.  fcmp only reads
   both operands. */

MIDFUNC(2,fadd_rr,(FRW d, FR s))	/* d += s */
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fadd_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))

MIDFUNC(2,fsub_rr,(FRW d, FR s))	/* d -= s */
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fsub_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))

MIDFUNC(2,fcmp_rr,(FR d, FR s))	/* compare d with s (both read-only) */
{
	d=f_readreg(d);
	s=f_readreg(s);
	raw_fcmp_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))

MIDFUNC(2,fdiv_rr,(FRW d, FR s))	/* d /= s */
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fdiv_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

MIDFUNC(2,frem_rr,(FRW d, FR s))	/* d = remainder(d, s) */
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))

MIDFUNC(2,frem1_rr,(FRW d, FR s))	/* d = remainder(d, s), IEEE variant per raw layer */
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem1_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))

MIDFUNC(2,fmul_rr,(FRW d, FR s))	/* d *= s */
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fmul_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4916    
4917     /********************************************************************
4918     * Support functions exposed to gencomp. CREATE time *
4919     ********************************************************************/
4920    
/* Emit code that zeroes/tests register r (tmp is a scratch register).
   Uses a real bsf when target_check_bsf() deemed it usable on this
   CPU, otherwise an emulated equivalent.
   NOTE(review): exact flag semantics depend on bsf_l_rr/simulate_bsf
   in the raw layer — confirm there. */
void set_zero(int r, int tmp)
{
	if (setzflg_uses_bsf)
		bsf_l_rr(r,r);
	else
		simulate_bsf(tmp,r);
}
4928    
/* Decide whether the partial-register (RAT) stall workaround should be
   applied for mid-layer register r: only when the feature is compiled
   in (KILLTHERAT), the target CPU suffers such stalls, and r's state
   makes the rewrite safe (in memory, clean, constant, or fully dirty). */
int kill_rodent(int r)
{
	return KILLTHERAT &&
		have_rat_stall &&
		(live.state[r].status==INMEM ||
		 live.state[r].status==CLEAN ||
		 live.state[r].status==ISCONST ||
		 live.state[r].dirtysize==4);
}
4938    
/* Return the compile-time constant value held by mid-layer register r.
   Aborts (in debug builds, via Dif) if r is not actually constant. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}
4947    
/* Fold the accumulated instruction-fetch offset into the emulated PC:
   add it to the PC_P mid-layer register and to comp_pc_p, then reset
   the offset so translation can continue from a synced state. */
void sync_m68k_pc(void)
{
	if (m68k_pc_offset) {
		add_l_ri(PC_P,m68k_pc_offset);
		comp_pc_p+=m68k_pc_offset;
		m68k_pc_offset=0;
	}
}
4956    
4957     /********************************************************************
4958     * Scratch registers management *
4959     ********************************************************************/
4960    
/* Backing memory for scratch (non-68k) virtual registers: integer and
   FP scratch registers are spilled to / filled from these arrays
   (wired up in init_comp below). */
struct scratch_t {
	uae_u32 regs[VREGS];		// spill slots for integer vregs
	fpu_register fregs[VFREGS];	// spill slots for FP vregs
};

static scratch_t scratch;
4967    
4968     /********************************************************************
4969     * Support functions exposed to newcpu *
4970     ********************************************************************/
4971    
/* Render a boolean flag as "on"/"off" for log messages. */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
4976    
/* One-time JIT initialization: read JIT-related preferences, probe the
   target CPU, select the cache-flush strategy, and build the compiler
   dispatch tables.  Safe to call repeatedly; only the first call does
   any work. */
void compiler_init(void)
{
	static bool initialized = false;
	if (initialized)
		return;

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	setzflg_uses_bsf = target_check_bsf();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
	write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
	write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
	write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}
5035    
/* JIT shutdown: release the translation cache and popall trampoline
   space, then dump any compiled-in profiling statistics (compile time,
   most frequent untranslated opcodes, 68k register usage). */
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
	emul_end_time = clock();
#endif

	// Deallocate translation cache
	if (compiled_code) {
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

	// Deallocate popallspace
	if (popallspace) {
		vm_release(popallspace, POPALLSPACE_SIZE);
		popallspace = 0;
	}

#if PROFILE_COMPILE_TIME
	write_log("### Compile Block statistics\n");
	write_log("Number of calls to compile_block : %d\n", compile_count);
	uae_u32 emul_time = emul_end_time - emul_start_time;
	write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
	write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
		100.0*double(compile_time)/double(emul_time));
	write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
	/* Sort the per-opcode interpreter hit counters and print the
	   top offenders with their mnemonics. */
	uae_u64 untranslated_count = 0;
	for (int i = 0; i < 65536; i++) {
		opcode_nums[i] = i;
		untranslated_count += raw_cputbl_count[i];
	}
	write_log("Sorting out untranslated instructions count...\n");
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	write_log("\nRank Opc Count Name\n");
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u32 count = raw_cputbl_count[opcode_nums[i]];
		struct instr *dp;
		struct mnemolookup *lookup;
		if (!count)
			break;
		dp = table68k + opcode_nums[i];
		for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
			;
		write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
	}
#endif

#if RECORD_REGISTER_USAGE
	/* Rank the 16 68k registers (D0-D7/A0-A7) by dynamic use count
	   and print each one's share and the cumulative share. */
	int reg_count_ids[16];
	uint64 tot_reg_count = 0;
	for (int i = 0; i < 16; i++) {
		reg_count_ids[i] = i;
		tot_reg_count += reg_count[i];
	}
	qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
	uint64 cum_reg_count = 0;
	for (int i = 0; i < 16; i++) {
		int r = reg_count_ids[i];
		cum_reg_count += reg_count[r];
		printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
			reg_count[r],
			100.0*double(reg_count[r])/double(tot_reg_count),
			100.0*double(cum_reg_count)/double(tot_reg_count));
	}
#endif
}
5105    
/* Decide whether the JIT should be used at all for this session, based
   on preferences and the configured CPU model.  Returns false (and
   logs why) when the JIT must stay disabled. */
bool compiler_use_jit(void)
{
	// Check for the "jit" prefs item
	if (!PrefsFindBool("jit"))
		return false;

	// Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
	if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
		write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
		return false;
	}

	// FIXME: there are currently problems with JIT compilation and anything below a 68040
	if (CPUType < 4) {
		write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
		return false;
	}

	return true;
}
5126    
/* Reset the mid-layer register-allocation state at the start of a
   translation run: map the first 16 vregs onto the 68k register file,
   the first 8 FP vregs onto the 68k FPU registers, the rest onto
   scratch memory, and mark every hardware register free.  Also applies
   the per-register capability tables (can_byte/can_word/always_used). */
void init_comp(void)
{
	int i;
	uae_s8* cb=can_byte;
	uae_s8* cw=can_word;
	uae_s8* au=always_used;

#if RECORD_REGISTER_USAGE
	for (i=0;i<16;i++)
		reg_count_local[i] = 0;
#endif

	/* Start with every integer vreg unmapped, valueless and UNDEF. */
	for (i=0;i<VREGS;i++) {
		live.state[i].realreg=-1;
		live.state[i].needflush=NF_SCRATCH;
		live.state[i].val=0;
		set_status(i,UNDEF);
	}

	for (i=0;i<VFREGS;i++) {
		live.fate[i].status=UNDEF;
		live.fate[i].realreg=-1;
		live.fate[i].needflush=NF_SCRATCH;
	}

	for (i=0;i<VREGS;i++) {
		if (i<16) { /* First 16 registers map to 68k registers */
			live.state[i].mem=((uae_u32*)&regs)+i;
			live.state[i].needflush=NF_TOMEM;
			set_status(i,INMEM);
		}
		else
			live.state[i].mem=scratch.regs+i;
	}
	/* PC_P tracks the host pointer into the 68k instruction stream,
	   kept as a constant offset from comp_pc_p. */
	live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
	live.state[PC_P].needflush=NF_TOMEM;
	set_const(PC_P,(uintptr)comp_pc_p);

	/* Emulated X flag and lazily-evaluated CZNV flags live in regflags. */
	live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
	live.state[FLAGX].needflush=NF_TOMEM;
	set_status(FLAGX,INMEM);

	live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
	live.state[FLAGTMP].needflush=NF_TOMEM;
	set_status(FLAGTMP,INMEM);

	live.state[NEXT_HANDLER].needflush=NF_HANDLER;
	set_status(NEXT_HANDLER,UNDEF);

	for (i=0;i<VFREGS;i++) {
		if (i<8) { /* First 8 registers map to 68k FPU registers */
			live.fate[i].mem=(uae_u32*)fpu_register_address(i);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else if (i==FP_RESULT) {
			live.fate[i].mem=(uae_u32*)(&fpu.result);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else
			live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
	}


	/* Hardware integer registers: free, and flagged with what each
	   can do (byte access, word access, permanently locked). */
	for (i=0;i<N_REGS;i++) {
		live.nat[i].touched=0;
		live.nat[i].nholds=0;
		live.nat[i].locked=0;
		if (*cb==i) {
			live.nat[i].canbyte=1; cb++;
		} else live.nat[i].canbyte=0;
		if (*cw==i) {
			live.nat[i].canword=1; cw++;
		} else live.nat[i].canword=0;
		if (*au==i) {
			live.nat[i].locked=1; au++;
		}
	}

	for (i=0;i<N_FREGS;i++) {
		live.fat[i].touched=0;
		live.fat[i].nholds=0;
		live.fat[i].locked=0;
	}

	touchcnt=1;
	m68k_pc_offset=0;
	live.flags_in_flags=TRASH;
	live.flags_on_stack=VALID;
	live.flags_are_important=1;

	raw_fp_init();
}
5221    
5222     /* Only do this if you really mean it! The next call should be to init!*/
5223     void flush(int save_regs)
5224     {
5225     int fi,i;
5226    
5227     log_flush();
5228     flush_flags(); /* low level */
5229     sync_m68k_pc(); /* mid level */
5230    
5231     if (save_regs) {
5232     for (i=0;i<VFREGS;i++) {
5233     if (live.fate[i].needflush==NF_SCRATCH ||
5234     live.fate[i].status==CLEAN) {
5235     f_disassociate(i);
5236     }
5237     }
5238     for (i=0;i<VREGS;i++) {
5239     if (live.state[i].needflush==NF_TOMEM) {
5240     switch(live.state[i].status) {
5241     case INMEM:
5242     if (live.state[i].val) {
5243 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5244 gbeauche 1.1 log_vwrite(i);
5245     live.state[i].val=0;
5246     }
5247     break;
5248     case CLEAN:
5249     case DIRTY:
5250     remove_offset(i,-1); tomem(i); break;
5251     case ISCONST:
5252     if (i!=PC_P)
5253     writeback_const(i);
5254     break;
5255     default: break;
5256     }
5257     Dif (live.state[i].val && i!=PC_P) {
5258     write_log("Register %d still has val %x\n",
5259     i,live.state[i].val);
5260     }
5261     }
5262     }
5263     for (i=0;i<VFREGS;i++) {
5264     if (live.fate[i].needflush==NF_TOMEM &&
5265     live.fate[i].status==DIRTY) {
5266     f_evict(i);
5267     }
5268     }
5269     raw_fp_cleanup_drop();
5270     }
5271     if (needflags) {
5272     write_log("Warning! flush with needflags=1!\n");
5273     }
5274     }
5275    
5276     static void flush_keepflags(void)
5277     {
5278     int fi,i;
5279    
5280     for (i=0;i<VFREGS;i++) {
5281     if (live.fate[i].needflush==NF_SCRATCH ||
5282     live.fate[i].status==CLEAN) {
5283     f_disassociate(i);
5284     }
5285     }
5286     for (i=0;i<VREGS;i++) {
5287     if (live.state[i].needflush==NF_TOMEM) {
5288     switch(live.state[i].status) {
5289     case INMEM:
5290     /* Can't adjust the offset here --- that needs "add" */
5291     break;
5292     case CLEAN:
5293     case DIRTY:
5294     remove_offset(i,-1); tomem(i); break;
5295     case ISCONST:
5296     if (i!=PC_P)
5297     writeback_const(i);
5298     break;
5299     default: break;
5300     }
5301     }
5302     }
5303     for (i=0;i<VFREGS;i++) {
5304     if (live.fate[i].needflush==NF_TOMEM &&
5305     live.fate[i].status==DIRTY) {
5306     f_evict(i);
5307     }
5308     }
5309     raw_fp_cleanup_drop();
5310     }
5311    
/* Invalidate all scratch virtual registers (integer and FP) so they
   can be reused; warns about any hardware register still locked.
   NOTE(review): the "i!=4" exemption presumably skips a permanently
   locked register (e.g. the stack/base pointer) — confirm against the
   target's always_used table. */
void freescratch(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		if (live.nat[i].locked && i!=4)
			write_log("Warning! %d is locked\n",i);

	for (i=0;i<VREGS;i++)
		if (live.state[i].needflush==NF_SCRATCH) {
			forget_about(i);
		}

	for (i=0;i<VFREGS;i++)
		if (live.fate[i].needflush==NF_SCRATCH) {
			f_forget_about(i);
		}
}
5329    
5330     /********************************************************************
5331     * Support functions, internal *
5332     ********************************************************************/
5333    
5334    
/* Pad the code output pointer 'target' up to an a-byte boundary
   (a must be a power of two; a==0 means no alignment).  Uses tuned
   multi-byte NOP fillers when available, plain 0x90 NOPs otherwise. */
static void align_target(uae_u32 a)
{
	if (!a)
		return;

	if (tune_nop_fillers)
		raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
	else {
		/* Fill with NOPs --- makes debugging with gdb easier */
		while ((uintptr)target&(a-1))
			*target++=0x90;
	}
}
5348    
/* True if host address addr falls inside the loaded ROM image. */
static __inline__ int isinrom(uintptr addr)
{
	return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
}
5353    
/* Write all dirty registers that live in call-clobbered hardware
   registers back to memory, and evict every FP register.  Used before
   calling out to C code (see prepare_for_call_1). */
static void flush_all(void)
{
	int i;

	log_flush();
	for (i=0;i<VREGS;i++)
		if (live.state[i].status==DIRTY) {
			if (!call_saved[live.state[i].realreg]) {
				tomem(i);
			}
		}
	for (i=0;i<VFREGS;i++)
		if (f_isinreg(i))
			f_evict(i);
	raw_fp_cleanup_drop();
}
5370    
/* Make sure all registers that will get clobbered by a call are
   save and sound in memory */
static void prepare_for_call_1(void)
{
	flush_all(); /* If there are registers that don't get clobbered,
		      * we should be a bit more selective here */
}

/* We will call a C routine in a moment. That will clobber all registers,
   so we need to disassociate everything */
static void prepare_for_call_2(void)
{
	int i;
	/* Free every call-clobbered integer register still holding vregs... */
	for (i=0;i<N_REGS;i++)
		if (!call_saved[i] && live.nat[i].nholds>0)
			free_nreg(i);

	/* ...and every FP register (none are call-saved). */
	for (i=0;i<N_FREGS;i++)
		if (live.fat[i].nholds>0)
			f_free_nreg(i);

	live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
				      flags at the very start of the call_r
				      functions! */
}
5396    
5397     /********************************************************************
5398     * Memory access and related functions, CREATE time *
5399     ********************************************************************/
5400    
/* Record a pending conditional branch for the epilogue generator:
   the fall-through target, the taken target, and the condition code. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	next_pc_p=not_taken;
	taken_pc_p=taken;
	branch_cc=cond;
}
5407    
5408    
5409     static uae_u32 get_handler_address(uae_u32 addr)
5410     {
5411     uae_u32 cl=cacheline(addr);
5412 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5413     return (uintptr)&(bi->direct_handler_to_use);
5414 gbeauche 1.1 }
5415    
5416     static uae_u32 get_handler(uae_u32 addr)
5417     {
5418     uae_u32 cl=cacheline(addr);
5419 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5420     return (uintptr)bi->direct_handler_to_use;
5421 gbeauche 1.1 }
5422    
/* Emit code loading the (indirected) handler for 68k address addr
   into mid-layer register reg. */
static void load_handler(int reg, uae_u32 addr)
{
	mov_l_rm(reg,get_handler_address(addr));
}
5427    
/* This version assumes that it is writing *real* memory, and *will* fail
 * if that assumption is wrong! No branches, no second chances, just
 * straight go-for-it attitude */

/* Emit a direct store of 'source' (size = 1/2/4 bytes) to the host
   address MEMBaseDiff + address.  16/32-bit values are byte-swapped
   first (68k is big-endian).  'tmp' is a scratch register; if
   'clobber' is set, 'source' itself may be trashed instead. */
static void writemem_real(int address, int source, int size, int tmp, int clobber)
{
	int f=tmp;

	if (clobber)
		f=source;

#if SIZEOF_VOID_P == 8
	/* On 64-bit hosts, sign-extend the 32-bit emulated address unless
	   33-bit addressing guarantees it is already usable as-is. */
	if (!ThirtyThreeBitAddressing)
		sign_extend_32_rr(address, address);
#endif

	switch(size) {
	case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
	case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
	case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
	}
	forget_about(tmp);
	forget_about(f);
}
5452    
/* Public store helpers — thin wrappers around writemem_real for each
   access size; the _clobber variants allow 'source' to be trashed. */

void writebyte(int address, int source, int tmp)
{
	writemem_real(address,source,1,tmp,0);
}

static __inline__ void writeword_general(int address, int source, int tmp,
	int clobber)
{
	writemem_real(address,source,2,tmp,clobber);
}

void writeword_clobber(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,1);
}

void writeword(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,0);
}

static __inline__ void writelong_general(int address, int source, int tmp,
	int clobber)
{
	writemem_real(address,source,4,tmp,clobber);
}

void writelong_clobber(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,1);
}

void writelong(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,0);
}
5489    
5490    
5491    
/* This version assumes that it is reading *real* memory, and *will* fail
 * if that assumption is wrong! No branches, no second chances, just
 * straight go-for-it attitude */

/* Emit a direct load of 1/2/4 bytes from host address
   MEMBaseDiff + address into 'dest', byte-swapping 16/32-bit values
   (68k is big-endian).  'tmp' is a scratch register. */
static void readmem_real(int address, int dest, int size, int tmp)
{
	int f=tmp;

	if (size==4 && address!=dest)
		f=dest;

#if SIZEOF_VOID_P == 8
	/* 64-bit host: sign-extend the emulated address unless 33-bit
	   addressing makes that unnecessary. */
	if (!ThirtyThreeBitAddressing)
		sign_extend_32_rr(address, address);
#endif

	switch(size) {
	case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
	case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
	case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
	}
	forget_about(tmp);
}
5515    
/* Public load helpers — thin wrappers around readmem_real per size. */

void readbyte(int address, int dest, int tmp)
{
	readmem_real(address,dest,1,tmp);
}

void readword(int address, int dest, int tmp)
{
	readmem_real(address,dest,2,tmp);
}

void readlong(int address, int dest, int tmp)
{
	readmem_real(address,dest,4,tmp);
}
5530    
/* Translate the emulated address in 'address' into a host pointer in
   'dest': identity under real addressing, base-offset (MEMBaseDiff)
   under direct addressing.
   NOTE(review): locals a/f are computed but never used by either
   addressing path — apparently leftovers from a banked-memory variant. */
void get_n_addr(int address, int dest, int tmp)
{
	// a is the register containing the virtual address
	// after the offset had been fetched
	int a=tmp;

	// f is the register that will contain the offset
	int f=tmp;

	// a == f == tmp if (address == dest)
	if (address!=dest) {
		a=address;
		f=dest;
	}

#if REAL_ADDRESSING
	mov_l_rr(dest, address);
#elif DIRECT_ADDRESSING
	lea_l_brr(dest,address,MEMBaseDiff);
#endif
	forget_about(tmp);
}
5553    
/* Address translation used for jump targets — identical to
   get_n_addr() so the JIT and the interpreter agree on addresses. */
void get_n_addr_jmp(int address, int dest, int tmp)
{
	/* For this, we need to get the same address as the rest of UAE
	   would --- otherwise we end up translating everything twice */
	get_n_addr(address,dest,tmp);
}
5560    
5561    
/* base is a register, but dp is an actual value.
   target is a register, as is tmp */
/* Compute a 68020-style effective address from extension word dp.
   Bit 0x100 selects the full extension format (base/index suppress
   bits, base and outer displacements, optional memory indirection);
   otherwise the brief 68000 format (8-bit displacement + scaled index)
   is emitted as a single lea. */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
	int reg = (dp >> 12) & 15;	/* index register number */
	int regd_shift=(dp >> 9) & 3;	/* index scale: 1<<shift */

	if (dp & 0x100) {
		/* Full extension word. */
		int ignorebase=(dp&0x80);	/* base suppress */
		int ignorereg=(dp&0x40);	/* index suppress */
		int addbase=0;			/* base displacement */
		int outer=0;			/* outer displacement */

		/* Fetch word/long base and outer displacements from the
		   instruction stream as indicated by the size fields. */
		if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(target,reg);	/* word-sized index */
				else
					mov_l_rr(target,reg);		/* long-sized index */
				shll_l_ri(target,regd_shift);
			}
			else
				mov_l_ri(target,0);

			/* target is now regd */
			if (!ignorebase)
				add_l(target,base);
			add_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);	/* memory indirect */
		} else { /* do the getlong first, then add regd */
			if (!ignorebase) {
				mov_l_rr(target,base);
				add_l_ri(target,addbase);
			}
			else
				mov_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);	/* memory indirect */

			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(tmp,reg);
				else
					mov_l_rr(tmp,reg);
				shll_l_ri(tmp,regd_shift);
				/* tmp is now regd */
				add_l(target,tmp);
			}
		}
		add_l_ri(target,outer);
	}
	else { /* 68000 version */
		if ((dp & 0x800) == 0) { /* Sign extend */
			sign_extend_16_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
		else {
			lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
	}
	forget_about(tmp);
}
5629    
5630    
5631    
5632    
5633    
/* Enable/disable the translation cache ('letit'); any state change
   forces a hard icache flush so stale code cannot run. */
void set_cache_state(int enabled)
{
	if (enabled!=letit)
		flush_icache_hard(77);
	letit=enabled;
}

/* Current cache enable state. */
int get_cache_state(void)
{
	return letit;
}

/* Bytes of translated code currently in the cache (0 if none). */
uae_u32 get_jitted_size(void)
{
	if (compiled_code)
		return current_compile_p-compiled_code;
	return 0;
}
5652    
const int CODE_ALLOC_MAX_ATTEMPTS = 10;
const int CODE_ALLOC_BOUNDARIES   = 128 * 1024; // 128 KB

/* Allocate 'size' bytes for translated code.  A Linux-specific probing
   strategy (currently disabled by "&& 0") tried to place the cache
   near the data segment, below 2GB; the live path simply uses
   vm_acquire and returns NULL on failure. */
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
	/*
	  This is a really awful hack that is known to work on Linux at
	  least.

	  The trick here is to make sure the allocated cache is nearby
	  code segment, and more precisely in the positive half of a
	  32-bit address space. i.e. addr < 0x80000000. Actually, it
	  turned out that a 32-bit binary run on AMD64 yields a cache
	  allocated around 0xa0000000, thus causing some troubles when
	  translating addresses from m68k to x86.
	*/
	static uint8 * code_base = NULL;
	if (code_base == NULL) {
		uintptr page_size = getpagesize();
		uintptr boundaries = CODE_ALLOC_BOUNDARIES;
		if (boundaries < page_size)
			boundaries = page_size;
		code_base = (uint8 *)sbrk(0);
		/* Probe upward from the break in 'boundaries' steps. */
		for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
			if (vm_acquire_fixed(code_base, size) == 0) {
				uint8 *code = code_base;
				code_base += size;
				return code;
			}
			code_base += boundaries;
		}
		return NULL;
	}

	if (vm_acquire_fixed(code_base, size) == 0) {
		uint8 *code = code_base;
		code_base += size;
		return code;
	}

	if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
		return NULL;

	return do_alloc_code(size, depth + 1);
#else
	uint8 *code = (uint8 *)vm_acquire(size);
	return code == VM_MAP_FAILED ? NULL : code;
#endif
}

/* Public entry point: allocate code space, starting at recursion depth 0. */
static inline uint8 *alloc_code(uint32 size)
{
	return do_alloc_code(size, 0);
}
5708    
5709 gbeauche 1.1 void alloc_cache(void)
5710     {
5711     if (compiled_code) {
5712     flush_icache_hard(6);
5713 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5714 gbeauche 1.1 compiled_code = 0;
5715     }
5716    
5717     if (cache_size == 0)
5718     return;
5719    
5720     while (!compiled_code && cache_size) {
5721 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5722 gbeauche 1.1 compiled_code = 0;
5723     cache_size /= 2;
5724     }
5725     }
5726 gbeauche 1.25 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5727 gbeauche 1.1
5728     if (compiled_code) {
5729     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5730     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5731     current_compile_p = compiled_code;
5732     current_cache_size = 0;
5733     }
5734     }
5735    
5736    
5737    
5738 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5739 gbeauche 1.1
/* Compute an additive (c1) and xor (c2) checksum over the m68k source
   bytes a translated block was generated from.  The result is compared
   against the values stored at translation time to detect code that
   changed underneath a block (self-modifying code).  Note the #if trick:
   with USE_CHECKSUM_INFO the body runs once per recorded source range,
   otherwise exactly once over [bi->min_pcp, bi->min_pcp + bi->len). */
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
	uae_u32 k1 = 0;
	uae_u32 k2 = 0;

#if USE_CHECKSUM_INFO
	checksum_info *csi = bi->csi;
	Dif(!csi) abort();
	while (csi) {
		uae_s32 len = csi->length;
		uintptr tmp = (uintptr)csi->start_p;
#else
	uae_s32 len = bi->len;
	uintptr tmp = (uintptr)bi->min_pcp;
#endif
	uae_u32*pos;

	/* Round the start down to a 32-bit boundary and widen the length
	   accordingly so whole words can be summed. */
	len += (tmp & 3);
	tmp &= ~((uintptr)3);
	pos = (uae_u32 *)tmp;

	/* Ranges that are negative or overlong are skipped entirely. */
	if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
		while (len > 0) {
			k1 += *pos;
			k2 ^= *pos;
			pos++;
			len -= 4;
		}
	}

#if USE_CHECKSUM_INFO
		csi = csi->next;
	}
#endif

	*c1 = k1;
	*c2 = k2;
}
5778    
#if 0
/* Debug helper (currently compiled out): dump the raw 32-bit words
   that feed calc_checksum() for a single checksum range. */
static void show_checksum(CSI_TYPE* csi)
{
	uae_u32 k1=0;
	uae_u32 k2=0;
	uae_s32 len=CSI_LENGTH(csi);
	uae_u32 tmp=(uintptr)CSI_START_P(csi);
	uae_u32* pos;

	/* Same word alignment as calc_checksum(). */
	len+=(tmp&3);
	tmp&=(~3);
	pos=(uae_u32*)tmp;

	if (len<0 || len>MAX_CHECKSUM_LEN) {
		return;
	}
	else {
		while (len>0) {
			write_log("%08x ",*pos);
			pos++;
			len-=4;
		}
		write_log(" bla\n");
	}
}
#endif
5805 gbeauche 1.1
5806    
5807     int check_for_cache_miss(void)
5808     {
5809     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5810    
5811     if (bi) {
5812     int cl=cacheline(regs.pc_p);
5813     if (bi!=cache_tags[cl+1].bi) {
5814     raise_in_cl_list(bi);
5815     return 1;
5816     }
5817     }
5818     return 0;
5819     }
5820    
5821    
5822     static void recompile_block(void)
5823     {
5824     /* An existing block's countdown code has expired. We need to make
5825     sure that execute_normal doesn't refuse to recompile due to a
5826     perceived cache miss... */
5827     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5828    
5829     Dif (!bi)
5830     abort();
5831     raise_in_cl_list(bi);
5832     execute_normal();
5833     return;
5834     }
5835     static void cache_miss(void)
5836     {
5837     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5838     uae_u32 cl=cacheline(regs.pc_p);
5839     blockinfo* bi2=get_blockinfo(cl);
5840    
5841     if (!bi) {
5842     execute_normal(); /* Compile this block now */
5843     return;
5844     }
5845     Dif (!bi2 || bi==bi2) {
5846     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5847     abort();
5848     }
5849     raise_in_cl_list(bi);
5850     return;
5851     }
5852    
5853     static int called_check_checksum(blockinfo* bi);
5854    
/* Revalidate a block whose source may have changed.  Returns nonzero if
   the block (and everything it directly jumps into) is still valid and
   has been reactivated, zero if it was invalidated for recompilation. */
static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32 c1,c2;
	bool isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1; /* This block is in a checked state */

	checksum_count++;

	/* A stored checksum of 0/0 means "always recompile"; force a
	   mismatch in that case. */
	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1; /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);

	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		/* BI_CHECKING breaks recursion cycles while the dependent
		   blocks below are verified. */
		bi->status=BI_CHECKING;
		isgood=called_check_checksum(bi);
	}
	if (isgood) {
		/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}
5899    
5900     static int called_check_checksum(blockinfo* bi)
5901     {
5902     dependency* x=bi->deplist;
5903     int isgood=1;
5904     int i;
5905    
5906     for (i=0;i<2 && isgood;i++) {
5907     if (bi->dep[i].jmp_off) {
5908     isgood=block_check_checksum(bi->dep[i].target);
5909     }
5910     }
5911     return isgood;
5912     }
5913    
5914     static void check_checksum(void)
5915     {
5916     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5917     uae_u32 cl=cacheline(regs.pc_p);
5918     blockinfo* bi2=get_blockinfo(cl);
5919    
5920     /* These are not the droids you are looking for... */
5921     if (!bi) {
5922     /* Whoever is the primary target is in a dormant state, but
5923     calling it was accidental, and we should just compile this
5924     new block */
5925     execute_normal();
5926     return;
5927     }
5928     if (bi!=bi2) {
5929     /* The block was hit accidentally, but it does exist. Cache miss */
5930     cache_miss();
5931     return;
5932     }
5933    
5934     if (!block_check_checksum(bi))
5935     execute_normal();
5936     }
5937    
/* Make the compiler's register state compatible with the entry state a
   block was compiled against (bi->env): honor its promises about
   unneeded vregs, flush everything, then preload the native registers
   the block demands. */
static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);
	}
	if (bi->status==BI_ACTIVE ||
		bi->status==BI_FINALIZING) { /* Deal with the *promises* the
										block makes (about not using
										certain vregs) */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	/* Second pass so that no register is released while another demand
	   might still want to allocate it. */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}
5976    
/* Generate, once at startup, the small fixed stubs in popallspace:
   the "popall_*" exits that restore callee-saved registers before
   jumping to the various C get-out routines, the "pushall" prologue
   used to enter translated code, and (on x86/x86-64) the main
   m68k_compile_execute dispatch loop. */
static __inline__ void create_popalls(void)
{
  int i,r;

  if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
	  write_log("FATAL: Could not allocate popallspace!\n");
	  abort();
  }
  /* Writable while we emit code into it; made read/execute at the end. */
  vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);

  current_compile_p=popallspace;
  set_target(current_compile_p);
#if USE_PUSH_POP
  /* If we can't use gcc inline assembly, we need to pop some
     registers before jumping back to the various get-out routines.
     This generates the code for it.
  */
  align_target(align_jumps);
  popall_do_nothing=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uintptr)do_nothing);

  align_target(align_jumps);
  popall_execute_normal=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uintptr)execute_normal);

  align_target(align_jumps);
  popall_cache_miss=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uintptr)cache_miss);

  align_target(align_jumps);
  popall_recompile_block=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uintptr)recompile_block);

  align_target(align_jumps);
  popall_exec_nostats=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uintptr)exec_nostats);

  align_target(align_jumps);
  popall_check_checksum=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uintptr)check_checksum);

  align_target(align_jumps);
  current_compile_p=get_target();
#else
  /* With inline assembly the C routines can be entered directly. */
  popall_exec_nostats=(void *)exec_nostats;
  popall_execute_normal=(void *)execute_normal;
  popall_cache_miss=(void *)cache_miss;
  popall_recompile_block=(void *)recompile_block;
  popall_do_nothing=(void *)do_nothing;
  popall_check_checksum=(void *)check_checksum;
#endif

  /* And now, the code to do the matching pushes and then jump
     into a handler routine */
  pushall_call_handler=get_target();
#if USE_PUSH_POP
  for (i=N_REGS;i--;) {
      if (need_to_preserve[i])
	  raw_push_l_r(i);
  }
#endif
  r=REG_PC_TMP;
  /* Dispatch: index cache_tags by the tag of the current m68k PC. */
  raw_mov_l_rm(r,(uintptr)&regs.pc_p);
  raw_and_l_ri(r,TAGMASK);
  raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);

#if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
  /* Emit the main execution loop: call the handler for the current PC,
     then service special flags until quit is requested. */
  align_target(align_jumps);
  m68k_compile_execute = (void (*)(void))get_target();
  for (i=N_REGS;i--;) {
      if (need_to_preserve[i])
	  raw_push_l_r(i);
  }
  align_target(align_loops);
  uae_u32 dispatch_loop = (uintptr)get_target();
  r=REG_PC_TMP;
  raw_mov_l_rm(r,(uintptr)&regs.pc_p);
  raw_and_l_ri(r,TAGMASK);
  raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
  raw_cmp_l_mi((uintptr)&regs.spcflags,0);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uintptr)get_target()+1));
  raw_call((uintptr)m68k_do_specialties);
  raw_test_l_rr(REG_RESULT,REG_RESULT);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uintptr)get_target()+1));
  raw_cmp_b_mi((uintptr)&quit_program,0);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uintptr)get_target()+1));
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_ret();
#endif

  // no need to further write into popallspace
  vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
}
6100    
6101     static __inline__ void reset_lists(void)
6102     {
6103     int i;
6104    
6105     for (i=0;i<MAX_HOLD_BI;i++)
6106     hold_bi[i]=NULL;
6107     active=NULL;
6108     dormant=NULL;
6109     }
6110    
/* Emit the two per-block entry stubs (direct_pen: force recompilation;
   direct_pcc: verify checksum first) and reset the blockinfo's
   dependency and state fields to a pristine, invalid block. */
static void prepare_block(blockinfo* bi)
{
	int i;

	set_target(current_compile_p);
	align_target(align_jumps);
	/* Stub taken when the block must be (re)compiled: restore pc_p
	   from the blockinfo and exit through execute_normal. */
	bi->direct_pen=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_execute_normal);

	align_target(align_jumps);
	/* Stub taken when the block needs its checksum verified first. */
	bi->direct_pcc=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_check_checksum);
	current_compile_p=get_target();

	bi->deplist=NULL;
	for (i=0;i<2;i++) {
		bi->dep[i].prev_p=NULL;
		bi->dep[i].next=NULL;
	}
	bi->env=default_ss;
	bi->status=BI_INVALID;
	bi->havestate=0;
	//bi->env=empty_ss;
}
6139    
6140 gbeauche 1.21 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6141     static inline void reset_compop(int opcode)
6142 gbeauche 1.17 {
6143 gbeauche 1.21 compfunctbl[opcode] = NULL;
6144     nfcompfunctbl[opcode] = NULL;
6145     }
6146    
/* Parse exactly four hexadecimal digits at P into an opcode value.
   Returns -1 if any of the four characters is not a hex digit
   (including a premature end of string). */
static int read_opcode(const char *p)
{
	int opcode = 0;
	for (int i = 0; i < 4; i++) {
		int c = p[i];
		int digit;
		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 10;
		else
			return -1;
		opcode = (opcode << 4) | digit;
	}
	return opcode;
}
6169    
6170     static bool merge_blacklist()
6171     {
6172     const char *blacklist = PrefsFindString("jitblacklist");
6173     if (blacklist) {
6174     const char *p = blacklist;
6175     for (;;) {
6176     if (*p == 0)
6177     return true;
6178    
6179     int opcode1 = read_opcode(p);
6180     if (opcode1 < 0)
6181     return false;
6182     p += 4;
6183    
6184     int opcode2 = opcode1;
6185     if (*p == '-') {
6186     p++;
6187     opcode2 = read_opcode(p);
6188     if (opcode2 < 0)
6189     return false;
6190     p += 4;
6191     }
6192    
6193     if (*p == 0 || *p == ';') {
6194     write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6195     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6196     reset_compop(cft_map(opcode));
6197    
6198     if (*p++ == ';')
6199     continue;
6200    
6201     return true;
6202     }
6203    
6204     return false;
6205     }
6206     }
6207     return true;
6208 gbeauche 1.17 }
6209    
6210 gbeauche 1.1 void build_comp(void)
6211     {
6212     int i;
6213     int jumpcount=0;
6214     unsigned long opcode;
6215     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6216     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6217     int count;
6218     int cpu_level = 0; // 68000 (default)
6219     if (CPUType == 4)
6220     cpu_level = 4; // 68040 with FPU
6221     else {
6222     if (FPUType)
6223     cpu_level = 3; // 68020 with FPU
6224     else if (CPUType >= 2)
6225     cpu_level = 2; // 68020
6226     else if (CPUType == 1)
6227     cpu_level = 1;
6228     }
6229     struct cputbl *nfctbl = (
6230     cpu_level == 4 ? op_smalltbl_0_nf
6231     : cpu_level == 3 ? op_smalltbl_1_nf
6232     : cpu_level == 2 ? op_smalltbl_2_nf
6233     : cpu_level == 1 ? op_smalltbl_3_nf
6234     : op_smalltbl_4_nf);
6235    
6236     write_log ("<JIT compiler> : building compiler function tables\n");
6237    
6238     for (opcode = 0; opcode < 65536; opcode++) {
6239 gbeauche 1.21 reset_compop(opcode);
6240 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6241     prop[opcode].use_flags = 0x1f;
6242     prop[opcode].set_flags = 0x1f;
6243     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6244     }
6245    
6246     for (i = 0; tbl[i].opcode < 65536; i++) {
6247     int cflow = table68k[tbl[i].opcode].cflow;
6248 gbeauche 1.10 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6249     cflow = fl_const_jump;
6250 gbeauche 1.8 else
6251 gbeauche 1.10 cflow &= ~fl_const_jump;
6252     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6253 gbeauche 1.1
6254     int uses_fpu = tbl[i].specific & 32;
6255 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6256 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6257     else
6258     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6259     }
6260 gbeauche 1.8
6261 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6262     int uses_fpu = tbl[i].specific & 32;
6263 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6264 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6265     else
6266     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6267    
6268     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6269     }
6270    
6271     for (i = 0; nfctbl[i].handler; i++) {
6272     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6273     }
6274    
6275     for (opcode = 0; opcode < 65536; opcode++) {
6276     compop_func *f;
6277     compop_func *nff;
6278     cpuop_func *nfcf;
6279     int isaddx,cflow;
6280    
6281     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6282     continue;
6283    
6284     if (table68k[opcode].handler != -1) {
6285     f = compfunctbl[cft_map(table68k[opcode].handler)];
6286     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6287     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6288     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6289     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6290     prop[cft_map(opcode)].cflow = cflow;
6291     prop[cft_map(opcode)].is_addx = isaddx;
6292     compfunctbl[cft_map(opcode)] = f;
6293     nfcompfunctbl[cft_map(opcode)] = nff;
6294     Dif (nfcf == op_illg_1)
6295     abort();
6296     nfcpufunctbl[cft_map(opcode)] = nfcf;
6297     }
6298     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6299     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6300     }
6301     for (i = 0; nfctbl[i].handler != NULL; i++) {
6302     if (nfctbl[i].specific)
6303     nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
6304     }
6305 gbeauche 1.21
6306     /* Merge in blacklist */
6307     if (!merge_blacklist())
6308     write_log("<JIT compiler> : blacklist merge failure!\n");
6309 gbeauche 1.1
6310     count=0;
6311     for (opcode = 0; opcode < 65536; opcode++) {
6312     if (compfunctbl[cft_map(opcode)])
6313     count++;
6314     }
6315     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6316    
6317     /* Initialise state */
6318     create_popalls();
6319     alloc_cache();
6320     reset_lists();
6321    
6322     for (i=0;i<TAGSIZE;i+=2) {
6323     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6324     cache_tags[i+1].bi=NULL;
6325     }
6326    
6327     #if 0
6328     for (i=0;i<N_REGS;i++) {
6329     empty_ss.nat[i].holds=-1;
6330     empty_ss.nat[i].validsize=0;
6331     empty_ss.nat[i].dirtysize=0;
6332     }
6333     #endif
6334     for (i=0;i<VREGS;i++) {
6335     empty_ss.virt[i]=L_NEEDED;
6336     }
6337     for (i=0;i<N_REGS;i++) {
6338     empty_ss.nat[i]=L_UNKNOWN;
6339     }
6340     default_ss=empty_ss;
6341     }
6342    
6343    
/* No-op flush handler, installed when the cache needs no invalidation. */
static void flush_icache_none(int n)
{
}
6348    
6349     static void flush_icache_hard(int n)
6350     {
6351     uae_u32 i;
6352     blockinfo* bi, *dbi;
6353    
6354     hard_flush_count++;
6355     #if 0
6356     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6357     n,regs.pc,regs.pc_p,current_cache_size/1024);
6358     current_cache_size = 0;
6359     #endif
6360     bi=active;
6361     while(bi) {
6362     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6363     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6364     dbi=bi; bi=bi->next;
6365     free_blockinfo(dbi);
6366     }
6367     bi=dormant;
6368     while(bi) {
6369     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6370     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6371     dbi=bi; bi=bi->next;
6372     free_blockinfo(dbi);
6373     }
6374    
6375     reset_lists();
6376     if (!compiled_code)
6377     return;
6378     current_compile_p=compiled_code;
6379     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6380     }
6381    
6382    
6383     /* "Soft flushing" --- instead of actually throwing everything away,
6384     we simply mark everything as "needs to be checked".
6385     */
6386    
6387     static inline void flush_icache_lazy(int n)
6388     {
6389     uae_u32 i;
6390     blockinfo* bi;
6391     blockinfo* bi2;
6392    
6393     soft_flush_count++;
6394     if (!active)
6395     return;
6396    
6397     bi=active;
6398     while (bi) {
6399     uae_u32 cl=cacheline(bi->pc_p);
6400     if (bi->status==BI_INVALID ||
6401     bi->status==BI_NEED_RECOMP) {
6402     if (bi==cache_tags[cl+1].bi)
6403     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6404     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6405     set_dhtu(bi,bi->direct_pen);
6406     bi->status=BI_INVALID;
6407     }
6408     else {
6409     if (bi==cache_tags[cl+1].bi)
6410     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6411     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6412     set_dhtu(bi,bi->direct_pcc);
6413     bi->status=BI_NEED_CHECK;
6414     }
6415     bi2=bi;
6416     bi=bi->next;
6417     }
6418     /* bi2 is now the last entry in the active list */
6419     bi2->next=dormant;
6420     if (dormant)
6421     dormant->prev_p=&(bi2->next);
6422    
6423     dormant=active;
6424     active->prev_p=&dormant;
6425     active=NULL;
6426 gbeauche 1.22 }
6427    
6428     void flush_icache_range(uae_u32 start, uae_u32 length)
6429     {
6430     if (!active)
6431     return;
6432    
6433     #if LAZY_FLUSH_ICACHE_RANGE
6434     uae_u8 *start_p = get_real_address(start);
6435     blockinfo *bi = active;
6436     while (bi) {
6437     #if USE_CHECKSUM_INFO
6438     bool invalidate = false;
6439     for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
6440     invalidate = (((start_p - csi->start_p) < csi->length) ||
6441     ((csi->start_p - start_p) < length));
6442     #else
6443     // Assume system is consistent and would invalidate the right range
6444     const bool invalidate = (bi->pc_p - start_p) < length;
6445     #endif
6446     if (invalidate) {
6447     uae_u32 cl = cacheline(bi->pc_p);
6448     if (bi == cache_tags[cl + 1].bi)
6449     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6450     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
6451     set_dhtu(bi, bi->direct_pen);
6452     bi->status = BI_NEED_RECOMP;
6453     }
6454     bi = bi->next;
6455     }
6456     return;
6457     #endif
6458     flush_icache(-1);
6459 gbeauche 1.1 }
6460    
/* Last-resort bailout used when the JIT reaches an inconsistent state:
   terminate immediately. */
static void catastrophe(void)
{
	abort();
}
6465    
6466     int failure;
6467    
6468     #define TARGET_M68K 0
6469     #define TARGET_POWERPC 1
6470     #define TARGET_X86 2
6471 gbeauche 1.24 #define TARGET_X86_64 3
6472 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6473     #define TARGET_NATIVE TARGET_X86
6474     #endif
6475     #if defined(powerpc) || defined(__powerpc__)
6476     #define TARGET_NATIVE TARGET_POWERPC
6477     #endif
6478 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6479     #define TARGET_NATIVE TARGET_X86_64
6480     #endif
6481 gbeauche 1.1
#ifdef ENABLE_MON
/* cxmon memory accessors that operate directly on host addresses, so
   the monitor can inspect and patch generated JIT code. */
static uae_u32 mon_read_byte_jit(uintptr addr)
{
	return (uintptr)(*(uae_u8 *)addr);
}

static void mon_write_byte_jit(uintptr addr, uae_u32 b)
{
	*(uae_u8 *)addr = b;
}
#endif
6495    
/* Disassemble LENGTH bytes at START for the given target architecture
   through the cxmon monitor.  Only functional in JIT_DEBUG+ENABLE_MON
   builds and when JITDebug is set; a no-op otherwise. */
void disasm_block(int target, uint8 * start, size_t length)
{
	if (!JITDebug)
		return;

#if defined(JIT_DEBUG) && defined(ENABLE_MON)
	/* Build a mon command: disassembler mnemonic + address range. */
	char disasm_str[200];
	sprintf(disasm_str, "%s $%x $%x",
			target == TARGET_M68K ? "d68" :
			target == TARGET_X86 ? "d86" :
			target == TARGET_X86_64 ? "d8664" :
			target == TARGET_POWERPC ? "d" : "x",
			start, start + length - 1);

	/* Temporarily redirect mon's memory accessors at host memory. */
	uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
	void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;

	mon_read_byte = mon_read_byte_jit;
	mon_write_byte = mon_write_byte_jit;

	char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
	mon(4, arg);

	mon_read_byte = old_mon_read_byte;
	mon_write_byte = old_mon_write_byte;
#endif
}
6523    
6524 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6525 gbeauche 1.1 {
6526     disasm_block(TARGET_NATIVE, start, length);
6527     }
6528    
6529 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6530 gbeauche 1.1 {
6531     disasm_block(TARGET_M68K, start, length);
6532     }
6533    
6534     #ifdef HAVE_GET_WORD_UNSWAPPED
6535     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6536     #else
6537     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6538     #endif
6539    
#if JIT_DEBUG
/* Last m68k PC / host code address seen, recorded for the debug dump. */
static uae_u8 *last_regs_pc_p = 0;
static uae_u8 *last_compiled_block_addr = 0;

/* Dump host-side addresses, the m68k register state and the location
   of the most recently executed translated block (debug builds only). */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;

	write_log("### Host addresses\n");
	write_log("MEM_BASE : %x\n", MEMBaseDiff);
	write_log("PC_P : %p\n", &regs.pc_p);
	write_log("SPCFLAGS : %p\n", &regs.spcflags);
	write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");

	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");

	write_log("### Block in Mac address space\n");
	write_log("M68K block : %p\n",
			  (void *)(uintptr)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
			  (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
			  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
#endif
6570    
6571     static void compile_block(cpu_history* pc_hist, int blocklen)
6572     {
6573     if (letit && compiled_code) {
6574     #if PROFILE_COMPILE_TIME
6575     compile_count++;
6576     clock_t start_time = clock();
6577     #endif
6578     #if JIT_DEBUG
6579     bool disasm_block = false;
6580     #endif
6581    
6582     /* OK, here we need to 'compile' a block */
6583     int i;
6584     int r;
6585     int was_comp=0;
6586     uae_u8 liveflags[MAXRUN+1];
6587 gbeauche 1.8 #if USE_CHECKSUM_INFO
6588     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6589 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6590     uintptr min_pcp=max_pcp;
6591 gbeauche 1.8 #else
6592 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6593     uintptr min_pcp=max_pcp;
6594 gbeauche 1.8 #endif
6595 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6596     void* specflags=(void*)&regs.spcflags;
6597     blockinfo* bi=NULL;
6598     blockinfo* bi2;
6599     int extra_len=0;
6600    
6601     redo_current_block=0;
6602     if (current_compile_p>=max_compile_start)
6603     flush_icache_hard(7);
6604    
6605     alloc_blockinfos();
6606    
6607     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6608     bi2=get_blockinfo(cl);
6609    
6610     optlev=bi->optlevel;
6611     if (bi->status!=BI_INVALID) {
6612     Dif (bi!=bi2) {
6613     /* I don't think it can happen anymore. Shouldn't, in
6614     any case. So let's make sure... */
6615     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6616     bi->count,bi->optlevel,bi->handler_to_use,
6617     cache_tags[cl].handler);
6618     abort();
6619     }
6620    
6621     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6622     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6623     /* What the heck? We are not supposed to be here! */
6624     abort();
6625     }
6626     }
6627     if (bi->count==-1) {
6628     optlev++;
6629     while (!optcount[optlev])
6630     optlev++;
6631     bi->count=optcount[optlev]-1;
6632     }
6633 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6634 gbeauche 1.1
6635     remove_deps(bi); /* We are about to create new code */
6636     bi->optlevel=optlev;
6637     bi->pc_p=(uae_u8*)pc_hist[0].location;
6638 gbeauche 1.8 #if USE_CHECKSUM_INFO
6639     free_checksum_info_chain(bi->csi);
6640     bi->csi = NULL;
6641     #endif
6642 gbeauche 1.1
6643     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6644     i=blocklen;
6645     while (i--) {
6646     uae_u16* currpcp=pc_hist[i].location;
6647     uae_u32 op=DO_GET_OPCODE(currpcp);
6648    
6649 gbeauche 1.8 #if USE_CHECKSUM_INFO
6650     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6651     #if USE_INLINING
6652     if (is_const_jump(op)) {
6653     checksum_info *csi = alloc_checksum_info();
6654     csi->start_p = (uae_u8 *)min_pcp;
6655     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6656     csi->next = bi->csi;
6657     bi->csi = csi;
6658 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6659 gbeauche 1.8 }
6660     #endif
6661 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6662 gbeauche 1.8 #else
6663 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6664     min_pcp=(uintptr)currpcp;
6665     if ((uintptr)currpcp>max_pcp)
6666     max_pcp=(uintptr)currpcp;
6667 gbeauche 1.8 #endif
6668 gbeauche 1.1
6669     liveflags[i]=((liveflags[i+1]&
6670     (~prop[op].set_flags))|
6671     prop[op].use_flags);
6672     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6673     liveflags[i]&= ~FLAG_Z;
6674     }
6675    
6676 gbeauche 1.8 #if USE_CHECKSUM_INFO
6677     checksum_info *csi = alloc_checksum_info();
6678     csi->start_p = (uae_u8 *)min_pcp;
6679     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6680     csi->next = bi->csi;
6681     bi->csi = csi;
6682     #endif
6683    
6684 gbeauche 1.1 bi->needed_flags=liveflags[0];
6685    
6686 gbeauche 1.5 align_target(align_loops);
6687 gbeauche 1.1 was_comp=0;
6688    
6689     bi->direct_handler=(cpuop_func *)get_target();
6690     set_dhtu(bi,bi->direct_handler);
6691     bi->status=BI_COMPILING;
6692 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6693 gbeauche 1.1
6694     log_startblock();
6695    
6696     if (bi->count>=0) { /* Need to generate countdown code */
6697 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6698     raw_sub_l_mi((uintptr)&(bi->count),1);
6699     raw_jl((uintptr)popall_recompile_block);
6700 gbeauche 1.1 }
6701     if (optlev==0) { /* No need to actually translate */
6702     /* Execute normally without keeping stats */
6703 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6704     raw_jmp((uintptr)popall_exec_nostats);
6705 gbeauche 1.1 }
6706     else {
6707     reg_alloc_run=0;
6708     next_pc_p=0;
6709     taken_pc_p=0;
6710     branch_cc=0;
6711    
6712     comp_pc_p=(uae_u8*)pc_hist[0].location;
6713     init_comp();
6714     was_comp=1;
6715    
6716     #if JIT_DEBUG
6717     if (JITDebug) {
6718 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6719     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6720 gbeauche 1.1 }
6721     #endif
6722    
6723     for (i=0;i<blocklen &&
6724     get_target_noopt()<max_compile_start;i++) {
6725     cpuop_func **cputbl;
6726     compop_func **comptbl;
6727     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6728     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6729     if (!needed_flags) {
6730     cputbl=nfcpufunctbl;
6731     comptbl=nfcompfunctbl;
6732     }
6733     else {
6734     cputbl=cpufunctbl;
6735     comptbl=compfunctbl;
6736     }
6737    
6738     failure = 1; // gb-- defaults to failure state
6739     if (comptbl[opcode] && optlev>1) {
6740     failure=0;
6741     if (!was_comp) {
6742     comp_pc_p=(uae_u8*)pc_hist[i].location;
6743     init_comp();
6744     }
6745 gbeauche 1.18 was_comp=1;
6746 gbeauche 1.1
6747     comptbl[opcode](opcode);
6748     freescratch();
6749     if (!(liveflags[i+1] & FLAG_CZNV)) {
6750     /* We can forget about flags */
6751     dont_care_flags();
6752     }
6753     #if INDIVIDUAL_INST
6754     flush(1);
6755     nop();
6756     flush(1);
6757     was_comp=0;
6758     #endif
6759     }
6760    
6761     if (failure) {
6762     if (was_comp) {
6763     flush(1);
6764     was_comp=0;
6765     }
6766     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6767     #if USE_NORMAL_CALLING_CONVENTION
6768     raw_push_l_r(REG_PAR1);
6769     #endif
6770 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6771     (uintptr)pc_hist[i].location);
6772     raw_call((uintptr)cputbl[opcode]);
6773 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6774     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6775 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6776 gbeauche 1.9 #endif
6777 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6778     raw_inc_sp(4);
6779     #endif
6780    
6781     if (i < blocklen - 1) {
6782     uae_s8* branchadd;
6783    
6784 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6785 gbeauche 1.1 raw_test_l_rr(0,0);
6786     raw_jz_b_oponly();
6787     branchadd=(uae_s8 *)get_target();
6788     emit_byte(0);
6789 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6790     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6791 gbeauche 1.1 }
6792     }
6793     }
6794     #if 1 /* This isn't completely kosher yet; It really needs to be
6795     be integrated into a general inter-block-dependency scheme */
6796     if (next_pc_p && taken_pc_p &&
6797     was_comp && taken_pc_p==current_block_pc_p) {
6798     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6799     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6800     uae_u8 x=bi1->needed_flags;
6801    
6802     if (x==0xff || 1) { /* To be on the safe side */
6803     uae_u16* next=(uae_u16*)next_pc_p;
6804     uae_u32 op=DO_GET_OPCODE(next);
6805    
6806     x=0x1f;
6807     x&=(~prop[op].set_flags);
6808     x|=prop[op].use_flags;
6809     }
6810    
6811     x|=bi2->needed_flags;
6812     if (!(x & FLAG_CZNV)) {
6813     /* We can forget about flags */
6814     dont_care_flags();
6815     extra_len+=2; /* The next instruction now is part of this
6816     block */
6817     }
6818    
6819     }
6820     #endif
6821     log_flush();
6822    
6823     if (next_pc_p) { /* A branch was registered */
6824 gbeauche 1.24 uintptr t1=next_pc_p;
6825     uintptr t2=taken_pc_p;
6826 gbeauche 1.1 int cc=branch_cc;
6827    
6828     uae_u32* branchadd;
6829     uae_u32* tba;
6830     bigstate tmp;
6831     blockinfo* tbi;
6832    
6833     if (taken_pc_p<next_pc_p) {
6834     /* backward branch. Optimize for the "taken" case ---
6835     which means the raw_jcc should fall through when
6836     the 68k branch is taken. */
6837     t1=taken_pc_p;
6838     t2=next_pc_p;
6839     cc=branch_cc^1;
6840     }
6841    
6842     tmp=live; /* ouch! This is big... */
6843     raw_jcc_l_oponly(cc);
6844     branchadd=(uae_u32*)get_target();
6845     emit_long(0);
6846    
6847     /* predicted outcome */
6848     tbi=get_blockinfo_addr_new((void*)t1,1);
6849     match_states(tbi);
6850 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6851 gbeauche 1.1 raw_jcc_l_oponly(4);
6852     tba=(uae_u32*)get_target();
6853 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6854     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6855 gbeauche 1.28 flush_reg_count();
6856 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6857 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6858    
6859 gbeauche 1.5 align_target(align_jumps);
6860 gbeauche 1.1 /* not-predicted outcome */
6861 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6862 gbeauche 1.1 live=tmp; /* Ouch again */
6863     tbi=get_blockinfo_addr_new((void*)t2,1);
6864     match_states(tbi);
6865    
6866     //flush(1); /* Can only get here if was_comp==1 */
6867 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6868 gbeauche 1.1 raw_jcc_l_oponly(4);
6869     tba=(uae_u32*)get_target();
6870 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6871     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6872 gbeauche 1.28 flush_reg_count();
6873 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6874 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
6875     }
6876     else
6877     {
6878     if (was_comp) {
6879     flush(1);
6880     }
6881 gbeauche 1.28 flush_reg_count();
6882 gbeauche 1.1
6883     /* Let's find out where next_handler is... */
6884     if (was_comp && isinreg(PC_P)) {
6885     r=live.state[PC_P].realreg;
6886     raw_and_l_ri(r,TAGMASK);
6887     int r2 = (r==0) ? 1 : 0;
6888 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6889     raw_cmp_l_mi((uintptr)specflags,0);
6890 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6891 gbeauche 1.1 raw_jmp_r(r2);
6892     }
6893     else if (was_comp && isconst(PC_P)) {
6894     uae_u32 v=live.state[PC_P].val;
6895     uae_u32* tba;
6896     blockinfo* tbi;
6897    
6898 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6899 gbeauche 1.1 match_states(tbi);
6900    
6901 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6902 gbeauche 1.1 raw_jcc_l_oponly(4);
6903     tba=(uae_u32*)get_target();
6904 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6905     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6906     raw_jmp((uintptr)popall_do_nothing);
6907 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6908     }
6909     else {
6910     r=REG_PC_TMP;
6911 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6912 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6913     int r2 = (r==0) ? 1 : 0;
6914 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6915     raw_cmp_l_mi((uintptr)specflags,0);
6916 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6917 gbeauche 1.1 raw_jmp_r(r2);
6918     }
6919     }
6920     }
6921    
6922     #if USE_MATCH
6923     if (callers_need_recompile(&live,&(bi->env))) {
6924     mark_callers_recompile(bi);
6925     }
6926    
6927     big_to_small_state(&live,&(bi->env));
6928     #endif
6929    
6930 gbeauche 1.8 #if USE_CHECKSUM_INFO
6931     remove_from_list(bi);
6932     if (trace_in_rom) {
6933     // No need to checksum that block trace on cache invalidation
6934     free_checksum_info_chain(bi->csi);
6935     bi->csi = NULL;
6936     add_to_dormant(bi);
6937     }
6938     else {
6939     calc_checksum(bi,&(bi->c1),&(bi->c2));
6940     add_to_active(bi);
6941     }
6942     #else
6943 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6944     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6945     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6946     else
6947     max_pcp+=LONGEST_68K_INST;
6948 gbeauche 1.7
6949 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6950     bi->min_pcp=min_pcp;
6951 gbeauche 1.7
6952 gbeauche 1.1 remove_from_list(bi);
6953     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6954     add_to_dormant(bi); /* No need to checksum it on cache flush.
6955     Please don't start changing ROMs in
6956     flight! */
6957     }
6958     else {
6959     calc_checksum(bi,&(bi->c1),&(bi->c2));
6960     add_to_active(bi);
6961     }
6962 gbeauche 1.8 #endif
6963 gbeauche 1.1
6964     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6965    
6966     #if JIT_DEBUG
6967     if (JITDebug)
6968     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6969    
6970     if (JITDebug && disasm_block) {
6971     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6972     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6973     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6974     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6975     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6976     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6977     getchar();
6978     }
6979     #endif
6980    
6981     log_dump();
6982 gbeauche 1.5 align_target(align_jumps);
6983 gbeauche 1.1
6984     /* This is the non-direct handler */
6985     bi->handler=
6986     bi->handler_to_use=(cpuop_func *)get_target();
6987 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6988     raw_jnz((uintptr)popall_cache_miss);
6989 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
6990    
6991     bi->status=BI_FINALIZING;
6992     init_comp();
6993     match_states(bi);
6994     flush(1);
6995    
6996 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
6997 gbeauche 1.1
6998     current_compile_p=get_target();
6999     raise_in_cl_list(bi);
7000    
7001     /* We will flush soon, anyway, so let's do it now */
7002     if (current_compile_p>=max_compile_start)
7003     flush_icache_hard(7);
7004    
7005     bi->status=BI_ACTIVE;
7006     if (redo_current_block)
7007     block_need_recompile(bi);
7008    
7009     #if PROFILE_COMPILE_TIME
7010     compile_time += (clock() - start_time);
7011     #endif
7012     }
7013     }
7014    
/* Deliberately empty function.  Generated code jumps here (via the
   popall_do_nothing trampoline) when there is nothing left to do for
   the current block; it exists only as a well-defined return target. */
void do_nothing(void)
{
	/* intentionally no-op */
}
7019    
7020     void exec_nostats(void)
7021     {
7022     for (;;) {
7023     uae_u32 opcode = GET_OPCODE;
7024     (*cpufunctbl[opcode])(opcode);
7025     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7026     return; /* We will deal with the spcflags in the caller */
7027     }
7028     }
7029     }
7030    
7031     void execute_normal(void)
7032     {
7033     if (!check_for_cache_miss()) {
7034     cpu_history pc_hist[MAXRUN];
7035     int blocklen = 0;
7036     #if REAL_ADDRESSING || DIRECT_ADDRESSING
7037     start_pc_p = regs.pc_p;
7038     start_pc = get_virtual_address(regs.pc_p);
7039     #else
7040     start_pc_p = regs.pc_oldp;
7041     start_pc = regs.pc;
7042     #endif
7043     for (;;) { /* Take note: This is the do-it-normal loop */
7044     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7045     uae_u32 opcode = GET_OPCODE;
7046     #if FLIGHT_RECORDER
7047     m68k_record_step(m68k_getpc());
7048     #endif
7049     (*cpufunctbl[opcode])(opcode);
7050     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7051     compile_block(pc_hist, blocklen);
7052     return; /* We will deal with the spcflags in the caller */
7053     }
7054     /* No need to check regs.spcflags, because if they were set,
7055     we'd have ended up inside that "if" */
7056     }
7057     }
7058     }
7059    
7060     typedef void (*compiled_handler)(void);
7061    
7062 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
7063 gbeauche 1.6 void (*m68k_compile_execute)(void) = NULL;
7064     #else
7065 gbeauche 1.1 void m68k_do_compile_execute(void)
7066     {
7067     for (;;) {
7068     ((compiled_handler)(pushall_call_handler))();
7069     /* Whenever we return from that, we should check spcflags */
7070     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7071     if (m68k_do_specialties ())
7072     return;
7073     }
7074     }
7075     }
7076 gbeauche 1.6 #endif