root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.37
Committed: 2006-02-26T18:58:18Z (18 years, 4 months ago) by gbeauche
Branch: MAIN
Changes since 1.36: +2 -1 lines
Log Message:
prefer lower indexes in register allocation; this avoids REX prefixes on
x86_64 when %r8 - %r15 are used (very slight speedup expected)

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.29 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 gbeauche 1.29 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
54     #include <stdlib.h>
55     #include <fcntl.h>
56     #include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86 gbeauche 1.28 #if defined(__x86_64__) && 0
87     #define RECORD_REGISTER_USAGE 1
88     #endif
89    
90 gbeauche 1.1 #ifdef WIN32
91     #undef write_log
92     #define write_log dummy_write_log
93     static void dummy_write_log(const char *, ...) { }
94     #endif
95    
96     #if JIT_DEBUG
97     #undef abort
98     #define abort() do { \
99     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100     exit(EXIT_FAILURE); \
101     } while (0)
102     #endif
103    
104 gbeauche 1.28 #if RECORD_REGISTER_USAGE
105     static uint64 reg_count[16];
106     static int reg_count_local[16];
107    
108     static int reg_count_compare(const void *ap, const void *bp)
109     {
110     const int a = *((int *)ap);
111     const int b = *((int *)bp);
112     return (reg_count[a] < reg_count[b]) - (reg_count[a] > reg_count[b]); /* descending; a raw uint64 difference would truncate to int */
113     }
114     #endif
115    
116 gbeauche 1.1 #if PROFILE_COMPILE_TIME
117     #include <time.h>
118     static uae_u32 compile_count = 0;
119     static clock_t compile_time = 0;
120     static clock_t emul_start_time = 0;
121     static clock_t emul_end_time = 0;
122     #endif
123    
124 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
125     const int untranslated_top_ten = 20;
126     static uae_u32 raw_cputbl_count[65536] = { 0, };
127     static uae_u16 opcode_nums[65536];
128    
129     static int untranslated_compfn(const void *e1, const void *e2)
130     {
131     return (raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]) - (raw_cputbl_count[*(const uae_u16 *)e1] > raw_cputbl_count[*(const uae_u16 *)e2]); /* proper three-way result for qsort, descending by count */
132     }
133     #endif
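
/* Editor's sketch (not in the original source): how the profiling tables
 * above are meant to be consumed. Under PROFILE_UNTRANSLATED_INSNS, every
 * 68k opcode that falls back to the interpreter bumps raw_cputbl_count[];
 * at exit, the indices can be sorted with untranslated_compfn to list the
 * hottest untranslated opcodes. The helper name below is hypothetical.
 */
#if 0
static void dump_untranslated_insns(void)
{
	for (int i = 0; i < 65536; i++)
		opcode_nums[i] = i;
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u16 opcode = opcode_nums[i];
		if (raw_cputbl_count[opcode] == 0)
			break; /* remaining opcodes were never interpreted */
		write_log("%03d: %04x - %u\n", i, opcode, raw_cputbl_count[opcode]);
	}
}
#endif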
134    
135 gbeauche 1.24 static compop_func *compfunctbl[65536];
136     static compop_func *nfcompfunctbl[65536];
137     static cpuop_func *nfcpufunctbl[65536];
138 gbeauche 1.1 uae_u8* comp_pc_p;
139    
140 gbeauche 1.26 // From main_unix.cpp
141     extern bool ThirtyThreeBitAddressing;
142    
143 gbeauche 1.6 // From newcpu.cpp
144     extern bool quit_program;
145    
146 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
147     #if JIT_DEBUG
148     static bool JITDebug = false; // Enable runtime disassemblers through mon?
149     #else
150     const bool JITDebug = false; // Don't use JIT debug mode at all
151     #endif
152 gbeauche 1.33 #if USE_INLINING
153     static bool follow_const_jumps = true; // Flag: translation through constant jumps
154     #else
155     const bool follow_const_jumps = false;
156     #endif
157 gbeauche 1.1
158 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
159 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
160 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
161 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
162     static bool avoid_fpu = true; // Flag: compile FPU instructions ?
163     static bool have_cmov = false; // target has CMOV instructions ?
164 gbeauche 1.30 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
165 gbeauche 1.1 static bool have_rat_stall = true; // target has partial register stalls ?
166 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
167     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
168 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
169 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
170     static int align_jumps = 32; // Align the start of jumps
171 gbeauche 1.1 static int optcount[10] = {
172     10, // How often a block has to be executed before it is translated
173     0, // How often to use naive translation
174     0, 0, 0, 0,
175     -1, -1, -1, -1
176     };
177    
178     struct op_properties {
179     uae_u8 use_flags;
180     uae_u8 set_flags;
181     uae_u8 is_addx;
182     uae_u8 cflow;
183     };
184     static op_properties prop[65536];
185    
186     static inline int end_block(uae_u32 opcode)
187     {
188     return (prop[opcode].cflow & fl_end_block);
189     }
190    
191 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
192     {
193     return (prop[opcode].cflow == fl_const_jump);
194     }
195    
196 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
197     {
198     return (prop[opcode].cflow & fl_trap);
199     }
200    
201     static inline unsigned int cft_map (unsigned int f)
202     {
203     #ifndef HAVE_GET_WORD_UNSWAPPED
204     return f;
205     #else
206     return ((f >> 8) & 255) | ((f & 255) << 8);
207     #endif
208     }
209    
210 gbeauche 1.1 uae_u8* start_pc_p;
211     uae_u32 start_pc;
212     uae_u32 current_block_pc_p;
213 gbeauche 1.24 static uintptr current_block_start_target;
214 gbeauche 1.1 uae_u32 needed_flags;
215 gbeauche 1.24 static uintptr next_pc_p;
216     static uintptr taken_pc_p;
217 gbeauche 1.1 static int branch_cc;
218     static int redo_current_block;
219    
220     int segvcount=0;
221     int soft_flush_count=0;
222     int hard_flush_count=0;
223     int checksum_count=0;
224     static uae_u8* current_compile_p=NULL;
225     static uae_u8* max_compile_start;
226     static uae_u8* compiled_code=NULL;
227     static uae_s32 reg_alloc_run;
228 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
229     static uae_u8* popallspace=NULL;
230 gbeauche 1.1
231     void* pushall_call_handler=NULL;
232     static void* popall_do_nothing=NULL;
233     static void* popall_exec_nostats=NULL;
234     static void* popall_execute_normal=NULL;
235     static void* popall_cache_miss=NULL;
236     static void* popall_recompile_block=NULL;
237     static void* popall_check_checksum=NULL;
238    
239     /* The 68k only ever executes from even addresses. So right now, we
240     * waste half the entries in this array
241     * UPDATE: We now use those entries to store the start of the linked
242     * lists that we maintain for each hash result.
243     */
244     cacheline cache_tags[TAGSIZE];
245     int letit=0;
246     blockinfo* hold_bi[MAX_HOLD_BI];
247     blockinfo* active;
248     blockinfo* dormant;
249    
250     /* 68040 */
251     extern struct cputbl op_smalltbl_0_nf[];
252     extern struct comptbl op_smalltbl_0_comp_nf[];
253     extern struct comptbl op_smalltbl_0_comp_ff[];
254    
255     /* 68020 + 68881 */
256     extern struct cputbl op_smalltbl_1_nf[];
257    
258     /* 68020 */
259     extern struct cputbl op_smalltbl_2_nf[];
260    
261     /* 68010 */
262     extern struct cputbl op_smalltbl_3_nf[];
263    
264     /* 68000 */
265     extern struct cputbl op_smalltbl_4_nf[];
266    
267     /* 68000 slow but compatible. */
268     extern struct cputbl op_smalltbl_5_nf[];
269    
270     static void flush_icache_hard(int n);
271     static void flush_icache_lazy(int n);
272     static void flush_icache_none(int n);
273     void (*flush_icache)(int n) = flush_icache_none;
274    
275    
276    
277     bigstate live;
278     smallstate empty_ss;
279     smallstate default_ss;
280     static int optlev;
281    
282     static int writereg(int r, int size);
283     static void unlock2(int r);
284     static void setlock(int r);
285     static int readreg_specific(int r, int size, int spec);
286     static int writereg_specific(int r, int size, int spec);
287     static void prepare_for_call_1(void);
288     static void prepare_for_call_2(void);
289     static void align_target(uae_u32 a);
290    
291     static uae_s32 nextused[VREGS];
292    
293     uae_u32 m68k_pc_offset;
294    
295     /* Some arithmetic operations can be optimized away if the operands
296     * are known to be constant. But that's only a good idea when the
297     * side effects they would have on the flags are not important. This
298     * variable indicates whether we need the side effects or not
299     */
300     uae_u32 needflags=0;
301    
302     /* Flag handling is complicated.
303     *
304     * x86 instructions create flags, which quite often are exactly what we
305     * want. So at times, the "68k" flags are actually in the x86 flags.
306     *
307     * Then again, sometimes we do x86 instructions that clobber the x86
308     * flags, but don't represent a corresponding m68k instruction. In that
309     * case, we have to save them.
310     *
311     * We used to save them to the stack, but now store them back directly
312     * into the regflags.cznv of the traditional emulation. Thus some odd
313     * names.
314     *
315     * So flags can be in either of two places (used to be three; boy were
316     * things complicated back then!); And either place can contain either
317     * valid flags or invalid trash (and on the stack, there was also the
318     * option of "nothing at all", now gone). A couple of variables keep
319     * track of the respective states.
320     *
321     * To make things worse, we might or might not be interested in the flags.
322     * By default, we are, but a call to dont_care_flags can change that
323     * until the next call to live_flags. If we are not, pretty much whatever
324     * is in the register and/or the native flags is seen as valid.
325     */
326    
327     static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
328     {
329     return cache_tags[cl+1].bi;
330     }
331    
332     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
333     {
334     blockinfo* bi=get_blockinfo(cacheline(addr));
335    
336     while (bi) {
337     if (bi->pc_p==addr)
338     return bi;
339     bi=bi->next_same_cl;
340     }
341     return NULL;
342     }
343    
344    
345     /*******************************************************************
346     * All sorts of list related functions for all of the lists *
347     *******************************************************************/
348    
349     static __inline__ void remove_from_cl_list(blockinfo* bi)
350     {
351     uae_u32 cl=cacheline(bi->pc_p);
352    
353     if (bi->prev_same_cl_p)
354     *(bi->prev_same_cl_p)=bi->next_same_cl;
355     if (bi->next_same_cl)
356     bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
357     if (cache_tags[cl+1].bi)
358     cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
359     else
360     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
361     }
362    
363     static __inline__ void remove_from_list(blockinfo* bi)
364     {
365     if (bi->prev_p)
366     *(bi->prev_p)=bi->next;
367     if (bi->next)
368     bi->next->prev_p=bi->prev_p;
369     }
370    
371     static __inline__ void remove_from_lists(blockinfo* bi)
372     {
373     remove_from_list(bi);
374     remove_from_cl_list(bi);
375     }
376    
377     static __inline__ void add_to_cl_list(blockinfo* bi)
378     {
379     uae_u32 cl=cacheline(bi->pc_p);
380    
381     if (cache_tags[cl+1].bi)
382     cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
383     bi->next_same_cl=cache_tags[cl+1].bi;
384    
385     cache_tags[cl+1].bi=bi;
386     bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
387    
388     cache_tags[cl].handler=bi->handler_to_use;
389     }
390    
391     static __inline__ void raise_in_cl_list(blockinfo* bi)
392     {
393     remove_from_cl_list(bi);
394     add_to_cl_list(bi);
395     }
396    
397     static __inline__ void add_to_active(blockinfo* bi)
398     {
399     if (active)
400     active->prev_p=&(bi->next);
401     bi->next=active;
402    
403     active=bi;
404     bi->prev_p=&active;
405     }
406    
407     static __inline__ void add_to_dormant(blockinfo* bi)
408     {
409     if (dormant)
410     dormant->prev_p=&(bi->next);
411     bi->next=dormant;
412    
413     dormant=bi;
414     bi->prev_p=&dormant;
415     }
416    
417     static __inline__ void remove_dep(dependency* d)
418     {
419     if (d->prev_p)
420     *(d->prev_p)=d->next;
421     if (d->next)
422     d->next->prev_p=d->prev_p;
423     d->prev_p=NULL;
424     d->next=NULL;
425     }
426    
427     /* This block's code is about to be thrown away, so it no longer
428     depends on anything else */
429     static __inline__ void remove_deps(blockinfo* bi)
430     {
431     remove_dep(&(bi->dep[0]));
432     remove_dep(&(bi->dep[1]));
433     }
434    
435     static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
436     {
437     *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
438     }
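
/* Editor's note: jmp_off points at the 32-bit displacement field of a
 * direct JMP/Jcc in already-emitted code. x86 encodes such branches
 * relative to the end of the displacement field, hence the "+4": for a
 * target a and a displacement stored at address p, the stored value is
 * a - (p + 4). Rewriting that field is how a dependent block is relinked
 * to a new direct handler without recompiling the caller.
 */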
439    
440     /********************************************************************
441     * Soft flush handling support functions *
442     ********************************************************************/
443    
444     static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
445     {
446     //write_log("bi is %p\n",bi);
447     if (dh!=bi->direct_handler_to_use) {
448     dependency* x=bi->deplist;
449     //write_log("bi->deplist=%p\n",bi->deplist);
450     while (x) {
451     //write_log("x is %p\n",x);
452     //write_log("x->next is %p\n",x->next);
453     //write_log("x->prev_p is %p\n",x->prev_p);
454    
455     if (x->jmp_off) {
456     adjust_jmpdep(x,dh);
457     }
458     x=x->next;
459     }
460     bi->direct_handler_to_use=dh;
461     }
462     }
463    
464     static __inline__ void invalidate_block(blockinfo* bi)
465     {
466     int i;
467    
468     bi->optlevel=0;
469     bi->count=optcount[0]-1;
470     bi->handler=NULL;
471     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
472     bi->direct_handler=NULL;
473     set_dhtu(bi,bi->direct_pen);
474     bi->needed_flags=0xff;
475     bi->status=BI_INVALID;
476     for (i=0;i<2;i++) {
477     bi->dep[i].jmp_off=NULL;
478     bi->dep[i].target=NULL;
479     }
480     remove_deps(bi);
481     }
482    
483     static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
484     {
485 gbeauche 1.24 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
486 gbeauche 1.1
487     Dif(!tbi) {
488     write_log("Could not create jmpdep!\n");
489     abort();
490     }
491     bi->dep[i].jmp_off=jmpaddr;
492     bi->dep[i].source=bi;
493     bi->dep[i].target=tbi;
494     bi->dep[i].next=tbi->deplist;
495     if (bi->dep[i].next)
496     bi->dep[i].next->prev_p=&(bi->dep[i].next);
497     bi->dep[i].prev_p=&(tbi->deplist);
498     tbi->deplist=&(bi->dep[i]);
499     }
500    
501     static __inline__ void block_need_recompile(blockinfo * bi)
502     {
503     uae_u32 cl = cacheline(bi->pc_p);
504    
505     set_dhtu(bi, bi->direct_pen);
506     bi->direct_handler = bi->direct_pen;
507    
508     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
509     bi->handler = (cpuop_func *)popall_execute_normal;
510     if (bi == cache_tags[cl + 1].bi)
511     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
512     bi->status = BI_NEED_RECOMP;
513     }
514    
515     static __inline__ void mark_callers_recompile(blockinfo * bi)
516     {
517     dependency *x = bi->deplist;
518    
519     while (x) {
520     dependency *next = x->next; /* This disappears when we mark for
521     * recompilation and thus remove the
522     * blocks from the lists */
523     if (x->jmp_off) {
524     blockinfo *cbi = x->source;
525    
526     Dif(cbi->status == BI_INVALID) {
527     // write_log("invalid block in dependency list\n"); // FIXME?
528     // abort();
529     }
530     if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
531     block_need_recompile(cbi);
532     mark_callers_recompile(cbi);
533     }
534     else if (cbi->status == BI_COMPILING) {
535     redo_current_block = 1;
536     }
537     else if (cbi->status == BI_NEED_RECOMP) {
538     /* nothing */
539     }
540     else {
541     //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
542     }
543     }
544     x = next;
545     }
546     }
547    
548     static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
549     {
550     blockinfo* bi=get_blockinfo_addr(addr);
551     int i;
552    
553     if (!bi) {
554     for (i=0;i<MAX_HOLD_BI && !bi;i++) {
555     if (hold_bi[i]) {
556     uae_u32 cl=cacheline(addr);
557    
558     bi=hold_bi[i];
559     hold_bi[i]=NULL;
560     bi->pc_p=(uae_u8 *)addr;
561     invalidate_block(bi);
562     add_to_active(bi);
563     add_to_cl_list(bi);
564    
565     }
566     }
567     }
568     if (!bi) {
569     write_log("Looking for blockinfo, can't find free one\n");
570     abort();
571     }
572     return bi;
573     }
574    
575     static void prepare_block(blockinfo* bi);
576    
577     /* Management of blockinfos.
578    
579     A blockinfo struct is allocated whenever a new block has to be
580     compiled. If the list of free blockinfos is empty, we allocate a new
581     pool of blockinfos and link the newly created blockinfos together
582     into the list of free blockinfos. Otherwise, we simply pop a structure
583 gbeauche 1.7 off the free list.
584 gbeauche 1.1
585     Blockinfos are lazily deallocated, i.e. chained together in the
586     list of free blockinfos whenever a translation cache flush (hard or
587     soft) request occurs.
588     */
589    
590 gbeauche 1.7 template< class T >
591     class LazyBlockAllocator
592     {
593     enum {
594     kPoolSize = 1 + 4096 / sizeof(T)
595     };
596     struct Pool {
597     T chunk[kPoolSize];
598     Pool * next;
599     };
600     Pool * mPools;
601     T * mChunks;
602     public:
603     LazyBlockAllocator() : mPools(0), mChunks(0) { }
604     ~LazyBlockAllocator();
605     T * acquire();
606     void release(T * const);
607 gbeauche 1.1 };
608    
609 gbeauche 1.7 template< class T >
610     LazyBlockAllocator<T>::~LazyBlockAllocator()
611 gbeauche 1.1 {
612 gbeauche 1.7 Pool * currentPool = mPools;
613     while (currentPool) {
614     Pool * deadPool = currentPool;
615     currentPool = currentPool->next;
616     free(deadPool);
617     }
618     }
619    
620     template< class T >
621     T * LazyBlockAllocator<T>::acquire()
622     {
623     if (!mChunks) {
624     // There is no chunk left, allocate a new pool and link the
625     // chunks into the free list
626     Pool * newPool = (Pool *)malloc(sizeof(Pool));
627     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
628     chunk->next = mChunks;
629     mChunks = chunk;
630 gbeauche 1.1 }
631 gbeauche 1.7 newPool->next = mPools;
632     mPools = newPool;
633     }
634     T * chunk = mChunks;
635     mChunks = chunk->next;
636     return chunk;
637     }
638    
639     template< class T >
640     void LazyBlockAllocator<T>::release(T * const chunk)
641     {
642     chunk->next = mChunks;
643     mChunks = chunk;
644     }
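
/* Editor's sketch (not in the original source): intended use of the
 * allocator above. acquire() pops a chunk off the free list, growing the
 * pool lazily when it is empty; release() just pushes the chunk back, so
 * nothing is returned to the OS until the allocator itself is destroyed.
 * The instance and function names below are hypothetical.
 */
#if 0
static LazyBlockAllocator<blockinfo> SketchAllocator;
static void sketch_allocator_usage(void)
{
	blockinfo *bi = SketchAllocator.acquire(); /* may malloc a fresh pool */
	/* ... use bi while the translated block is live ... */
	SketchAllocator.release(bi); /* back onto the free list, not free()d */
}
#endif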
645    
646     template< class T >
647     class HardBlockAllocator
648     {
649     public:
650     T * acquire() {
651     T * data = (T *)current_compile_p;
652     current_compile_p += sizeof(T);
653     return data;
654 gbeauche 1.1 }
655 gbeauche 1.7
656     void release(T * const chunk) {
657     // Deallocated on invalidation
658     }
659     };
660    
661     #if USE_SEPARATE_BIA
662     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
663     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
664 gbeauche 1.1 #else
665 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
666     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
667 gbeauche 1.1 #endif
668    
669 gbeauche 1.8 static __inline__ checksum_info *alloc_checksum_info(void)
670     {
671     checksum_info *csi = ChecksumInfoAllocator.acquire();
672     csi->next = NULL;
673     return csi;
674     }
675    
676     static __inline__ void free_checksum_info(checksum_info *csi)
677     {
678     csi->next = NULL;
679     ChecksumInfoAllocator.release(csi);
680     }
681    
682     static __inline__ void free_checksum_info_chain(checksum_info *csi)
683     {
684     while (csi != NULL) {
685     checksum_info *csi2 = csi->next;
686     free_checksum_info(csi);
687     csi = csi2;
688     }
689     }
690 gbeauche 1.7
691     static __inline__ blockinfo *alloc_blockinfo(void)
692 gbeauche 1.1 {
693 gbeauche 1.7 blockinfo *bi = BlockInfoAllocator.acquire();
694     #if USE_CHECKSUM_INFO
695     bi->csi = NULL;
696 gbeauche 1.1 #endif
697 gbeauche 1.7 return bi;
698 gbeauche 1.1 }
699    
700 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
701 gbeauche 1.1 {
702 gbeauche 1.7 #if USE_CHECKSUM_INFO
703 gbeauche 1.8 free_checksum_info_chain(bi->csi);
704     bi->csi = NULL;
705 gbeauche 1.1 #endif
706 gbeauche 1.7 BlockInfoAllocator.release(bi);
707 gbeauche 1.1 }
708    
709     static __inline__ void alloc_blockinfos(void)
710     {
711     int i;
712     blockinfo* bi;
713    
714     for (i=0;i<MAX_HOLD_BI;i++) {
715     if (hold_bi[i])
716     return;
717     bi=hold_bi[i]=alloc_blockinfo();
718     prepare_block(bi);
719     }
720     }
721    
722     /********************************************************************
723     * Functions to emit data into memory, and other general support *
724     ********************************************************************/
725    
726     static uae_u8* target;
727    
728     static void emit_init(void)
729     {
730     }
731    
732     static __inline__ void emit_byte(uae_u8 x)
733     {
734     *target++=x;
735     }
736    
737     static __inline__ void emit_word(uae_u16 x)
738     {
739     *((uae_u16*)target)=x;
740     target+=2;
741     }
742    
743     static __inline__ void emit_long(uae_u32 x)
744     {
745     *((uae_u32*)target)=x;
746     target+=4;
747     }
748    
749 gbeauche 1.24 static __inline__ void emit_quad(uae_u64 x)
750     {
751     *((uae_u64*)target)=x;
752     target+=8;
753     }
754    
755 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
756     {
757     memcpy((uae_u8 *)target,block,blocklen);
758     target+=blocklen;
759     }
760    
761 gbeauche 1.1 static __inline__ uae_u32 reverse32(uae_u32 v)
762     {
763     #if 1
764     // gb-- We have specialized byteswapping functions, just use them
765     return do_byteswap_32(v);
766     #else
767     return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
768     #endif
769     }
770    
771     /********************************************************************
772     * Getting the information about the target CPU *
773     ********************************************************************/
774    
775     #include "codegen_x86.cpp"
776    
777     void set_target(uae_u8* t)
778     {
779     target=t;
780     }
781    
782     static __inline__ uae_u8* get_target_noopt(void)
783     {
784     return target;
785     }
786    
787     __inline__ uae_u8* get_target(void)
788     {
789     return get_target_noopt();
790     }
791    
792    
793     /********************************************************************
794     * Flags status handling. EMIT TIME! *
795     ********************************************************************/
796    
797     static void bt_l_ri_noclobber(R4 r, IMM i);
798    
799     static void make_flags_live_internal(void)
800     {
801     if (live.flags_in_flags==VALID)
802     return;
803     Dif (live.flags_on_stack==TRASH) {
804     write_log("Want flags, got something on stack, but it is TRASH\n");
805     abort();
806     }
807     if (live.flags_on_stack==VALID) {
808     int tmp;
809     tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
810     raw_reg_to_flags(tmp);
811     unlock2(tmp);
812    
813     live.flags_in_flags=VALID;
814     return;
815     }
816     write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
817     live.flags_in_flags,live.flags_on_stack);
818     abort();
819     }
820    
821     static void flags_to_stack(void)
822     {
823     if (live.flags_on_stack==VALID)
824     return;
825     if (!live.flags_are_important) {
826     live.flags_on_stack=VALID;
827     return;
828     }
829     Dif (live.flags_in_flags!=VALID)
830     abort();
831     else {
832     int tmp;
833     tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
834     raw_flags_to_reg(tmp);
835     unlock2(tmp);
836     }
837     live.flags_on_stack=VALID;
838     }
839    
840     static __inline__ void clobber_flags(void)
841     {
842     if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
843     flags_to_stack();
844     live.flags_in_flags=TRASH;
845     }
846    
847     /* Prepare for leaving the compiled stuff */
848     static __inline__ void flush_flags(void)
849     {
850     flags_to_stack();
851     return;
852     }
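
/* Editor's sketch (not in the original source): how the helpers above
 * implement the flag discipline described in the long comment earlier in
 * this file. A handler that emits an x86 instruction which trashes EFLAGS
 * without representing an m68k flag update must spill live flags first.
 * Function names are the real ones above; the scenario is illustrative.
 */
#if 0
static void sketch_flag_discipline(void)
{
	/* about to emit an x86 insn that clobbers EFLAGS but does not
	   correspond to an m68k flag update: save live flags first
	   (flags_to_stack() only stores them if they are important) */
	clobber_flags();

	/* about to leave translated code: flags must be in regflags.cznv */
	flush_flags();
}
#endif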
853    
854     int touchcnt;
855    
856     /********************************************************************
857 gbeauche 1.18 * Partial register flushing for optimized calls *
858     ********************************************************************/
859    
860     struct regusage {
861     uae_u16 rmask;
862     uae_u16 wmask;
863     };
864    
865     static inline void ru_set(uae_u16 *mask, int reg)
866     {
867     #if USE_OPTIMIZED_CALLS
868     *mask |= 1 << reg;
869     #endif
870     }
871    
872     static inline bool ru_get(const uae_u16 *mask, int reg)
873     {
874     #if USE_OPTIMIZED_CALLS
875     return (*mask & (1 << reg));
876     #else
877     /* Default: instruction reads & writes the register */
878     return true;
879     #endif
880     }
881    
882     static inline void ru_set_read(regusage *ru, int reg)
883     {
884     ru_set(&ru->rmask, reg);
885     }
886    
887     static inline void ru_set_write(regusage *ru, int reg)
888     {
889     ru_set(&ru->wmask, reg);
890     }
891    
892     static inline bool ru_read_p(const regusage *ru, int reg)
893     {
894     return ru_get(&ru->rmask, reg);
895     }
896    
897     static inline bool ru_write_p(const regusage *ru, int reg)
898     {
899     return ru_get(&ru->wmask, reg);
900     }
901    
902     static void ru_fill_ea(regusage *ru, int reg, amodes mode,
903     wordsizes size, int write_mode)
904     {
905     switch (mode) {
906     case Areg:
907     reg += 8;
908     /* fall through */
909     case Dreg:
910     ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
911     break;
912     case Ad16:
913     /* skip displacement */
914     m68k_pc_offset += 2;	/* fall through */
915     case Aind:
916     case Aipi:
917     case Apdi:
918     ru_set_read(ru, reg+8);
919     break;
920     case Ad8r:
921     ru_set_read(ru, reg+8);
922     /* fall through */
923     case PC8r: {
924     uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
925     reg = (dp >> 12) & 15;
926     ru_set_read(ru, reg);
927     if (dp & 0x100)
928     m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
929     break;
930     }
931     case PC16:
932     case absw:
933     case imm0:
934     case imm1:
935     m68k_pc_offset += 2;
936     break;
937     case absl:
938     case imm2:
939     m68k_pc_offset += 4;
940     break;
941     case immi:
942     m68k_pc_offset += (size == sz_long) ? 4 : 2;
943     break;
944     }
945     }
946    
947     /* TODO: split into a static initialization part and a dynamic one
948     (instructions depending on extension words) */
949     static void ru_fill(regusage *ru, uae_u32 opcode)
950     {
951     m68k_pc_offset += 2;
952    
953     /* Default: no register is used or written to */
954     ru->rmask = 0;
955     ru->wmask = 0;
956    
957     uae_u32 real_opcode = cft_map(opcode);
958     struct instr *dp = &table68k[real_opcode];
959    
960     bool rw_dest = true;
961     bool handled = false;
962    
963     /* Handle some instructions specifically */
964     uae_u16 reg, ext;
965     switch (dp->mnemo) {
966     case i_BFCHG:
967     case i_BFCLR:
968     case i_BFEXTS:
969     case i_BFEXTU:
970     case i_BFFFO:
971     case i_BFINS:
972     case i_BFSET:
973     case i_BFTST:
974     ext = comp_get_iword((m68k_pc_offset+=2)-2);
975     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
976     if (ext & 0x020) ru_set_read(ru, ext & 7);
977     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
978     if (dp->dmode == Dreg)
979     ru_set_read(ru, dp->dreg);
980     switch (dp->mnemo) {
981     case i_BFEXTS:
982     case i_BFEXTU:
983     case i_BFFFO:
984     ru_set_write(ru, (ext >> 12) & 7);
985     break;
986     case i_BFINS:
987     ru_set_read(ru, (ext >> 12) & 7);
988     /* fall through */
989     case i_BFCHG:
990     case i_BFCLR:
991     case i_BFSET:	/* was "i_BSET", which is unreachable here: this switch only sees BF opcodes */
992     if (dp->dmode == Dreg)
993     ru_set_write(ru, dp->dreg);
994     break;
995     }
996     handled = true;
997     rw_dest = false;
998     break;
999    
1000     case i_BTST:
1001     rw_dest = false;
1002     break;
1003    
1004     case i_CAS:
1005     {
1006     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1007     int Du = ext & 7;
1008     ru_set_read(ru, Du);
1009     int Dc = (ext >> 6) & 7;
1010     ru_set_read(ru, Dc);
1011     ru_set_write(ru, Dc);
1012     break;
1013     }
1014     case i_CAS2:
1015     {
1016     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1017     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1018     Rn1 = (ext >> 12) & 15;
1019     Du1 = (ext >> 6) & 7;
1020     Dc1 = ext & 7;
1021     ru_set_read(ru, Rn1);
1022     ru_set_read(ru, Du1);
1023     ru_set_read(ru, Dc1);
1024     ru_set_write(ru, Dc1);
1025     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1026     Rn2 = (ext >> 12) & 15;
1027     Du2 = (ext >> 6) & 7;
1028     Dc2 = ext & 7;
1029     ru_set_read(ru, Rn2);
1030     ru_set_read(ru, Du2);
1031     ru_set_write(ru, Dc2);
1032     break;
1033     }
1034     case i_DIVL: case i_MULL:
1035     m68k_pc_offset += 2;
1036     break;
1037     case i_LEA:
1038     case i_MOVE: case i_MOVEA: case i_MOVE16:
1039     rw_dest = false;
1040     break;
1041     case i_PACK: case i_UNPK:
1042     rw_dest = false;
1043     m68k_pc_offset += 2;
1044     break;
1045     case i_TRAPcc:
1046     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1047     break;
1048     case i_RTR:
1049     /* do nothing, just for coverage debugging */
1050     break;
1051     /* TODO: handle EXG instruction */
1052     }
1053    
1054     /* Handle A-Traps better */
1055     if ((real_opcode & 0xf000) == 0xa000) {
1056     handled = true;
1057     }
1058    
1059     /* Handle EmulOps better */
1060     if ((real_opcode & 0xff00) == 0x7100) {
1061     handled = true;
1062     ru->rmask = 0xffff;
1063     ru->wmask = 0;
1064     }
1065    
1066     if (dp->suse && !handled)
1067     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1068    
1069     if (dp->duse && !handled)
1070     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1071    
1072     if (rw_dest)
1073     ru->rmask |= ru->wmask;
1074    
1075     handled = handled || dp->suse || dp->duse;
1076    
1077     /* Mark all registers as used/written if the instruction may trap */
1078     if (may_trap(opcode)) {
1079     handled = true;
1080     ru->rmask = 0xffff;
1081     ru->wmask = 0xffff;
1082     }
1083    
1084     if (!handled) {
1085     write_log("ru_fill: %04x = { %04x, %04x }\n",
1086     real_opcode, ru->rmask, ru->wmask);
1087     abort();
1088     }
1089     }
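
/* Editor's sketch (not in the original source): one plausible way the
 * regusage masks computed by ru_fill() can drive the partial register
 * flushing this section is named after. Before an optimized call, only
 * the 68k registers the instruction actually reads need writing back;
 * write-only registers can stay dirty. The loop is illustrative only
 * (tomem_c is the real helper, defined later in this file).
 */
#if 0
static void sketch_partial_flush(uae_u32 next_opcode)
{
	regusage ru;
	ru_fill(&ru, next_opcode);
	for (int reg = 0; reg < 16; reg++) {
		if (ru_read_p(&ru, reg))
			tomem_c(reg); /* value is observed: sync it to memory */
		/* registers that are only written need no writeback here */
	}
}
#endif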
1090    
1091     /********************************************************************
1092 gbeauche 1.1 * register allocation per block logging *
1093     ********************************************************************/
1094    
1095     static uae_s8 vstate[VREGS];
1096     static uae_s8 vwritten[VREGS];
1097     static uae_s8 nstate[N_REGS];
1098    
1099     #define L_UNKNOWN -127
1100     #define L_UNAVAIL -1
1101     #define L_NEEDED -2
1102     #define L_UNNEEDED -3
1103    
1104     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1105     {
1106     int i;
1107    
1108     for (i = 0; i < VREGS; i++)
1109     s->virt[i] = vstate[i];
1110     for (i = 0; i < N_REGS; i++)
1111     s->nat[i] = nstate[i];
1112     }
1113    
1114     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1115     {
1116     int i;
1117     int reverse = 0;
1118    
1119     for (i = 0; i < VREGS; i++) {
1120     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1121     return 1;
1122     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1123     reverse++;
1124     }
1125     for (i = 0; i < N_REGS; i++) {
1126     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1127     return 1;
1128     if (nstate[i] < 0 && s->nat[i] >= 0)
1129     reverse++;
1130     }
1131     if (reverse >= 2 && USE_MATCH)
1132     return 1; /* In this case, it might be worth recompiling the
1133     * callers */
1134     return 0;
1135     }
1136    
1137     static __inline__ void log_startblock(void)
1138     {
1139     int i;
1140    
1141     for (i = 0; i < VREGS; i++) {
1142     vstate[i] = L_UNKNOWN;
1143     vwritten[i] = 0;
1144     }
1145     for (i = 0; i < N_REGS; i++)
1146     nstate[i] = L_UNKNOWN;
1147     }
1148    
1149     /* Using an n-reg for a temp variable */
1150     static __inline__ void log_isused(int n)
1151     {
1152     if (nstate[n] == L_UNKNOWN)
1153     nstate[n] = L_UNAVAIL;
1154     }
1155    
1156     static __inline__ void log_visused(int r)
1157     {
1158     if (vstate[r] == L_UNKNOWN)
1159     vstate[r] = L_NEEDED;
1160     }
1161    
1162     static __inline__ void do_load_reg(int n, int r)
1163     {
1164     if (r == FLAGTMP)
1165     raw_load_flagreg(n, r);
1166     else if (r == FLAGX)
1167     raw_load_flagx(n, r);
1168     else
1169 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1170 gbeauche 1.1 }
1171    
1172     static __inline__ void check_load_reg(int n, int r)
1173     {
1174 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1175 gbeauche 1.1 }
1176    
1177     static __inline__ void log_vwrite(int r)
1178     {
1179     vwritten[r] = 1;
1180     }
1181    
1182     /* Using an n-reg to hold a v-reg */
1183     static __inline__ void log_isreg(int n, int r)
1184     {
1185     static int count = 0;
1186    
1187     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1188     nstate[n] = r;
1189     else {
1190     do_load_reg(n, r);
1191     if (nstate[n] == L_UNKNOWN)
1192     nstate[n] = L_UNAVAIL;
1193     }
1194     if (vstate[r] == L_UNKNOWN)
1195     vstate[r] = L_NEEDED;
1196     }
1197    
1198     static __inline__ void log_clobberreg(int r)
1199     {
1200     if (vstate[r] == L_UNKNOWN)
1201     vstate[r] = L_UNNEEDED;
1202     }
1203    
1204     /* This ends all possibility of clever register allocation */
1205    
1206     static __inline__ void log_flush(void)
1207     {
1208     int i;
1209    
1210     for (i = 0; i < VREGS; i++)
1211     if (vstate[i] == L_UNKNOWN)
1212     vstate[i] = L_NEEDED;
1213     for (i = 0; i < N_REGS; i++)
1214     if (nstate[i] == L_UNKNOWN)
1215     nstate[i] = L_UNAVAIL;
1216     }
1217    
1218     static __inline__ void log_dump(void)
1219     {
1220     int i;
1221    
1222     return; /* early return: the dump below is intentionally disabled */
1223    
1224     write_log("----------------------\n");
1225     for (i = 0; i < N_REGS; i++) {
1226     switch (nstate[i]) {
1227     case L_UNKNOWN:
1228     write_log("Nat %d : UNKNOWN\n", i);
1229     break;
1230     case L_UNAVAIL:
1231     write_log("Nat %d : UNAVAIL\n", i);
1232     break;
1233     default:
1234     write_log("Nat %d : %d\n", i, nstate[i]);
1235     break;
1236     }
1237     }
1238     for (i = 0; i < VREGS; i++) {
1239     if (vstate[i] == L_UNNEEDED)
1240     write_log("Virt %d: UNNEEDED\n", i);
1241     }
1242     }
1243    
1244     /********************************************************************
1245     * register status handling. EMIT TIME! *
1246     ********************************************************************/
1247    
1248     static __inline__ void set_status(int r, int status)
1249     {
1250     if (status == ISCONST)
1251     log_clobberreg(r);
1252     live.state[r].status=status;
1253     }
1254    
1255     static __inline__ int isinreg(int r)
1256     {
1257     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1258     }
1259    
1260     static __inline__ void adjust_nreg(int r, uae_u32 val)
1261     {
1262     if (!val)
1263     return;
1264     raw_lea_l_brr(r,r,val);
1265     }
1266    
1267     static void tomem(int r)
1268     {
1269     int rr=live.state[r].realreg;
1270    
1271     if (isinreg(r)) {
1272     if (live.state[r].val && live.nat[rr].nholds==1
1273     && !live.nat[rr].locked) {
1274     // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1275     // live.state[r].val,r,rr,target);
1276     adjust_nreg(rr,live.state[r].val);
1277     live.state[r].val=0;
1278     live.state[r].dirtysize=4;
1279     set_status(r,DIRTY);
1280     }
1281     }
1282    
1283     if (live.state[r].status==DIRTY) {
1284     switch (live.state[r].dirtysize) {
1285 gbeauche 1.24 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1286     case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1287     case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1288 gbeauche 1.1 default: abort();
1289     }
1290     log_vwrite(r);
1291     set_status(r,CLEAN);
1292     live.state[r].dirtysize=0;
1293     }
1294     }
1295    
1296     static __inline__ int isconst(int r)
1297     {
1298     return live.state[r].status==ISCONST;
1299     }
1300    
1301     int is_const(int r)
1302     {
1303     return isconst(r);
1304     }
1305    
1306     static __inline__ void writeback_const(int r)
1307     {
1308     if (!isconst(r))
1309     return;
1310     Dif (live.state[r].needflush==NF_HANDLER) {
1311     write_log("Trying to write back constant NF_HANDLER!\n");
1312     abort();
1313     }
1314    
1315 gbeauche 1.24 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1316 gbeauche 1.1 log_vwrite(r);
1317     live.state[r].val=0;
1318     set_status(r,INMEM);
1319     }
1320    
1321     static __inline__ void tomem_c(int r)
1322     {
1323     if (isconst(r)) {
1324     writeback_const(r);
1325     }
1326     else
1327     tomem(r);
1328     }
1329    
1330     static void evict(int r)
1331     {
1332     int rr;
1333    
1334     if (!isinreg(r))
1335     return;
1336     tomem(r);
1337     rr=live.state[r].realreg;
1338    
1339     Dif (live.nat[rr].locked &&
1340     live.nat[rr].nholds==1) {
1341     write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1342     abort();
1343     }
1344    
1345     live.nat[rr].nholds--;
1346     if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1347     int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1348     int thisind=live.state[r].realind;
1349    
1350     live.nat[rr].holds[thisind]=topreg;
1351     live.state[topreg].realind=thisind;
1352     }
1353     live.state[r].realreg=-1;
1354     set_status(r,INMEM);
1355     }
1356    
1357     static __inline__ void free_nreg(int r)
1358     {
1359     int i=live.nat[r].nholds;
1360    
1361     while (i) {
1362     int vr;
1363    
1364     --i;
1365     vr=live.nat[r].holds[i];
1366     evict(vr);
1367     }
1368     Dif (live.nat[r].nholds!=0) {
1369     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1370     abort();
1371     }
1372     }
1373    
1374     /* Use with care! */
1375     static __inline__ void isclean(int r)
1376     {
1377     if (!isinreg(r))
1378     return;
1379     live.state[r].validsize=4;
1380     live.state[r].dirtysize=0;
1381     live.state[r].val=0;
1382     set_status(r,CLEAN);
1383     }
1384    
1385     static __inline__ void disassociate(int r)
1386     {
1387     isclean(r);
1388     evict(r);
1389     }
1390    
1391     static __inline__ void set_const(int r, uae_u32 val)
1392     {
1393     disassociate(r);
1394     live.state[r].val=val;
1395     set_status(r,ISCONST);
1396     }
1397    
1398     static __inline__ uae_u32 get_offset(int r)
1399     {
1400     return live.state[r].val;
1401     }
1402    
1403     static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1404     {
1405     int bestreg;
1406     uae_s32 when;
1407     int i;
1408     uae_s32 badness=0; /* to shut up gcc */
1409     bestreg=-1;
1410     when=2000000000;
1411    
1412 gbeauche 1.37 /* XXX use a regalloc_order table? */
1413     for (i=0;i<N_REGS;i++) {
1414 gbeauche 1.1 badness=live.nat[i].touched;
1415     if (live.nat[i].nholds==0)
1416     badness=0;
1417     if (i==hint)
1418     badness-=200000000;
1419     if (!live.nat[i].locked && badness<when) {
1420     if ((size==1 && live.nat[i].canbyte) ||
1421     (size==2 && live.nat[i].canword) ||
1422     (size==4)) {
1423     bestreg=i;
1424     when=badness;
1425     if (live.nat[i].nholds==0 && hint<0)
1426     break;
1427     if (i==hint)
1428     break;
1429     }
1430     }
1431     }
1432     Dif (bestreg==-1)
1433     abort();
1434    
1435     if (live.nat[bestreg].nholds>0) {
1436     free_nreg(bestreg);
1437     }
1438     if (isinreg(r)) {
1439     int rr=live.state[r].realreg;
1440     /* This will happen if we read a partially dirty register at a
1441     bigger size */
1442     Dif (willclobber || live.state[r].validsize>=size)
1443     abort();
1444     Dif (live.nat[rr].nholds!=1)
1445     abort();
1446     if (size==4 && live.state[r].validsize==2) {
1447     log_isused(bestreg);
1448     log_visused(r);
1449 gbeauche 1.24 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
1450 gbeauche 1.1 raw_bswap_32(bestreg);
1451     raw_zero_extend_16_rr(rr,rr);
1452     raw_zero_extend_16_rr(bestreg,bestreg);
1453     raw_bswap_32(bestreg);
1454     raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1455     live.state[r].validsize=4;
1456     live.nat[rr].touched=touchcnt++;
1457     return rr;
1458     }
1459     if (live.state[r].validsize==1) {
1460     /* Nothing yet */
1461     }
1462     evict(r);
1463     }
1464    
1465     if (!willclobber) {
1466     if (live.state[r].status!=UNDEF) {
1467     if (isconst(r)) {
1468     raw_mov_l_ri(bestreg,live.state[r].val);
1469     live.state[r].val=0;
1470     live.state[r].dirtysize=4;
1471     set_status(r,DIRTY);
1472     log_isused(bestreg);
1473     }
1474     else {
1475     log_isreg(bestreg, r); /* This will also load it! */
1476     live.state[r].dirtysize=0;
1477     set_status(r,CLEAN);
1478     }
1479     }
1480     else {
1481     live.state[r].val=0;
1482     live.state[r].dirtysize=0;
1483     set_status(r,CLEAN);
1484     log_isused(bestreg);
1485     }
1486     live.state[r].validsize=4;
1487     }
1488     else { /* this is the easiest way, but not optimal. FIXME! */
1489     /* Now it's trickier, but hopefully still OK */
1490     if (!isconst(r) || size==4) {
1491     live.state[r].validsize=size;
1492     live.state[r].dirtysize=size;
1493     live.state[r].val=0;
1494     set_status(r,DIRTY);
1495     if (size == 4) {
1496     log_clobberreg(r);
1497     log_isused(bestreg);
1498     }
1499     else {
1500     log_visused(r);
1501     log_isused(bestreg);
1502     }
1503     }
1504     else {
1505     if (live.state[r].status!=UNDEF)
1506     raw_mov_l_ri(bestreg,live.state[r].val);
1507     live.state[r].val=0;
1508     live.state[r].validsize=4;
1509     live.state[r].dirtysize=4;
1510     set_status(r,DIRTY);
1511     log_isused(bestreg);
1512     }
1513     }
1514     live.state[r].realreg=bestreg;
1515     live.state[r].realind=live.nat[bestreg].nholds;
1516     live.nat[bestreg].touched=touchcnt++;
1517     live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1518     live.nat[bestreg].nholds++;
1519    
1520     return bestreg;
1521     }
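
/* Editor's sketch (not in the original source): the "XXX use a
 * regalloc_order table?" note above and the rev 1.37 log message ("prefer
 * lower indexes in register allocation") point the same way: on x86_64,
 * scanning %rax..%rdi before %r8..%r15 avoids REX prefixes on most
 * emitted instructions. A table-driven scan might look like this; the
 * table name and contents are hypothetical.
 */
#if 0
static const int sketch_alloc_order[] = {
	0, 1, 2, 3, 4, 5, 6, 7,      /* legacy registers first: no REX needed */
	8, 9, 10, 11, 12, 13, 14, 15 /* %r8 - %r15 last (x86_64 only) */
};
/* in alloc_reg_hinted, iterate i over sketch_alloc_order[] instead of
   0..N_REGS-1 so ties in "badness" resolve to the cheaper encoding */
#endif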
1522    
1523     static int alloc_reg(int r, int size, int willclobber)
1524     {
1525     return alloc_reg_hinted(r,size,willclobber,-1);
1526     }
1527    
1528     static void unlock2(int r)
1529     {
1530     Dif (!live.nat[r].locked)
1531     abort();
1532     live.nat[r].locked--;
1533     }
1534    
1535     static void setlock(int r)
1536     {
1537     live.nat[r].locked++;
1538     }
1539    
1540    
1541     static void mov_nregs(int d, int s)
1542     {
1543     int ns=live.nat[s].nholds;
1544     int nd=live.nat[d].nholds;
1545     int i;
1546    
1547     if (s==d)
1548     return;
1549    
1550     if (nd>0)
1551     free_nreg(d);
1552    
1553     log_isused(d);
1554     raw_mov_l_rr(d,s);
1555    
1556     for (i=0;i<live.nat[s].nholds;i++) {
1557     int vs=live.nat[s].holds[i];
1558    
1559     live.state[vs].realreg=d;
1560     live.state[vs].realind=i;
1561     live.nat[d].holds[i]=vs;
1562     }
1563     live.nat[d].nholds=live.nat[s].nholds;
1564    
1565     live.nat[s].nholds=0;
1566     }
1567    
1568    
1569     static __inline__ void make_exclusive(int r, int size, int spec)
1570     {
1571     int clobber;
1572     reg_status oldstate;
1573     int rr=live.state[r].realreg;
1574     int nr;
1575     int nind;
1576     int ndirt=0;
1577     int i;
1578    
1579     if (!isinreg(r))
1580     return;
1581     if (live.nat[rr].nholds==1)
1582     return;
1583     for (i=0;i<live.nat[rr].nholds;i++) {
1584     int vr=live.nat[rr].holds[i];
1585     if (vr!=r &&
1586     (live.state[vr].status==DIRTY || live.state[vr].val))
1587     ndirt++;
1588     }
1589     if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1590     /* Everything else is clean, so let's keep this register */
1591     for (i=0;i<live.nat[rr].nholds;i++) {
1592     int vr=live.nat[rr].holds[i];
1593     if (vr!=r) {
1594     evict(vr);
1595     i--; /* Try that index again! */
1596     }
1597     }
1598     Dif (live.nat[rr].nholds!=1) {
1599     write_log("natreg %d holds %d vregs, %d not exclusive\n",
1600     rr,live.nat[rr].nholds,r);
1601     abort();
1602     }
1603     return;
1604     }
1605    
1606     /* We have to split the register */
1607     oldstate=live.state[r];
1608    
1609     setlock(rr); /* Make sure this doesn't go away */
1610     /* Forget about r being in the register rr */
1611     disassociate(r);
1612     /* Get a new register, that we will clobber completely */
1613     if (oldstate.status==DIRTY) {
1614     /* If dirtysize is <4, we need a register that can handle the
1615     eventual smaller memory store! Thanks to Quake68k for exposing
1616     this detail ;-) */
1617     nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1618     }
1619     else {
1620     nr=alloc_reg_hinted(r,4,1,spec);
1621     }
1622     nind=live.state[r].realind;
1623     live.state[r]=oldstate; /* Keep all the old state info */
1624     live.state[r].realreg=nr;
1625     live.state[r].realind=nind;
1626    
1627     if (size<live.state[r].validsize) {
1628     if (live.state[r].val) {
1629     /* Might as well compensate for the offset now */
1630     raw_lea_l_brr(nr,rr,oldstate.val);
1631     live.state[r].val=0;
1632     live.state[r].dirtysize=4;
1633     set_status(r,DIRTY);
1634     }
1635     else
1636     raw_mov_l_rr(nr,rr); /* Make another copy */
1637     }
1638     unlock2(rr);
1639     }
1640    
1641     static __inline__ void add_offset(int r, uae_u32 off)
1642     {
1643     live.state[r].val+=off;
1644     }
1645    
1646     static __inline__ void remove_offset(int r, int spec)
1647     {
1648     reg_status oldstate;
1649     int rr;
1650    
1651     if (isconst(r))
1652     return;
1653     if (live.state[r].val==0)
1654     return;
1655     if (isinreg(r) && live.state[r].validsize<4)
1656     evict(r);
1657    
1658     if (!isinreg(r))
1659     alloc_reg_hinted(r,4,0,spec);
1660    
1661     Dif (live.state[r].validsize!=4) {
1662     write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1663     abort();
1664     }
1665     make_exclusive(r,0,-1);
1666     /* make_exclusive might have done the job already */
1667     if (live.state[r].val==0)
1668     return;
1669    
1670     rr=live.state[r].realreg;
1671    
1672     if (live.nat[rr].nholds==1) {
1673     //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1674     // live.state[r].val,r,rr,target);
1675     adjust_nreg(rr,live.state[r].val);
1676     live.state[r].dirtysize=4;
1677     live.state[r].val=0;
1678     set_status(r,DIRTY);
1679     return;
1680     }
1681     write_log("Failed in remove_offset\n");
1682     abort();
1683     }
1684    
1685     static __inline__ void remove_all_offsets(void)
1686     {
1687     int i;
1688    
1689     for (i=0;i<VREGS;i++)
1690     remove_offset(i,-1);
1691     }
1692    
1693 gbeauche 1.28 static inline void flush_reg_count(void)
1694     {
1695     #if RECORD_REGISTER_USAGE
1696     for (int r = 0; r < 16; r++)
1697     if (reg_count_local[r])
1698     ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1699     #endif
1700     }
1701    
1702     static inline void record_register(int r)
1703     {
1704     #if RECORD_REGISTER_USAGE
1705     if (r < 16)
1706     reg_count_local[r]++;
1707     #endif
1708     }
1709    
1710 gbeauche 1.1 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1711     {
1712     int n;
1713     int answer=-1;
1714    
1715 gbeauche 1.28 record_register(r);
1716 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1717     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1718     }
1719     if (!can_offset)
1720     remove_offset(r,spec);
1721    
1722     if (isinreg(r) && live.state[r].validsize>=size) {
1723     n=live.state[r].realreg;
1724     switch(size) {
1725     case 1:
1726     if (live.nat[n].canbyte || spec>=0) {
1727     answer=n;
1728     }
1729     break;
1730     case 2:
1731     if (live.nat[n].canword || spec>=0) {
1732     answer=n;
1733     }
1734     break;
1735     case 4:
1736     answer=n;
1737     break;
1738     default: abort();
1739     }
1740     if (answer<0)
1741     evict(r);
1742     }
1743     /* either the value was in memory to start with, or it was evicted and
1744     is in memory now */
1745     if (answer<0) {
1746     answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1747     }
1748    
1749     if (spec>=0 && spec!=answer) {
1750     /* Too bad */
1751     mov_nregs(spec,answer);
1752     answer=spec;
1753     }
1754     live.nat[answer].locked++;
1755     live.nat[answer].touched=touchcnt++;
1756     return answer;
1757     }
1758    
1759    
1760    
1761     static int readreg(int r, int size)
1762     {
1763     return readreg_general(r,size,-1,0);
1764     }
1765    
1766     static int readreg_specific(int r, int size, int spec)
1767     {
1768     return readreg_general(r,size,spec,0);
1769     }
1770    
1771     static int readreg_offset(int r, int size)
1772     {
1773     return readreg_general(r,size,-1,1);
1774     }
1775    
1776     /* writereg_general(r, size, spec)
1777     *
1778     * INPUT
1779     * - r : mid-layer register
1780     * - size : requested size (1/2/4)
1781     * - spec : -1 if find or make a register free, otherwise specifies
1782     * the physical register to use in any case
1783     *
1784     * OUTPUT
1785     * - hard (physical, x86 here) register allocated to virtual register r
1786     */
1787     static __inline__ int writereg_general(int r, int size, int spec)
1788     {
1789     int n;
1790     int answer=-1;
1791    
1792 gbeauche 1.28 record_register(r);
1793 gbeauche 1.1 if (size<4) {
1794     remove_offset(r,spec);
1795     }
1796    
1797     make_exclusive(r,size,spec);
1798     if (isinreg(r)) {
1799     int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1800     int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1801     n=live.state[r].realreg;
1802    
1803     Dif (live.nat[n].nholds!=1)
1804     abort();
1805     switch(size) {
1806     case 1:
1807     if (live.nat[n].canbyte || spec>=0) {
1808     live.state[r].dirtysize=ndsize;
1809     live.state[r].validsize=nvsize;
1810     answer=n;
1811     }
1812     break;
1813     case 2:
1814     if (live.nat[n].canword || spec>=0) {
1815     live.state[r].dirtysize=ndsize;
1816     live.state[r].validsize=nvsize;
1817     answer=n;
1818     }
1819     break;
1820     case 4:
1821     live.state[r].dirtysize=ndsize;
1822     live.state[r].validsize=nvsize;
1823     answer=n;
1824     break;
1825     default: abort();
1826     }
1827     if (answer<0)
1828     evict(r);
1829     }
1830     /* either the value was in memory to start with, or it was evicted and
1831     is in memory now */
1832     if (answer<0) {
1833     answer=alloc_reg_hinted(r,size,1,spec);
1834     }
1835     if (spec>=0 && spec!=answer) {
1836     mov_nregs(spec,answer);
1837     answer=spec;
1838     }
1839     if (live.state[r].status==UNDEF)
1840     live.state[r].validsize=4;
1841     live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1842     live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1843    
1844     live.nat[answer].locked++;
1845     live.nat[answer].touched=touchcnt++;
1846     if (size==4) {
1847     live.state[r].val=0;
1848     }
1849     else {
1850     Dif (live.state[r].val) {
1851     write_log("Problem with val\n");
1852     abort();
1853     }
1854     }
1855     set_status(r,DIRTY);
1856     return answer;
1857     }
1858    
1859     static int writereg(int r, int size)
1860     {
1861     return writereg_general(r,size,-1);
1862     }
1863    
1864     static int writereg_specific(int r, int size, int spec)
1865     {
1866     return writereg_general(r,size,spec);
1867     }
1868    
1869     static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1870     {
1871     int n;
1872     int answer=-1;
1873    
1874 gbeauche 1.28 record_register(r);
1875 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1876     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1877     }
1878     remove_offset(r,spec);
1879     make_exclusive(r,0,spec);
1880    
1881     Dif (wsize<rsize) {
1882     write_log("Cannot handle wsize<rsize in rmw_general()\n");
1883     abort();
1884     }
1885     if (isinreg(r) && live.state[r].validsize>=rsize) {
1886     n=live.state[r].realreg;
1887     Dif (live.nat[n].nholds!=1)
1888     abort();
1889    
1890     switch(rsize) {
1891     case 1:
1892     if (live.nat[n].canbyte || spec>=0) {
1893     answer=n;
1894     }
1895     break;
1896     case 2:
1897     if (live.nat[n].canword || spec>=0) {
1898     answer=n;
1899     }
1900     break;
1901     case 4:
1902     answer=n;
1903     break;
1904     default: abort();
1905     }
1906     if (answer<0)
1907     evict(r);
1908     }
1909     /* either the value was in memory to start with, or it was evicted and
1910     is in memory now */
1911     if (answer<0) {
1912     answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1913     }
1914    
1915     if (spec>=0 && spec!=answer) {
1916     /* Too bad */
1917     mov_nregs(spec,answer);
1918     answer=spec;
1919     }
1920     if (wsize>live.state[r].dirtysize)
1921     live.state[r].dirtysize=wsize;
1922     if (wsize>live.state[r].validsize)
1923     live.state[r].validsize=wsize;
1924     set_status(r,DIRTY);
1925    
1926     live.nat[answer].locked++;
1927     live.nat[answer].touched=touchcnt++;
1928    
1929     Dif (live.state[r].val) {
1930     write_log("Problem with val(rmw)\n");
1931     abort();
1932     }
1933     return answer;
1934     }
1935    
1936     static int rmw(int r, int wsize, int rsize)
1937     {
1938     return rmw_general(r,wsize,rsize,-1);
1939     }
1940    
1941     static int rmw_specific(int r, int wsize, int rsize, int spec)
1942     {
1943     return rmw_general(r,wsize,rsize,spec);
1944     }
1945    
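/* Editor's note -- illustrative sketch, not part of the original source:
 * the (wsize,rsize) pair passed to rmw() lets an in-place widening
 * operation declare that it writes more bytes than need to be valid on
 * entry. For example, an in-place word-to-long sign extension locks its
 * operand with
 *
 *     r=rmw(r,4,2);   // write 4 bytes, only 2 must be valid on entry
 *
 * which is exactly what sign_extend_16_rr further below does for the
 * s==d case.
 */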
1946    
1947     /* needed for restoring the carry flag on non-P6 cores */
1948     static void bt_l_ri_noclobber(R4 r, IMM i)
1949     {
1950     int size=4;
1951     if (i<16)
1952     size=2;
1953     r=readreg(r,size);
1954     raw_bt_l_ri(r,i);
1955     unlock2(r);
1956     }
1957    
1958     /********************************************************************
1959     * FPU register status handling. EMIT TIME! *
1960     ********************************************************************/
1961    
1962     static void f_tomem(int r)
1963     {
1964     if (live.fate[r].status==DIRTY) {
1965     #if USE_LONG_DOUBLE
1966 gbeauche 1.24 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1967 gbeauche 1.1 #else
1968 gbeauche 1.24 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1969 gbeauche 1.1 #endif
1970     live.fate[r].status=CLEAN;
1971     }
1972     }
1973    
1974     static void f_tomem_drop(int r)
1975     {
1976     if (live.fate[r].status==DIRTY) {
1977     #if USE_LONG_DOUBLE
1978 gbeauche 1.24 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1979 gbeauche 1.1 #else
1980 gbeauche 1.24 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1981 gbeauche 1.1 #endif
1982     live.fate[r].status=INMEM;
1983     }
1984     }
1985    
1986    
1987     static __inline__ int f_isinreg(int r)
1988     {
1989     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1990     }
1991    
1992     static void f_evict(int r)
1993     {
1994     int rr;
1995    
1996     if (!f_isinreg(r))
1997     return;
1998     rr=live.fate[r].realreg;
1999     if (live.fat[rr].nholds==1)
2000     f_tomem_drop(r);
2001     else
2002     f_tomem(r);
2003    
2004     Dif (live.fat[rr].locked &&
2005     live.fat[rr].nholds==1) {
2006     write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2007     abort();
2008     }
2009    
2010     live.fat[rr].nholds--;
2011     if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2012     int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2013     int thisind=live.fate[r].realind;
2014     live.fat[rr].holds[thisind]=topreg;
2015     live.fate[topreg].realind=thisind;
2016     }
2017     live.fate[r].status=INMEM;
2018     live.fate[r].realreg=-1;
2019     }
2020    
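/* Editor's note -- illustrative sketch, not part of the original source:
 * f_evict above removes r from the nreg's holds[] array in O(1) with the
 * classic "swap with last" idiom instead of shifting the tail. The
 * generic pattern, including the back-pointer repair that
 * live.fate[].realind needs, looks like:
 *
 *     static void remove_at(int *a, int *n, int idx, int *backptr)
 *     {
 *         a[idx] = a[--(*n)];     // move the last element into the hole
 *         backptr[a[idx]] = idx;  // fix the moved element's index record
 *     }
 */
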
2021     static __inline__ void f_free_nreg(int r)
2022     {
2023     int i=live.fat[r].nholds;
2024    
2025     while (i) {
2026     int vr;
2027    
2028     --i;
2029     vr=live.fat[r].holds[i];
2030     f_evict(vr);
2031     }
2032     Dif (live.fat[r].nholds!=0) {
2033     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2034     abort();
2035     }
2036     }
2037    
2038    
2039     /* Use with care! */
2040     static __inline__ void f_isclean(int r)
2041     {
2042     if (!f_isinreg(r))
2043     return;
2044     live.fate[r].status=CLEAN;
2045     }
2046    
2047     static __inline__ void f_disassociate(int r)
2048     {
2049     f_isclean(r);
2050     f_evict(r);
2051     }
2052    
2053    
2054    
2055     static int f_alloc_reg(int r, int willclobber)
2056     {
2057     int bestreg;
2058     uae_s32 when;
2059     int i;
2060     uae_s32 badness;
2061     bestreg=-1;
2062     when=2000000000;
2063     for (i=N_FREGS;i--;) {
2064     badness=live.fat[i].touched;
2065     if (live.fat[i].nholds==0)
2066     badness=0;
2067    
2068     if (!live.fat[i].locked && badness<when) {
2069     bestreg=i;
2070     when=badness;
2071     if (live.fat[i].nholds==0)
2072     break;
2073     }
2074     }
2075     Dif (bestreg==-1)
2076     abort();
2077    
2078     if (live.fat[bestreg].nholds>0) {
2079     f_free_nreg(bestreg);
2080     }
2081     if (f_isinreg(r)) {
2082     f_evict(r);
2083     }
2084    
2085     if (!willclobber) {
2086     if (live.fate[r].status!=UNDEF) {
2087     #if USE_LONG_DOUBLE
2088 gbeauche 1.24 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2089 gbeauche 1.1 #else
2090 gbeauche 1.24 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2091 gbeauche 1.1 #endif
2092     }
2093     live.fate[r].status=CLEAN;
2094     }
2095     else {
2096     live.fate[r].status=DIRTY;
2097     }
2098     live.fate[r].realreg=bestreg;
2099     live.fate[r].realind=live.fat[bestreg].nholds;
2100     live.fat[bestreg].touched=touchcnt++;
2101     live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2102     live.fat[bestreg].nholds++;
2103    
2104     return bestreg;
2105     }
2106    
2107     static void f_unlock(int r)
2108     {
2109     Dif (!live.fat[r].locked)
2110     abort();
2111     live.fat[r].locked--;
2112     }
2113    
2114     static void f_setlock(int r)
2115     {
2116     live.fat[r].locked++;
2117     }
2118    
2119     static __inline__ int f_readreg(int r)
2120     {
2121     int n;
2122     int answer=-1;
2123    
2124     if (f_isinreg(r)) {
2125     n=live.fate[r].realreg;
2126     answer=n;
2127     }
2128     /* either the value was in memory to start with, or it was evicted and
2129     is in memory now */
2130     if (answer<0)
2131     answer=f_alloc_reg(r,0);
2132    
2133     live.fat[answer].locked++;
2134     live.fat[answer].touched=touchcnt++;
2135     return answer;
2136     }
2137    
2138     static __inline__ void f_make_exclusive(int r, int clobber)
2139     {
2140     freg_status oldstate;
2141     int rr=live.fate[r].realreg;
2142     int nr;
2143     int nind;
2144     int ndirt=0;
2145     int i;
2146    
2147     if (!f_isinreg(r))
2148     return;
2149     if (live.fat[rr].nholds==1)
2150     return;
2151     for (i=0;i<live.fat[rr].nholds;i++) {
2152     int vr=live.fat[rr].holds[i];
2153     if (vr!=r && live.fate[vr].status==DIRTY)
2154     ndirt++;
2155     }
2156     if (!ndirt && !live.fat[rr].locked) {
2157     /* Everything else is clean, so let's keep this register */
2158     for (i=0;i<live.fat[rr].nholds;i++) {
2159     int vr=live.fat[rr].holds[i];
2160     if (vr!=r) {
2161     f_evict(vr);
2162     i--; /* Try that index again! */
2163     }
2164     }
2165     Dif (live.fat[rr].nholds!=1) {
2166     write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2167     for (i=0;i<live.fat[rr].nholds;i++) {
2168     write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2169     live.fate[live.fat[rr].holds[i]].realreg,
2170     live.fate[live.fat[rr].holds[i]].realind);
2171     }
2172     write_log("\n");
2173     abort();
2174     }
2175     return;
2176     }
2177    
2178     /* We have to split the register */
2179     oldstate=live.fate[r];
2180    
2181     f_setlock(rr); /* Make sure this doesn't go away */
2182     /* Forget about r being in the register rr */
2183     f_disassociate(r);
2184     /* Get a new register, that we will clobber completely */
2185     nr=f_alloc_reg(r,1);
2186     nind=live.fate[r].realind;
2187     if (!clobber)
2188     raw_fmov_rr(nr,rr); /* Make another copy */
2189     live.fate[r]=oldstate; /* Keep all the old state info */
2190     live.fate[r].realreg=nr;
2191     live.fate[r].realind=nind;
2192     f_unlock(rr);
2193     }
2194    
2195    
2196     static __inline__ int f_writereg(int r)
2197     {
2198     int n;
2199     int answer=-1;
2200    
2201     f_make_exclusive(r,1);
2202     if (f_isinreg(r)) {
2203     n=live.fate[r].realreg;
2204     answer=n;
2205     }
2206     if (answer<0) {
2207     answer=f_alloc_reg(r,1);
2208     }
2209     live.fate[r].status=DIRTY;
2210     live.fat[answer].locked++;
2211     live.fat[answer].touched=touchcnt++;
2212     return answer;
2213     }
2214    
2215     static int f_rmw(int r)
2216     {
2217     int n;
2218    
2219     f_make_exclusive(r,0);
2220     if (f_isinreg(r)) {
2221     n=live.fate[r].realreg;
2222     }
2223     else
2224     n=f_alloc_reg(r,0);
2225     live.fate[r].status=DIRTY;
2226     live.fat[n].locked++;
2227     live.fat[n].touched=touchcnt++;
2228     return n;
2229     }
2230    
2231     static void fflags_into_flags_internal(uae_u32 tmp)
2232     {
2233     int r;
2234    
2235     clobber_flags();
2236     r=f_readreg(FP_RESULT);
2237     if (FFLAG_NREG_CLOBBER_CONDITION) {
2238     int tmp2=tmp;
2239     tmp=writereg_specific(tmp,4,FFLAG_NREG);
2240     raw_fflags_into_flags(r);
2241     unlock2(tmp);
2242     forget_about(tmp2);
2243     }
2244     else
2245     raw_fflags_into_flags(r);
2246     f_unlock(r);
2247 gbeauche 1.19 live_flags();
2248 gbeauche 1.1 }
2249    
2250    
2251    
2252    
2253     /********************************************************************
2254     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2255     ********************************************************************/
2256    
2257     /*
2258     * RULES FOR HANDLING REGISTERS:
2259     *
2260     * * In the function headers, order the parameters
2261     * - 1st registers written to
2262     * - 2nd read/modify/write registers
2263     * - 3rd registers read from
2264     * * Before calling raw_*, you must call readreg, writereg or rmw for
2265     * each register
2266     * * The order for this is
2267     * - 1st call remove_offset for all registers written to with size<4
2268     * - 2nd call readreg for all registers read without offset
2269     * - 3rd call rmw for all rmw registers
2270     * - 4th call readreg_offset for all registers that can handle offsets
2271     * - 5th call get_offset for all the registers from the previous step
2272     * - 6th call writereg for all written-to registers
2273     * - 7th call raw_*
2274     * - 8th unlock2 all registers that were locked
2275     */
2276    
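/* Editor's sketch -- not part of the original source: a minimal
 * hypothetical MIDFUNC obeying the ordering rules above, with one
 * register read from and one written to. "frob" and raw_frob_l_rr are
 * made-up names; the CLOBBER_* macro to use depends on what the emitted
 * opcode actually clobbers.
 *
 *     MIDFUNC(2,frob_l_rr,(W4 d, R4 s))
 *     {
 *         CLOBBER_MOV;         // declare clobbers before locking
 *         s=readreg(s,4);      // 2nd: lock registers read from
 *         d=writereg(d,4);     // 6th: lock registers written to
 *         raw_frob_l_rr(d,s);  // 7th: emit the native instruction
 *         unlock2(d);          // 8th: unlock everything again
 *         unlock2(s);
 *     }
 *     MENDFUNC(2,frob_l_rr,(W4 d, R4 s))
 */
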
2277     MIDFUNC(0,live_flags,(void))
2278     {
2279     live.flags_on_stack=TRASH;
2280     live.flags_in_flags=VALID;
2281     live.flags_are_important=1;
2282     }
2283     MENDFUNC(0,live_flags,(void))
2284    
2285     MIDFUNC(0,dont_care_flags,(void))
2286     {
2287     live.flags_are_important=0;
2288     }
2289     MENDFUNC(0,dont_care_flags,(void))
2290    
2291    
2292     MIDFUNC(0,duplicate_carry,(void))
2293     {
2294     evict(FLAGX);
2295     make_flags_live_internal();
2296 gbeauche 1.24 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2297 gbeauche 1.1 log_vwrite(FLAGX);
2298     }
2299     MENDFUNC(0,duplicate_carry,(void))
2300    
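/* Editor's note -- not part of the original source: the cc argument to
 * setcc/setcc_m uses the native x86 condition-code encoding, so the
 * cc=2 in duplicate_carry above emits SETB ("below", i.e. carry set),
 * storing a byte copy of the native C flag into FLAGX's memory slot.
 */
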
2301     MIDFUNC(0,restore_carry,(void))
2302     {
2303     if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2304     bt_l_ri_noclobber(FLAGX,0);
2305     }
2306     else { /* Avoid the stall the above creates.
2307     This is slow on non-P6, though.
2308     */
2309     COMPCALL(rol_b_ri(FLAGX,8));
2310     isclean(FLAGX);
2311     }
2312     }
2313     MENDFUNC(0,restore_carry,(void))
2314    
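/* Editor's note -- not part of the original source: the rol_b_ri trick
 * above works because rotating an 8-bit register by 8 leaves its value
 * unchanged while the native C flag receives the last bit rotated into
 * position 0, i.e. FLAGX's original bit 0 -- the same effect as the bt,
 * but touching only the byte register, so no P6 partial-register stall.
 */
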
2315     MIDFUNC(0,start_needflags,(void))
2316     {
2317     needflags=1;
2318     }
2319     MENDFUNC(0,start_needflags,(void))
2320    
2321     MIDFUNC(0,end_needflags,(void))
2322     {
2323     needflags=0;
2324     }
2325     MENDFUNC(0,end_needflags,(void))
2326    
2327     MIDFUNC(0,make_flags_live,(void))
2328     {
2329     make_flags_live_internal();
2330     }
2331     MENDFUNC(0,make_flags_live,(void))
2332    
2333     MIDFUNC(1,fflags_into_flags,(W2 tmp))
2334     {
2335     clobber_flags();
2336     fflags_into_flags_internal(tmp);
2337     }
2338     MENDFUNC(1,fflags_into_flags,(W2 tmp))
2339    
2340    
2341     MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2342     {
2343     int size=4;
2344     if (i<16)
2345     size=2;
2346     CLOBBER_BT;
2347     r=readreg(r,size);
2348     raw_bt_l_ri(r,i);
2349     unlock2(r);
2350     }
2351     MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2352    
2353     MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2354     {
2355     CLOBBER_BT;
2356     r=readreg(r,4);
2357     b=readreg(b,4);
2358     raw_bt_l_rr(r,b);
2359     unlock2(r);
2360     unlock2(b);
2361     }
2362     MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2363    
2364     MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2365     {
2366     int size=4;
2367     if (i<16)
2368     size=2;
2369     CLOBBER_BT;
2370     r=rmw(r,size,size);
2371     raw_btc_l_ri(r,i);
2372     unlock2(r);
2373     }
2374     MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2375    
2376     MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2377     {
2378     CLOBBER_BT;
2379     b=readreg(b,4);
2380     r=rmw(r,4,4);
2381     raw_btc_l_rr(r,b);
2382     unlock2(r);
2383     unlock2(b);
2384     }
2385     MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2386    
2387    
2388     MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2389     {
2390     int size=4;
2391     if (i<16)
2392     size=2;
2393     CLOBBER_BT;
2394     r=rmw(r,size,size);
2395     raw_btr_l_ri(r,i);
2396     unlock2(r);
2397     }
2398     MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2399    
2400     MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2401     {
2402     CLOBBER_BT;
2403     b=readreg(b,4);
2404     r=rmw(r,4,4);
2405     raw_btr_l_rr(r,b);
2406     unlock2(r);
2407     unlock2(b);
2408     }
2409     MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2410    
2411    
2412     MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2413     {
2414     int size=4;
2415     if (i<16)
2416     size=2;
2417     CLOBBER_BT;
2418     r=rmw(r,size,size);
2419     raw_bts_l_ri(r,i);
2420     unlock2(r);
2421     }
2422     MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2423    
2424     MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2425     {
2426     CLOBBER_BT;
2427     b=readreg(b,4);
2428     r=rmw(r,4,4);
2429     raw_bts_l_rr(r,b);
2430     unlock2(r);
2431     unlock2(b);
2432     }
2433     MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2434    
2435     MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2436     {
2437     CLOBBER_MOV;
2438     d=writereg(d,4);
2439     raw_mov_l_rm(d,s);
2440     unlock2(d);
2441     }
2442     MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2443    
2444    
2445     MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2446     {
2447     r=readreg(r,4);
2448     raw_call_r(r);
2449     unlock2(r);
2450     }
2451     MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2452    
2453     MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2454     {
2455     CLOBBER_SUB;
2456     raw_sub_l_mi(d,s);
2457     }
2458     MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2459    
2460     MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2461     {
2462     CLOBBER_MOV;
2463     raw_mov_l_mi(d,s);
2464     }
2465     MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2466    
2467     MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2468     {
2469     CLOBBER_MOV;
2470     raw_mov_w_mi(d,s);
2471     }
2472     MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2473    
2474     MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2475     {
2476     CLOBBER_MOV;
2477     raw_mov_b_mi(d,s);
2478     }
2479     MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2480    
2481     MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2482     {
2483     if (!i && !needflags)
2484     return;
2485     CLOBBER_ROL;
2486     r=rmw(r,1,1);
2487     raw_rol_b_ri(r,i);
2488     unlock2(r);
2489     }
2490     MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2491    
2492     MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2493     {
2494     if (!i && !needflags)
2495     return;
2496     CLOBBER_ROL;
2497     r=rmw(r,2,2);
2498     raw_rol_w_ri(r,i);
2499     unlock2(r);
2500     }
2501     MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2502    
2503     MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2504     {
2505     if (!i && !needflags)
2506     return;
2507     CLOBBER_ROL;
2508     r=rmw(r,4,4);
2509     raw_rol_l_ri(r,i);
2510     unlock2(r);
2511     }
2512     MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2513    
2514     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2515     {
2516     if (isconst(r)) {
2517     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2518     return;
2519     }
2520     CLOBBER_ROL;
2521     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2522     d=rmw(d,4,4);
2523     Dif (r!=1) {
2524     write_log("Illegal register %d in raw_rol_l\n",r);
2525     abort();
2526     }
2527     raw_rol_l_rr(d,r);
2528     unlock2(r);
2529     unlock2(d);
2530     }
2531     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2532    
2533     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2534     { /* Can only do this with r==1, i.e. cl */
2535    
2536     if (isconst(r)) {
2537     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2538     return;
2539     }
2540     CLOBBER_ROL;
2541     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2542     d=rmw(d,2,2);
2543     Dif (r!=1) {
2544     write_log("Illegal register %d in raw_rol_w\n",r);
2545     abort();
2546     }
2547     raw_rol_w_rr(d,r);
2548     unlock2(r);
2549     unlock2(d);
2550     }
2551     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2552    
2553     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2554     { /* Can only do this with r==1, i.e. cl */
2555    
2556     if (isconst(r)) {
2557     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2558     return;
2559     }
2560    
2561     CLOBBER_ROL;
2562     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2563     d=rmw(d,1,1);
2564     Dif (r!=1) {
2565     write_log("Illegal register %d in raw_rol_b\n",r);
2566     abort();
2567     }
2568     raw_rol_b_rr(d,r);
2569     unlock2(r);
2570     unlock2(d);
2571     }
2572     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2573    
2574    
2575     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2576     {
2577     if (isconst(r)) {
2578     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2579     return;
2580     }
2581     CLOBBER_SHLL;
2582     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2583     d=rmw(d,4,4);
2584     Dif (r!=1) {
2585     write_log("Illegal register %d in raw_shll_l\n",r);
2586     abort();
2587     }
2588     raw_shll_l_rr(d,r);
2589     unlock2(r);
2590     unlock2(d);
2591     }
2592     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2593    
2594     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2595     { /* Can only do this with r==1, i.e. cl */
2596    
2597     if (isconst(r)) {
2598     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2599     return;
2600     }
2601     CLOBBER_SHLL;
2602     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2603     d=rmw(d,2,2);
2604     Dif (r!=1) {
2605     write_log("Illegal register %d in raw_shll_w\n",r);
2606     abort();
2607     }
2608     raw_shll_w_rr(d,r);
2609     unlock2(r);
2610     unlock2(d);
2611     }
2612     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2613    
2614     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2615     { /* Can only do this with r==1, i.e. cl */
2616    
2617     if (isconst(r)) {
2618     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2619     return;
2620     }
2621    
2622     CLOBBER_SHLL;
2623     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2624     d=rmw(d,1,1);
2625     Dif (r!=1) {
2626     write_log("Illegal register %d in raw_shll_b\n",r);
2627     abort();
2628     }
2629     raw_shll_b_rr(d,r);
2630     unlock2(r);
2631     unlock2(d);
2632     }
2633     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2634    
2635    
2636     MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2637     {
2638     if (!i && !needflags)
2639     return;
2640     CLOBBER_ROR;
2641     r=rmw(r,1,1);
2642     raw_ror_b_ri(r,i);
2643     unlock2(r);
2644     }
2645     MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2646    
2647     MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2648     {
2649     if (!i && !needflags)
2650     return;
2651     CLOBBER_ROR;
2652     r=rmw(r,2,2);
2653     raw_ror_w_ri(r,i);
2654     unlock2(r);
2655     }
2656     MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2657    
2658     MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2659     {
2660     if (!i && !needflags)
2661     return;
2662     CLOBBER_ROR;
2663     r=rmw(r,4,4);
2664     raw_ror_l_ri(r,i);
2665     unlock2(r);
2666     }
2667     MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2668    
2669     MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2670     {
2671     if (isconst(r)) {
2672     COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2673     return;
2674     }
2675     CLOBBER_ROR;
2676     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2677     d=rmw(d,4,4);
2678     raw_ror_l_rr(d,r);
2679     unlock2(r);
2680     unlock2(d);
2681     }
2682     MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2683    
2684     MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2685     {
2686     if (isconst(r)) {
2687     COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2688     return;
2689     }
2690     CLOBBER_ROR;
2691     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2692     d=rmw(d,2,2);
2693     raw_ror_w_rr(d,r);
2694     unlock2(r);
2695     unlock2(d);
2696     }
2697     MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2698    
2699     MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2700     {
2701     if (isconst(r)) {
2702     COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2703     return;
2704     }
2705    
2706     CLOBBER_ROR;
2707     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2708     d=rmw(d,1,1);
2709     raw_ror_b_rr(d,r);
2710     unlock2(r);
2711     unlock2(d);
2712     }
2713     MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2714    
2715     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2716     {
2717     if (isconst(r)) {
2718     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2719     return;
2720     }
2721     CLOBBER_SHRL;
2722     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2723     d=rmw(d,4,4);
2724     Dif (r!=1) {
2725     write_log("Illegal register %d in raw_shrl_l\n",r);
2726     abort();
2727     }
2728     raw_shrl_l_rr(d,r);
2729     unlock2(r);
2730     unlock2(d);
2731     }
2732     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2733    
2734     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2735     { /* Can only do this with r==1, i.e. cl */
2736    
2737     if (isconst(r)) {
2738     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2739     return;
2740     }
2741     CLOBBER_SHRL;
2742     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2743     d=rmw(d,2,2);
2744     Dif (r!=1) {
2745     write_log("Illegal register %d in raw_shrl_w\n",r);
2746     abort();
2747     }
2748     raw_shrl_w_rr(d,r);
2749     unlock2(r);
2750     unlock2(d);
2751     }
2752     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2753    
2754     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2755     { /* Can only do this with r==1, i.e. cl */
2756    
2757     if (isconst(r)) {
2758     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2759     return;
2760     }
2761    
2762     CLOBBER_SHRL;
2763     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2764     d=rmw(d,1,1);
2765     Dif (r!=1) {
2766     write_log("Illegal register %d in raw_shrl_b\n",r);
2767     abort();
2768     }
2769     raw_shrl_b_rr(d,r);
2770     unlock2(r);
2771     unlock2(d);
2772     }
2773     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2774    
2775    
2776    
2777     MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2778     {
2779     if (!i && !needflags)
2780     return;
2781     if (isconst(r) && !needflags) {
2782     live.state[r].val<<=i;
2783     return;
2784     }
2785     CLOBBER_SHLL;
2786     r=rmw(r,4,4);
2787     raw_shll_l_ri(r,i);
2788     unlock2(r);
2789     }
2790     MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2791    
2792     MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2793     {
2794     if (!i && !needflags)
2795     return;
2796     CLOBBER_SHLL;
2797     r=rmw(r,2,2);
2798     raw_shll_w_ri(r,i);
2799     unlock2(r);
2800     }
2801     MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2802    
2803     MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2804     {
2805     if (!i && !needflags)
2806     return;
2807     CLOBBER_SHLL;
2808     r=rmw(r,1,1);
2809     raw_shll_b_ri(r,i);
2810     unlock2(r);
2811     }
2812     MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2813    
2814     MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2815     {
2816     if (!i && !needflags)
2817     return;
2818     if (isconst(r) && !needflags) {
2819     live.state[r].val>>=i;
2820     return;
2821     }
2822     CLOBBER_SHRL;
2823     r=rmw(r,4,4);
2824     raw_shrl_l_ri(r,i);
2825     unlock2(r);
2826     }
2827     MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2828    
2829     MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2830     {
2831     if (!i && !needflags)
2832     return;
2833     CLOBBER_SHRL;
2834     r=rmw(r,2,2);
2835     raw_shrl_w_ri(r,i);
2836     unlock2(r);
2837     }
2838     MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2839    
2840     MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2841     {
2842     if (!i && !needflags)
2843     return;
2844     CLOBBER_SHRL;
2845     r=rmw(r,1,1);
2846     raw_shrl_b_ri(r,i);
2847     unlock2(r);
2848     }
2849     MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2850    
2851     MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2852     {
2853     if (!i && !needflags)
2854     return;
2855     CLOBBER_SHRA;
2856     r=rmw(r,4,4);
2857     raw_shra_l_ri(r,i);
2858     unlock2(r);
2859     }
2860     MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2861    
2862     MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2863     {
2864     if (!i && !needflags)
2865     return;
2866     CLOBBER_SHRA;
2867     r=rmw(r,2,2);
2868     raw_shra_w_ri(r,i);
2869     unlock2(r);
2870     }
2871     MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2872    
2873     MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2874     {
2875     if (!i && !needflags)
2876     return;
2877     CLOBBER_SHRA;
2878     r=rmw(r,1,1);
2879     raw_shra_b_ri(r,i);
2880     unlock2(r);
2881     }
2882     MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2883    
2884     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2885     {
2886     if (isconst(r)) {
2887     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2888     return;
2889     }
2890     CLOBBER_SHRA;
2891     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2892     d=rmw(d,4,4);
2893     Dif (r!=1) {
2894     write_log("Illegal register %d in raw_shra_l\n",r);
2895     abort();
2896     }
2897     raw_shra_l_rr(d,r);
2898     unlock2(r);
2899     unlock2(d);
2900     }
2901     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2902    
2903     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2904     { /* Can only do this with r==1, i.e. cl */
2905    
2906     if (isconst(r)) {
2907     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2908     return;
2909     }
2910     CLOBBER_SHRA;
2911     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2912     d=rmw(d,2,2);
2913     Dif (r!=1) {
2914     write_log("Illegal register %d in raw_shra_w\n",r);
2915     abort();
2916     }
2917     raw_shra_w_rr(d,r);
2918     unlock2(r);
2919     unlock2(d);
2920     }
2921     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2922    
2923     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2924     { /* Can only do this with r==1, i.e. cl */
2925    
2926     if (isconst(r)) {
2927     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2928     return;
2929     }
2930    
2931     CLOBBER_SHRA;
2932     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2933     d=rmw(d,1,1);
2934     Dif (r!=1) {
2935     write_log("Illegal register %d in raw_shra_b\n",r);
2936     abort();
2937     }
2938     raw_shra_b_rr(d,r);
2939     unlock2(r);
2940     unlock2(d);
2941     }
2942     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2943    
2944    
2945     MIDFUNC(2,setcc,(W1 d, IMM cc))
2946     {
2947     CLOBBER_SETCC;
2948     d=writereg(d,1);
2949     raw_setcc(d,cc);
2950     unlock2(d);
2951     }
2952     MENDFUNC(2,setcc,(W1 d, IMM cc))
2953    
2954     MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2955     {
2956     CLOBBER_SETCC;
2957     raw_setcc_m(d,cc);
2958     }
2959     MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2960    
2961     MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2962     {
2963     if (d==s)
2964     return;
2965     CLOBBER_CMOV;
2966     s=readreg(s,4);
2967     d=rmw(d,4,4);
2968     raw_cmov_l_rr(d,s,cc);
2969     unlock2(s);
2970     unlock2(d);
2971     }
2972     MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2973    
2974     MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2975     {
2976     CLOBBER_CMOV;
2977     d=rmw(d,4,4);
2978     raw_cmov_l_rm(d,s,cc);
2979     unlock2(d);
2980     }
2981     MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2982    
2983 gbeauche 1.26 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2984     {
2985     CLOBBER_BSF;
2986     s = readreg(s, 4);
2987     d = writereg(d, 4);
2988     raw_bsf_l_rr(d, s);
2989     unlock2(s);
2990     unlock2(d);
2991     }
2992     MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2993    
2994     /* Set the Z flag depending on the value in s. Note that the
2995     value has to be 0 or -1 (or, more precisely, for non-zero
2996     values, bit 14 must be set)! */
2997     MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2998 gbeauche 1.1 {
2999 gbeauche 1.26 CLOBBER_BSF;
3000     s=rmw_specific(s,4,4,FLAG_NREG3);
3001     tmp=writereg(tmp,4);
3002     raw_flags_set_zero(s, tmp);
3003     unlock2(tmp);
3004     unlock2(s);
3005 gbeauche 1.1 }
3006 gbeauche 1.26 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3007 gbeauche 1.1
3008     MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3009     {
3010     CLOBBER_MUL;
3011     s=readreg(s,4);
3012     d=rmw(d,4,4);
3013     raw_imul_32_32(d,s);
3014     unlock2(s);
3015     unlock2(d);
3016     }
3017     MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3018    
3019     MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3020     {
3021     CLOBBER_MUL;
3022     s=rmw_specific(s,4,4,MUL_NREG2);
3023     d=rmw_specific(d,4,4,MUL_NREG1);
3024     raw_imul_64_32(d,s);
3025     unlock2(s);
3026     unlock2(d);
3027     }
3028     MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3029    
3030     MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3031     {
3032     CLOBBER_MUL;
3033     s=rmw_specific(s,4,4,MUL_NREG2);
3034     d=rmw_specific(d,4,4,MUL_NREG1);
3035     raw_mul_64_32(d,s);
3036     unlock2(s);
3037     unlock2(d);
3038     }
3039     MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3040    
3041     MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3042     {
3043     CLOBBER_MUL;
3044     s=readreg(s,4);
3045     d=rmw(d,4,4);
3046     raw_mul_32_32(d,s);
3047     unlock2(s);
3048     unlock2(d);
3049     }
3050     MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3051    
3052 gbeauche 1.24 #if SIZEOF_VOID_P == 8
3053     MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3054     {
3055     int isrmw;
3056    
3057     if (isconst(s)) {
3058     set_const(d,(uae_s32)live.state[s].val);
3059     return;
3060     }
3061    
3062     CLOBBER_SE32;
3063     isrmw=(s==d);
3064     if (!isrmw) {
3065     s=readreg(s,4);
3066     d=writereg(d,4);
3067     }
3068     else { /* If we try to lock this twice, with different sizes, we
3069     are in trouble! */
3070     s=d=rmw(s,4,4);
3071     }
3072     raw_sign_extend_32_rr(d,s);
3073     if (!isrmw) {
3074     unlock2(d);
3075     unlock2(s);
3076     }
3077     else {
3078     unlock2(s);
3079     }
3080     }
3081     MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3082     #endif
3083    
3084 gbeauche 1.1 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3085     {
3086     int isrmw;
3087    
3088     if (isconst(s)) {
3089     set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3090     return;
3091     }
3092    
3093     CLOBBER_SE16;
3094     isrmw=(s==d);
3095     if (!isrmw) {
3096     s=readreg(s,2);
3097     d=writereg(d,4);
3098     }
3099     else { /* If we try to lock this twice, with different sizes, we
3100     are in trouble! */
3101     s=d=rmw(s,4,2);
3102     }
3103     raw_sign_extend_16_rr(d,s);
3104     if (!isrmw) {
3105     unlock2(d);
3106     unlock2(s);
3107     }
3108     else {
3109     unlock2(s);
3110     }
3111     }
3112     MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3113    
3114     MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3115     {
3116     int isrmw;
3117    
3118     if (isconst(s)) {
3119     set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3120     return;
3121     }
3122    
3123     isrmw=(s==d);
3124     CLOBBER_SE8;
3125     if (!isrmw) {
3126     s=readreg(s,1);
3127     d=writereg(d,4);
3128     }
3129     else { /* If we try to lock this twice, with different sizes, we
3130     are in trouble! */
3131     s=d=rmw(s,4,1);
3132     }
3133    
3134     raw_sign_extend_8_rr(d,s);
3135    
3136     if (!isrmw) {
3137     unlock2(d);
3138     unlock2(s);
3139     }
3140     else {
3141     unlock2(s);
3142     }
3143     }
3144     MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3145    
3146    
3147     MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3148     {
3149     int isrmw;
3150    
3151     if (isconst(s)) {
3152     set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3153     return;
3154     }
3155    
3156     isrmw=(s==d);
3157     CLOBBER_ZE16;
3158     if (!isrmw) {
3159     s=readreg(s,2);
3160     d=writereg(d,4);
3161     }
3162     else { /* If we try to lock this twice, with different sizes, we
3163     are in trouble! */
3164     s=d=rmw(s,4,2);
3165     }
3166     raw_zero_extend_16_rr(d,s);
3167     if (!isrmw) {
3168     unlock2(d);
3169     unlock2(s);
3170     }
3171     else {
3172     unlock2(s);
3173     }
3174     }
3175     MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3176    
3177     MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3178     {
3179     int isrmw;
3180     if (isconst(s)) {
3181     set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3182     return;
3183     }
3184    
3185     isrmw=(s==d);
3186     CLOBBER_ZE8;
3187     if (!isrmw) {
3188     s=readreg(s,1);
3189     d=writereg(d,4);
3190     }
3191     else { /* If we try to lock this twice, with different sizes, we
3192     are in trouble! */
3193     s=d=rmw(s,4,1);
3194     }
3195    
3196     raw_zero_extend_8_rr(d,s);
3197    
3198     if (!isrmw) {
3199     unlock2(d);
3200     unlock2(s);
3201     }
3202     else {
3203     unlock2(s);
3204     }
3205     }
3206     MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3207    
3208     MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3209     {
3210     if (d==s)
3211     return;
3212     if (isconst(s)) {
3213     COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3214     return;
3215     }
3216    
3217     CLOBBER_MOV;
3218     s=readreg(s,1);
3219     d=writereg(d,1);
3220     raw_mov_b_rr(d,s);
3221     unlock2(d);
3222     unlock2(s);
3223     }
3224     MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3225    
3226     MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3227     {
3228     if (d==s)
3229     return;
3230     if (isconst(s)) {
3231     COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3232     return;
3233     }
3234    
3235     CLOBBER_MOV;
3236     s=readreg(s,2);
3237     d=writereg(d,2);
3238     raw_mov_w_rr(d,s);
3239     unlock2(d);
3240     unlock2(s);
3241     }
3242     MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3243    
3244    
3245     MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3246     {
3247     CLOBBER_MOV;
3248     baser=readreg(baser,4);
3249     index=readreg(index,4);
3250     d=writereg(d,4);
3251    
3252     raw_mov_l_rrm_indexed(d,baser,index,factor);
3253     unlock2(d);
3254     unlock2(baser);
3255     unlock2(index);
3256     }
3257     MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3258    
3259     MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3260     {
3261     CLOBBER_MOV;
3262     baser=readreg(baser,4);
3263     index=readreg(index,4);
3264     d=writereg(d,2);
3265    
3266     raw_mov_w_rrm_indexed(d,baser,index,factor);
3267     unlock2(d);
3268     unlock2(baser);
3269     unlock2(index);
3270     }
3271     MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3272    
3273     MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3274     {
3275     CLOBBER_MOV;
3276     baser=readreg(baser,4);
3277     index=readreg(index,4);
3278     d=writereg(d,1);
3279    
3280     raw_mov_b_rrm_indexed(d,baser,index,factor);
3281    
3282     unlock2(d);
3283     unlock2(baser);
3284     unlock2(index);
3285     }
3286     MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3287    
3288    
3289     MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3290     {
3291     CLOBBER_MOV;
3292     baser=readreg(baser,4);
3293     index=readreg(index,4);
3294     s=readreg(s,4);
3295    
3296     Dif (baser==s || index==s)
3297     abort();
3298    
3299    
3300     raw_mov_l_mrr_indexed(baser,index,factor,s);
3301     unlock2(s);
3302     unlock2(baser);
3303     unlock2(index);
3304     }
3305     MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3306    
3307     MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3308     {
3309     CLOBBER_MOV;
3310     baser=readreg(baser,4);
3311     index=readreg(index,4);
3312     s=readreg(s,2);
3313    
3314     raw_mov_w_mrr_indexed(baser,index,factor,s);
3315     unlock2(s);
3316     unlock2(baser);
3317     unlock2(index);
3318     }
3319     MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3320    
3321     MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3322     {
3323     CLOBBER_MOV;
3324     s=readreg(s,1);
3325     baser=readreg(baser,4);
3326     index=readreg(index,4);
3327    
3328     raw_mov_b_mrr_indexed(baser,index,factor,s);
3329     unlock2(s);
3330     unlock2(baser);
3331     unlock2(index);
3332     }
3333     MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3334    
3335    
3336     MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3337     {
3338     int basereg=baser;
3339     int indexreg=index;
3340    
3341     CLOBBER_MOV;
3342     s=readreg(s,4);
3343     baser=readreg_offset(baser,4);
3344     index=readreg_offset(index,4);
3345    
3346     base+=get_offset(basereg);
3347     base+=factor*get_offset(indexreg);
3348    
3349     raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3350     unlock2(s);
3351     unlock2(baser);
3352     unlock2(index);
3353     }
3354     MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3355    
3356     MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3357     {
3358     int basereg=baser;
3359     int indexreg=index;
3360    
3361     CLOBBER_MOV;
3362     s=readreg(s,2);
3363     baser=readreg_offset(baser,4);
3364     index=readreg_offset(index,4);
3365    
3366     base+=get_offset(basereg);
3367     base+=factor*get_offset(indexreg);
3368    
3369     raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3370     unlock2(s);
3371     unlock2(baser);
3372     unlock2(index);
3373     }
3374     MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3375    
3376     MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3377     {
3378     int basereg=baser;
3379     int indexreg=index;
3380    
3381     CLOBBER_MOV;
3382     s=readreg(s,1);
3383     baser=readreg_offset(baser,4);
3384     index=readreg_offset(index,4);
3385    
3386     base+=get_offset(basereg);
3387     base+=factor*get_offset(indexreg);
3388    
3389     raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3390     unlock2(s);
3391     unlock2(baser);
3392     unlock2(index);
3393     }
3394     MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3395    
3396    
3397    
3398     /* Read a long from base+baser+factor*index */
3399     MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3400     {
3401     int basereg=baser;
3402     int indexreg=index;
3403    
3404     CLOBBER_MOV;
3405     baser=readreg_offset(baser,4);
3406     index=readreg_offset(index,4);
3407     base+=get_offset(basereg);
3408     base+=factor*get_offset(indexreg);
3409     d=writereg(d,4);
3410     raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3411     unlock2(d);
3412     unlock2(baser);
3413     unlock2(index);
3414     }
3415     MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3416    
3417    
3418     MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3419     {
3420     int basereg=baser;
3421     int indexreg=index;
3422    
3423     CLOBBER_MOV;
3424     remove_offset(d,-1);
3425     baser=readreg_offset(baser,4);
3426     index=readreg_offset(index,4);
3427     base+=get_offset(basereg);
3428     base+=factor*get_offset(indexreg);
3429     d=writereg(d,2);
3430     raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3431     unlock2(d);
3432     unlock2(baser);
3433     unlock2(index);
3434     }
3435     MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3436    
3437    
3438     MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3439     {
3440     int basereg=baser;
3441     int indexreg=index;
3442    
3443     CLOBBER_MOV;
3444     remove_offset(d,-1);
3445     baser=readreg_offset(baser,4);
3446     index=readreg_offset(index,4);
3447     base+=get_offset(basereg);
3448     base+=factor*get_offset(indexreg);
3449     d=writereg(d,1);
3450     raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3451     unlock2(d);
3452     unlock2(baser);
3453     unlock2(index);
3454     }
3455     MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3456    
3457     /* Read a long from base+factor*index */
3458     MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3459     {
3460     int indexreg=index;
3461    
3462     if (isconst(index)) {
3463     COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3464     return;
3465     }
3466    
3467     CLOBBER_MOV;
3468     index=readreg_offset(index,4);
3469     base+=get_offset(indexreg)*factor;
3470     d=writereg(d,4);
3471    
3472     raw_mov_l_rm_indexed(d,base,index,factor);
3473     unlock2(index);
3474     unlock2(d);
3475     }
3476     MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3477    
3478    
3479     /* read the long at the address contained in s+offset and store in d */
3480     MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3481     {
3482     if (isconst(s)) {
3483     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3484     return;
3485     }
3486     CLOBBER_MOV;
3487     s=readreg(s,4);
3488     d=writereg(d,4);
3489    
3490     raw_mov_l_rR(d,s,offset);
3491     unlock2(d);
3492     unlock2(s);
3493     }
3494     MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3495    
3496     /* read the word at the address contained in s+offset and store in d */
3497     MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3498     {
3499     if (isconst(s)) {
3500     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3501     return;
3502     }
3503     CLOBBER_MOV;
3504     s=readreg(s,4);
3505     d=writereg(d,2);
3506    
3507     raw_mov_w_rR(d,s,offset);
3508     unlock2(d);
3509     unlock2(s);
3510     }
3511     MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3512    
3513     /* read the byte at the address contained in s+offset and store in d */
3514     MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3515     {
3516     if (isconst(s)) {
3517     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3518     return;
3519     }
3520     CLOBBER_MOV;
3521     s=readreg(s,4);
3522     d=writereg(d,1);
3523    
3524     raw_mov_b_rR(d,s,offset);
3525     unlock2(d);
3526     unlock2(s);
3527     }
3528     MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3529    
3530     /* read the long at the address contained in s+offset and store in d */
3531     MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3532     {
3533     int sreg=s;
3534     if (isconst(s)) {
3535     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3536     return;
3537     }
3538     CLOBBER_MOV;
3539     s=readreg_offset(s,4);
3540     offset+=get_offset(sreg);
3541     d=writereg(d,4);
3542    
3543     raw_mov_l_brR(d,s,offset);
3544     unlock2(d);
3545     unlock2(s);
3546     }
3547     MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3548    
3549     /* read the word at the address contained in s+offset and store in d */
3550     MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3551     {
3552     int sreg=s;
3553     if (isconst(s)) {
3554     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3555     return;
3556     }
3557     CLOBBER_MOV;
3558     remove_offset(d,-1);
3559     s=readreg_offset(s,4);
3560     offset+=get_offset(sreg);
3561     d=writereg(d,2);
3562    
3563     raw_mov_w_brR(d,s,offset);
3564     unlock2(d);
3565     unlock2(s);
3566     }
3567     MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3568    
3569     /* read the byte at the address contained in s+offset and store in d */
3570     MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3571     {
3572     int sreg=s;
3573     if (isconst(s)) {
3574     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3575     return;
3576     }
3577     CLOBBER_MOV;
3578     remove_offset(d,-1);
3579     s=readreg_offset(s,4);
3580     offset+=get_offset(sreg);
3581     d=writereg(d,1);
3582    
3583     raw_mov_b_brR(d,s,offset);
3584     unlock2(d);
3585     unlock2(s);
3586     }
3587     MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3588    
3589     MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3590     {
3591     int dreg=d;
3592     if (isconst(d)) {
3593     COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3594     return;
3595     }
3596    
3597     CLOBBER_MOV;
3598     d=readreg_offset(d,4);
3599     offset+=get_offset(dreg);
3600     raw_mov_l_Ri(d,i,offset);
3601     unlock2(d);
3602     }
3603     MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3604    
3605     MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3606     {
3607     int dreg=d;
3608     if (isconst(d)) {
3609     COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3610     return;
3611     }
3612    
3613     CLOBBER_MOV;
3614     d=readreg_offset(d,4);
3615     offset+=get_offset(dreg);
3616     raw_mov_w_Ri(d,i,offset);
3617     unlock2(d);
3618     }
3619     MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3620    
3621     MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3622     {
3623     int dreg=d;
3624     if (isconst(d)) {
3625     COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3626     return;
3627     }
3628    
3629     CLOBBER_MOV;
3630     d=readreg_offset(d,4);
3631     offset+=get_offset(dreg);
3632     raw_mov_b_Ri(d,i,offset);
3633     unlock2(d);
3634     }
3635     MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3636    
3637     /* Warning! OFFSET must fit in a byte-sized (8-bit) displacement! */
3638     MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3639     {
3640     if (isconst(d)) {
3641     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3642     return;
3643     }
3644     if (isconst(s)) {
3645     COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3646     return;
3647     }
3648    
3649     CLOBBER_MOV;
3650     s=readreg(s,4);
3651     d=readreg(d,4);
3652    
3653     raw_mov_l_Rr(d,s,offset);
3654     unlock2(d);
3655     unlock2(s);
3656     }
3657     MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3658    
3659     MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3660     {
3661     if (isconst(d)) {
3662     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3663     return;
3664     }
3665     if (isconst(s)) {
3666     COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3667     return;
3668     }
3669    
3670     CLOBBER_MOV;
3671     s=readreg(s,2);
3672     d=readreg(d,4);
3673     raw_mov_w_Rr(d,s,offset);
3674     unlock2(d);
3675     unlock2(s);
3676     }
3677     MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3678    
3679     MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3680     {
3681     if (isconst(d)) {
3682     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3683     return;
3684     }
3685     if (isconst(s)) {
3686     COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3687     return;
3688     }
3689    
3690     CLOBBER_MOV;
3691     s=readreg(s,1);
3692     d=readreg(d,4);
3693     raw_mov_b_Rr(d,s,offset);
3694     unlock2(d);
3695     unlock2(s);
3696     }
3697     MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3698    
3699     MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3700     {
3701     if (isconst(s)) {
3702     COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3703     return;
3704     }
3705     #if USE_OFFSET
3706     if (d==s) {
3707     add_offset(d,offset);
3708     return;
3709     }
3710     #endif
3711     CLOBBER_LEA;
3712     s=readreg(s,4);
3713     d=writereg(d,4);
3714     raw_lea_l_brr(d,s,offset);
3715     unlock2(d);
3716     unlock2(s);
3717     }
3718     MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3719    
3720     MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3721     {
3722     if (!offset) {
3723     COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3724     return;
3725     }
3726     CLOBBER_LEA;
3727     s=readreg(s,4);
3728     index=readreg(index,4);
3729     d=writereg(d,4);
3730    
3731     raw_lea_l_brr_indexed(d,s,index,factor,offset);
3732     unlock2(d);
3733     unlock2(index);
3734     unlock2(s);
3735     }
3736     MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3737    
3738     MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3739     {
3740     CLOBBER_LEA;
3741     s=readreg(s,4);
3742     index=readreg(index,4);
3743     d=writereg(d,4);
3744    
3745     raw_lea_l_rr_indexed(d,s,index,factor);
3746     unlock2(d);
3747     unlock2(index);
3748     unlock2(s);
3749     }
3750     MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3751    
3752     /* write s to the long at the address contained in d+offset */
3753     MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3754     {
3755     int dreg=d;
3756     if (isconst(d)) {
3757     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3758     return;
3759     }
3760    
3761     CLOBBER_MOV;
3762     s=readreg(s,4);
3763     d=readreg_offset(d,4);
3764     offset+=get_offset(dreg);
3765    
3766     raw_mov_l_bRr(d,s,offset);
3767     unlock2(d);
3768     unlock2(s);
3769     }
3770     MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3771    
3772     /* write the word in s to the address contained in d+offset */
3773     MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3774     {
3775     int dreg=d;
3776    
3777     if (isconst(d)) {
3778     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3779     return;
3780     }
3781    
3782     CLOBBER_MOV;
3783     s=readreg(s,2);
3784     d=readreg_offset(d,4);
3785     offset+=get_offset(dreg);
3786     raw_mov_w_bRr(d,s,offset);
3787     unlock2(d);
3788     unlock2(s);
3789     }
3790     MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3791    
3792     MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3793     {
3794     int dreg=d;
3795     if (isconst(d)) {
3796     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3797     return;
3798     }
3799    
3800     CLOBBER_MOV;
3801     s=readreg(s,1);
3802     d=readreg_offset(d,4);
3803     offset+=get_offset(dreg);
3804     raw_mov_b_bRr(d,s,offset);
3805     unlock2(d);
3806     unlock2(s);
3807     }
3808     MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3809    
3810     MIDFUNC(1,bswap_32,(RW4 r))
3811     {
3812     int reg=r;
3813    
3814     if (isconst(r)) {
3815     uae_u32 oldv=live.state[r].val;
3816     live.state[r].val=reverse32(oldv);
3817     return;
3818     }
3819    
3820     CLOBBER_SW32;
3821     r=rmw(r,4,4);
3822     raw_bswap_32(r);
3823     unlock2(r);
3824     }
3825     MENDFUNC(1,bswap_32,(RW4 r))
3826    
3827     MIDFUNC(1,bswap_16,(RW2 r))
3828     {
3829     if (isconst(r)) {
3830     uae_u32 oldv=live.state[r].val;
3831     live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3832     (oldv&0xffff0000);
3833     return;
3834     }
3835    
3836     CLOBBER_SW16;
3837     r=rmw(r,2,2);
3838    
3839     raw_bswap_16(r);
3840     unlock2(r);
3841     }
3842     MENDFUNC(1,bswap_16,(RW2 r))
3843    
3844    
3845    
3846     MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3847     {
3848     int olds;
3849    
3850     if (d==s) { /* How pointless! */
3851     return;
3852     }
3853     if (isconst(s)) {
3854     COMPCALL(mov_l_ri)(d,live.state[s].val);
3855     return;
3856     }
3857     olds=s;
3858     disassociate(d);
3859     s=readreg_offset(s,4);
3860     live.state[d].realreg=s;
3861     live.state[d].realind=live.nat[s].nholds;
3862     live.state[d].val=live.state[olds].val;
3863     live.state[d].validsize=4;
3864     live.state[d].dirtysize=4;
3865     set_status(d,DIRTY);
3866    
3867     live.nat[s].holds[live.nat[s].nholds]=d;
3868     live.nat[s].nholds++;
3869     log_clobberreg(d);
3870     /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3871     d,s,live.state[d].realind,live.nat[s].nholds); */
3872     unlock2(s);
3873     }
3874     MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3875    
3876     MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3877     {
3878     if (isconst(s)) {
3879     COMPCALL(mov_l_mi)(d,live.state[s].val);
3880     return;
3881     }
3882     CLOBBER_MOV;
3883     s=readreg(s,4);
3884    
3885     raw_mov_l_mr(d,s);
3886     unlock2(s);
3887     }
3888     MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3889    
3890    
3891     MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3892     {
3893     if (isconst(s)) {
3894     COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3895     return;
3896     }
3897     CLOBBER_MOV;
3898     s=readreg(s,2);
3899    
3900     raw_mov_w_mr(d,s);
3901     unlock2(s);
3902     }
3903     MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3904    
3905     MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3906     {
3907     CLOBBER_MOV;
3908     d=writereg(d,2);
3909    
3910     raw_mov_w_rm(d,s);
3911     unlock2(d);
3912     }
3913     MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3914    
3915     MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3916     {
3917     if (isconst(s)) {
3918     COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3919     return;
3920     }
3921    
3922     CLOBBER_MOV;
3923     s=readreg(s,1);
3924    
3925     raw_mov_b_mr(d,s);
3926     unlock2(s);
3927     }
3928     MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3929    
3930     MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3931     {
3932     CLOBBER_MOV;
3933     d=writereg(d,1);
3934    
3935     raw_mov_b_rm(d,s);
3936     unlock2(d);
3937     }
3938     MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3939    
3940     MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3941     {
3942     set_const(d,s);
3943     return;
3944     }
3945     MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3946    
3947     MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3948     {
3949     CLOBBER_MOV;
3950     d=writereg(d,2);
3951    
3952     raw_mov_w_ri(d,s);
3953     unlock2(d);
3954     }
3955     MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3956    
3957     MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3958     {
3959     CLOBBER_MOV;
3960     d=writereg(d,1);
3961    
3962     raw_mov_b_ri(d,s);
3963     unlock2(d);
3964     }
3965     MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3966    
3967    
3968     MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3969     {
3970     CLOBBER_ADD;
3971     raw_add_l_mi(d,s);
3972     }
3973     MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3974    
3975     MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3976     {
3977     CLOBBER_ADD;
3978     raw_add_w_mi(d,s);
3979     }
3980     MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3981    
3982     MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3983     {
3984     CLOBBER_ADD;
3985     raw_add_b_mi(d,s);
3986     }
3987     MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3988    
3989    
3990     MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3991     {
3992     CLOBBER_TEST;
3993     d=readreg(d,4);
3994    
3995     raw_test_l_ri(d,i);
3996     unlock2(d);
3997     }
3998     MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3999    
4000     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
4001     {
4002     CLOBBER_TEST;
4003     d=readreg(d,4);
4004     s=readreg(s,4);
4005    
4006     raw_test_l_rr(d,s);
4007     unlock2(d);
4008     unlock2(s);
4009     }
4010     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4011    
4012     MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4013     {
4014     CLOBBER_TEST;
4015     d=readreg(d,2);
4016     s=readreg(s,2);
4017    
4018     raw_test_w_rr(d,s);
4019     unlock2(d);
4020     unlock2(s);
4021     }
4022     MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4023    
4024     MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4025     {
4026     CLOBBER_TEST;
4027     d=readreg(d,1);
4028     s=readreg(s,1);
4029    
4030     raw_test_b_rr(d,s);
4031     unlock2(d);
4032     unlock2(s);
4033     }
4034     MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4035    
4036    
4037     MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4038     {
4039     if (isconst(d) && !needflags) {
4040     live.state[d].val &= i;
4041     return;
4042     }
4043    
4044     CLOBBER_AND;
4045     d=rmw(d,4,4);
4046    
4047     raw_and_l_ri(d,i);
4048     unlock2(d);
4049     }
4050     MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4051    
4052     MIDFUNC(2,and_l,(RW4 d, R4 s))
4053     {
4054     CLOBBER_AND;
4055     s=readreg(s,4);
4056     d=rmw(d,4,4);
4057    
4058     raw_and_l(d,s);
4059     unlock2(d);
4060     unlock2(s);
4061     }
4062     MENDFUNC(2,and_l,(RW4 d, R4 s))
4063    
4064     MIDFUNC(2,and_w,(RW2 d, R2 s))
4065     {
4066     CLOBBER_AND;
4067     s=readreg(s,2);
4068     d=rmw(d,2,2);
4069    
4070     raw_and_w(d,s);
4071     unlock2(d);
4072     unlock2(s);
4073     }
4074     MENDFUNC(2,and_w,(RW2 d, R2 s))
4075    
4076     MIDFUNC(2,and_b,(RW1 d, R1 s))
4077     {
4078     CLOBBER_AND;
4079     s=readreg(s,1);
4080     d=rmw(d,1,1);
4081    
4082     raw_and_b(d,s);
4083     unlock2(d);
4084     unlock2(s);
4085     }
4086     MENDFUNC(2,and_b,(RW1 d, R1 s))
4087    
4088     // gb-- used for making an fpcr value in compemu_fpp.cpp
4089     MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4090     {
4091     CLOBBER_OR;
4092     d=rmw(d,4,4);
4093    
4094     raw_or_l_rm(d,s);
4095     unlock2(d);
4096     }
4097     MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4098    
4099     MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4100     {
4101     if (isconst(d) && !needflags) {
4102     live.state[d].val|=i;
4103     return;
4104     }
4105     CLOBBER_OR;
4106     d=rmw(d,4,4);
4107    
4108     raw_or_l_ri(d,i);
4109     unlock2(d);
4110     }
4111     MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4112    
4113     MIDFUNC(2,or_l,(RW4 d, R4 s))
4114     {
4115     if (isconst(d) && isconst(s) && !needflags) {
4116     live.state[d].val|=live.state[s].val;
4117     return;
4118     }
4119     CLOBBER_OR;
4120     s=readreg(s,4);
4121     d=rmw(d,4,4);
4122    
4123     raw_or_l(d,s);
4124     unlock2(d);
4125     unlock2(s);
4126     }
4127     MENDFUNC(2,or_l,(RW4 d, R4 s))
4128    
4129     MIDFUNC(2,or_w,(RW2 d, R2 s))
4130     {
4131     CLOBBER_OR;
4132     s=readreg(s,2);
4133     d=rmw(d,2,2);
4134    
4135     raw_or_w(d,s);
4136     unlock2(d);
4137     unlock2(s);
4138     }
4139     MENDFUNC(2,or_w,(RW2 d, R2 s))
4140    
4141     MIDFUNC(2,or_b,(RW1 d, R1 s))
4142     {
4143     CLOBBER_OR;
4144     s=readreg(s,1);
4145     d=rmw(d,1,1);
4146    
4147     raw_or_b(d,s);
4148     unlock2(d);
4149     unlock2(s);
4150     }
4151     MENDFUNC(2,or_b,(RW1 d, R1 s))
4152    
4153     MIDFUNC(2,adc_l,(RW4 d, R4 s))
4154     {
4155     CLOBBER_ADC;
4156     s=readreg(s,4);
4157     d=rmw(d,4,4);
4158    
4159     raw_adc_l(d,s);
4160    
4161     unlock2(d);
4162     unlock2(s);
4163     }
4164     MENDFUNC(2,adc_l,(RW4 d, R4 s))
4165    
4166     MIDFUNC(2,adc_w,(RW2 d, R2 s))
4167     {
4168     CLOBBER_ADC;
4169     s=readreg(s,2);
4170     d=rmw(d,2,2);
4171    
4172     raw_adc_w(d,s);
4173     unlock2(d);
4174     unlock2(s);
4175     }
4176     MENDFUNC(2,adc_w,(RW2 d, R2 s))
4177    
4178     MIDFUNC(2,adc_b,(RW1 d, R1 s))
4179     {
4180     CLOBBER_ADC;
4181     s=readreg(s,1);
4182     d=rmw(d,1,1);
4183    
4184     raw_adc_b(d,s);
4185     unlock2(d);
4186     unlock2(s);
4187     }
4188     MENDFUNC(2,adc_b,(RW1 d, R1 s))
4189    
4190     MIDFUNC(2,add_l,(RW4 d, R4 s))
4191     {
4192     if (isconst(s)) {
4193     COMPCALL(add_l_ri)(d,live.state[s].val);
4194     return;
4195     }
4196    
4197     CLOBBER_ADD;
4198     s=readreg(s,4);
4199     d=rmw(d,4,4);
4200    
4201     raw_add_l(d,s);
4202    
4203     unlock2(d);
4204     unlock2(s);
4205     }
4206     MENDFUNC(2,add_l,(RW4 d, R4 s))
4207    
4208     MIDFUNC(2,add_w,(RW2 d, R2 s))
4209     {
4210     if (isconst(s)) {
4211     COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4212     return;
4213     }
4214    
4215     CLOBBER_ADD;
4216     s=readreg(s,2);
4217     d=rmw(d,2,2);
4218    
4219     raw_add_w(d,s);
4220     unlock2(d);
4221     unlock2(s);
4222     }
4223     MENDFUNC(2,add_w,(RW2 d, R2 s))
4224    
4225     MIDFUNC(2,add_b,(RW1 d, R1 s))
4226     {
4227     if (isconst(s)) {
4228     COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4229     return;
4230     }
4231    
4232     CLOBBER_ADD;
4233     s=readreg(s,1);
4234     d=rmw(d,1,1);
4235    
4236     raw_add_b(d,s);
4237     unlock2(d);
4238     unlock2(s);
4239     }
4240     MENDFUNC(2,add_b,(RW1 d, R1 s))
4241    
4242     MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4243     {
4244     if (!i && !needflags)
4245     return;
4246     if (isconst(d) && !needflags) {
4247     live.state[d].val-=i;
4248     return;
4249     }
4250     #if USE_OFFSET
4251     if (!needflags) {
4252     add_offset(d,-i);
4253     return;
4254     }
4255     #endif
4256    
4257     CLOBBER_SUB;
4258     d=rmw(d,4,4);
4259    
4260     raw_sub_l_ri(d,i);
4261     unlock2(d);
4262     }
4263     MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4264    
4265     MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4266     {
4267     if (!i && !needflags)
4268     return;
4269    
4270     CLOBBER_SUB;
4271     d=rmw(d,2,2);
4272    
4273     raw_sub_w_ri(d,i);
4274     unlock2(d);
4275     }
4276     MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4277    
4278     MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4279     {
4280     if (!i && !needflags)
4281     return;
4282    
4283     CLOBBER_SUB;
4284     d=rmw(d,1,1);
4285    
4286     raw_sub_b_ri(d,i);
4287    
4288     unlock2(d);
4289     }
4290     MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4291    
4292     MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4293     {
4294     if (!i && !needflags)
4295     return;
4296     if (isconst(d) && !needflags) {
4297     live.state[d].val+=i;
4298     return;
4299     }
4300     #if USE_OFFSET
4301     if (!needflags) {
4302     add_offset(d,i);
4303     return;
4304     }
4305     #endif
4306     CLOBBER_ADD;
4307     d=rmw(d,4,4);
4308     raw_add_l_ri(d,i);
4309     unlock2(d);
4310     }
4311     MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
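    /* Illustrative sketch of the USE_OFFSET path above (and its mirror in
       sub_l_ri): when flags are not needed, the constant is only recorded
       in the register's lazy "val" offset instead of emitting an "add".
       E.g. two consecutive (A0)+ word increments fold away:
         add_l_ri(a0_vreg, 2);   // val += 2, no code emitted
         add_l_ri(a0_vreg, 2);   // val += 2, still no code
       A single instruction materializes the combined +4 only when the
       register is actually read or flushed (see add_offset/remove_offset). */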
4312    
4313     MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4314     {
4315     if (!i && !needflags)
4316     return;
4317    
4318     CLOBBER_ADD;
4319     d=rmw(d,2,2);
4320    
4321     raw_add_w_ri(d,i);
4322     unlock2(d);
4323     }
4324     MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4325    
4326     MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4327     {
4328     if (!i && !needflags)
4329     return;
4330    
4331     CLOBBER_ADD;
4332     d=rmw(d,1,1);
4333    
4334     raw_add_b_ri(d,i);
4335    
4336     unlock2(d);
4337     }
4338     MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4339    
4340     MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4341     {
4342     CLOBBER_SBB;
4343     s=readreg(s,4);
4344     d=rmw(d,4,4);
4345    
4346     raw_sbb_l(d,s);
4347     unlock2(d);
4348     unlock2(s);
4349     }
4350     MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4351    
4352     MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4353     {
4354     CLOBBER_SBB;
4355     s=readreg(s,2);
4356     d=rmw(d,2,2);
4357    
4358     raw_sbb_w(d,s);
4359     unlock2(d);
4360     unlock2(s);
4361     }
4362     MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4363    
4364     MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4365     {
4366     CLOBBER_SBB;
4367     s=readreg(s,1);
4368     d=rmw(d,1,1);
4369    
4370     raw_sbb_b(d,s);
4371     unlock2(d);
4372     unlock2(s);
4373     }
4374     MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4375    
4376     MIDFUNC(2,sub_l,(RW4 d, R4 s))
4377     {
4378     if (isconst(s)) {
4379     COMPCALL(sub_l_ri)(d,live.state[s].val);
4380     return;
4381     }
4382    
4383     CLOBBER_SUB;
4384     s=readreg(s,4);
4385     d=rmw(d,4,4);
4386    
4387     raw_sub_l(d,s);
4388     unlock2(d);
4389     unlock2(s);
4390     }
4391     MENDFUNC(2,sub_l,(RW4 d, R4 s))
4392    
4393     MIDFUNC(2,sub_w,(RW2 d, R2 s))
4394     {
4395     if (isconst(s)) {
4396     COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4397     return;
4398     }
4399    
4400     CLOBBER_SUB;
4401     s=readreg(s,2);
4402     d=rmw(d,2,2);
4403    
4404     raw_sub_w(d,s);
4405     unlock2(d);
4406     unlock2(s);
4407     }
4408     MENDFUNC(2,sub_w,(RW2 d, R2 s))
4409    
4410     MIDFUNC(2,sub_b,(RW1 d, R1 s))
4411     {
4412     if (isconst(s)) {
4413     COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4414     return;
4415     }
4416    
4417     CLOBBER_SUB;
4418     s=readreg(s,1);
4419     d=rmw(d,1,1);
4420    
4421     raw_sub_b(d,s);
4422     unlock2(d);
4423     unlock2(s);
4424     }
4425     MENDFUNC(2,sub_b,(RW1 d, R1 s))
4426    
4427     MIDFUNC(2,cmp_l,(R4 d, R4 s))
4428     {
4429     CLOBBER_CMP;
4430     s=readreg(s,4);
4431     d=readreg(d,4);
4432    
4433     raw_cmp_l(d,s);
4434     unlock2(d);
4435     unlock2(s);
4436     }
4437     MENDFUNC(2,cmp_l,(R4 d, R4 s))
4438    
4439     MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4440     {
4441     CLOBBER_CMP;
4442     r=readreg(r,4);
4443    
4444     raw_cmp_l_ri(r,i);
4445     unlock2(r);
4446     }
4447     MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4448    
4449     MIDFUNC(2,cmp_w,(R2 d, R2 s))
4450     {
4451     CLOBBER_CMP;
4452     s=readreg(s,2);
4453     d=readreg(d,2);
4454    
4455     raw_cmp_w(d,s);
4456     unlock2(d);
4457     unlock2(s);
4458     }
4459     MENDFUNC(2,cmp_w,(R2 d, R2 s))
4460    
4461     MIDFUNC(2,cmp_b,(R1 d, R1 s))
4462     {
4463     CLOBBER_CMP;
4464     s=readreg(s,1);
4465     d=readreg(d,1);
4466    
4467     raw_cmp_b(d,s);
4468     unlock2(d);
4469     unlock2(s);
4470     }
4471     MENDFUNC(2,cmp_b,(R1 d, R1 s))
4472    
4473    
4474     MIDFUNC(2,xor_l,(RW4 d, R4 s))
4475     {
4476     CLOBBER_XOR;
4477     s=readreg(s,4);
4478     d=rmw(d,4,4);
4479    
4480     raw_xor_l(d,s);
4481     unlock2(d);
4482     unlock2(s);
4483     }
4484     MENDFUNC(2,xor_l,(RW4 d, R4 s))
4485    
4486     MIDFUNC(2,xor_w,(RW2 d, R2 s))
4487     {
4488     CLOBBER_XOR;
4489     s=readreg(s,2);
4490     d=rmw(d,2,2);
4491    
4492     raw_xor_w(d,s);
4493     unlock2(d);
4494     unlock2(s);
4495     }
4496     MENDFUNC(2,xor_w,(RW2 d, R2 s))
4497    
4498     MIDFUNC(2,xor_b,(RW1 d, R1 s))
4499     {
4500     CLOBBER_XOR;
4501     s=readreg(s,1);
4502     d=rmw(d,1,1);
4503    
4504     raw_xor_b(d,s);
4505     unlock2(d);
4506     unlock2(s);
4507     }
4508     MENDFUNC(2,xor_b,(RW1 d, R1 s))
4509    
4510     MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4511     {
4512     clobber_flags();
4513     remove_all_offsets();
4514     if (osize==4) {
4515     if (out1!=in1 && out1!=r) {
4516     COMPCALL(forget_about)(out1);
4517     }
4518     }
4519     else {
4520     tomem_c(out1);
4521     }
4522    
4523     in1=readreg_specific(in1,isize,REG_PAR1);
4524     r=readreg(r,4);
4525     prepare_for_call_1(); /* This should ensure that there won't be
4526     any need for swapping nregs in prepare_for_call_2
4527     */
4528     #if USE_NORMAL_CALLING_CONVENTION
4529     raw_push_l_r(in1);
4530     #endif
4531     unlock2(in1);
4532     unlock2(r);
4533    
4534     prepare_for_call_2();
4535     raw_call_r(r);
4536    
4537     #if USE_NORMAL_CALLING_CONVENTION
4538     raw_inc_sp(4);
4539     #endif
4540    
4541    
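    /* The C function just called left its return value in REG_RESULT;
       rebind the virtual register out1 to that native register directly
       instead of copying it, and mark it dirty so it is written back to
       memory on the next flush. */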
4542     live.nat[REG_RESULT].holds[0]=out1;
4543     live.nat[REG_RESULT].nholds=1;
4544     live.nat[REG_RESULT].touched=touchcnt++;
4545    
4546     live.state[out1].realreg=REG_RESULT;
4547     live.state[out1].realind=0;
4548     live.state[out1].val=0;
4549     live.state[out1].validsize=osize;
4550     live.state[out1].dirtysize=osize;
4551     set_status(out1,DIRTY);
4552     }
4553     MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4554    
4555     MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4556     {
4557     clobber_flags();
4558     remove_all_offsets();
4559     in1=readreg_specific(in1,isize1,REG_PAR1);
4560     in2=readreg_specific(in2,isize2,REG_PAR2);
4561     r=readreg(r,4);
4562     prepare_for_call_1(); /* This should ensure that there won't be
4563     any need for swapping nregs in prepare_for_call_2
4564     */
4565     #if USE_NORMAL_CALLING_CONVENTION
4566     raw_push_l_r(in2);
4567     raw_push_l_r(in1);
4568     #endif
4569     unlock2(r);
4570     unlock2(in1);
4571     unlock2(in2);
4572     prepare_for_call_2();
4573     raw_call_r(r);
4574     #if USE_NORMAL_CALLING_CONVENTION
4575     raw_inc_sp(8);
4576     #endif
4577     }
4578     MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4579    
4580     /* forget_about() takes a mid-layer register */
4581     MIDFUNC(1,forget_about,(W4 r))
4582     {
4583     if (isinreg(r))
4584     disassociate(r);
4585     live.state[r].val=0;
4586     set_status(r,UNDEF);
4587     }
4588     MENDFUNC(1,forget_about,(W4 r))
4589    
4590     MIDFUNC(0,nop,(void))
4591     {
4592     raw_nop();
4593     }
4594     MENDFUNC(0,nop,(void))
4595    
4596    
4597     MIDFUNC(1,f_forget_about,(FW r))
4598     {
4599     if (f_isinreg(r))
4600     f_disassociate(r);
4601     live.fate[r].status=UNDEF;
4602     }
4603     MENDFUNC(1,f_forget_about,(FW r))
4604    
4605     MIDFUNC(1,fmov_pi,(FW r))
4606     {
4607     r=f_writereg(r);
4608     raw_fmov_pi(r);
4609     f_unlock(r);
4610     }
4611     MENDFUNC(1,fmov_pi,(FW r))
4612    
4613     MIDFUNC(1,fmov_log10_2,(FW r))
4614     {
4615     r=f_writereg(r);
4616     raw_fmov_log10_2(r);
4617     f_unlock(r);
4618     }
4619     MENDFUNC(1,fmov_log10_2,(FW r))
4620    
4621     MIDFUNC(1,fmov_log2_e,(FW r))
4622     {
4623     r=f_writereg(r);
4624     raw_fmov_log2_e(r);
4625     f_unlock(r);
4626     }
4627     MENDFUNC(1,fmov_log2_e,(FW r))
4628    
4629     MIDFUNC(1,fmov_loge_2,(FW r))
4630     {
4631     r=f_writereg(r);
4632     raw_fmov_loge_2(r);
4633     f_unlock(r);
4634     }
4635     MENDFUNC(1,fmov_loge_2,(FW r))
4636    
4637     MIDFUNC(1,fmov_1,(FW r))
4638     {
4639     r=f_writereg(r);
4640     raw_fmov_1(r);
4641     f_unlock(r);
4642     }
4643     MENDFUNC(1,fmov_1,(FW r))
4644    
4645     MIDFUNC(1,fmov_0,(FW r))
4646     {
4647     r=f_writereg(r);
4648     raw_fmov_0(r);
4649     f_unlock(r);
4650     }
4651     MENDFUNC(1,fmov_0,(FW r))
4652    
4653     MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4654     {
4655     r=f_writereg(r);
4656     raw_fmov_rm(r,m);
4657     f_unlock(r);
4658     }
4659     MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4660    
4661     MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4662     {
4663     r=f_writereg(r);
4664     raw_fmovi_rm(r,m);
4665     f_unlock(r);
4666     }
4667     MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4668    
4669     MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4670     {
4671     r=f_readreg(r);
4672     raw_fmovi_mr(m,r);
4673     f_unlock(r);
4674     }
4675     MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4676    
4677     MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4678     {
4679     r=f_writereg(r);
4680     raw_fmovs_rm(r,m);
4681     f_unlock(r);
4682     }
4683     MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4684    
4685     MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4686     {
4687     r=f_readreg(r);
4688     raw_fmovs_mr(m,r);
4689     f_unlock(r);
4690     }
4691     MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4692    
4693     MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4694     {
4695     r=f_readreg(r);
4696     raw_fmov_ext_mr(m,r);
4697     f_unlock(r);
4698     }
4699     MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4700    
4701     MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4702     {
4703     r=f_readreg(r);
4704     raw_fmov_mr(m,r);
4705     f_unlock(r);
4706     }
4707     MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4708    
4709     MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4710     {
4711     r=f_writereg(r);
4712     raw_fmov_ext_rm(r,m);
4713     f_unlock(r);
4714     }
4715     MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4716    
4717     MIDFUNC(2,fmov_rr,(FW d, FR s))
4718     {
4719     if (d==s) { /* How pointless! */
4720     return;
4721     }
4722     #if USE_F_ALIAS
4723     f_disassociate(d);
4724     s=f_readreg(s);
4725     live.fate[d].realreg=s;
4726     live.fate[d].realind=live.fat[s].nholds;
4727     live.fate[d].status=DIRTY;
4728     live.fat[s].holds[live.fat[s].nholds]=d;
4729     live.fat[s].nholds++;
4730     f_unlock(s);
4731     #else
4732     s=f_readreg(s);
4733     d=f_writereg(d);
4734     raw_fmov_rr(d,s);
4735     f_unlock(s);
4736     f_unlock(d);
4737     #endif
4738     }
4739     MENDFUNC(2,fmov_rr,(FW d, FR s))
4740    
4741     MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4742     {
4743     index=readreg(index,4);
4744    
4745     raw_fldcw_m_indexed(index,base);
4746     unlock2(index);
4747     }
4748     MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4749    
4750     MIDFUNC(1,ftst_r,(FR r))
4751     {
4752     r=f_readreg(r);
4753     raw_ftst_r(r);
4754     f_unlock(r);
4755     }
4756     MENDFUNC(1,ftst_r,(FR r))
4757    
4758     MIDFUNC(0,dont_care_fflags,(void))
4759     {
4760     f_disassociate(FP_RESULT);
4761     }
4762     MENDFUNC(0,dont_care_fflags,(void))
4763    
4764     MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4765     {
4766     s=f_readreg(s);
4767     d=f_writereg(d);
4768     raw_fsqrt_rr(d,s);
4769     f_unlock(s);
4770     f_unlock(d);
4771     }
4772     MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4773    
4774     MIDFUNC(2,fabs_rr,(FW d, FR s))
4775     {
4776     s=f_readreg(s);
4777     d=f_writereg(d);
4778     raw_fabs_rr(d,s);
4779     f_unlock(s);
4780     f_unlock(d);
4781     }
4782     MENDFUNC(2,fabs_rr,(FW d, FR s))
4783    
4784     MIDFUNC(2,fsin_rr,(FW d, FR s))
4785     {
4786     s=f_readreg(s);
4787     d=f_writereg(d);
4788     raw_fsin_rr(d,s);
4789     f_unlock(s);
4790     f_unlock(d);
4791     }
4792     MENDFUNC(2,fsin_rr,(FW d, FR s))
4793    
4794     MIDFUNC(2,fcos_rr,(FW d, FR s))
4795     {
4796     s=f_readreg(s);
4797     d=f_writereg(d);
4798     raw_fcos_rr(d,s);
4799     f_unlock(s);
4800     f_unlock(d);
4801     }
4802     MENDFUNC(2,fcos_rr,(FW d, FR s))
4803    
4804     MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4805     {
4806     s=f_readreg(s);
4807     d=f_writereg(d);
4808     raw_ftwotox_rr(d,s);
4809     f_unlock(s);
4810     f_unlock(d);
4811     }
4812     MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4813    
4814     MIDFUNC(2,fetox_rr,(FW d, FR s))
4815     {
4816     s=f_readreg(s);
4817     d=f_writereg(d);
4818     raw_fetox_rr(d,s);
4819     f_unlock(s);
4820     f_unlock(d);
4821     }
4822     MENDFUNC(2,fetox_rr,(FW d, FR s))
4823    
4824     MIDFUNC(2,frndint_rr,(FW d, FR s))
4825     {
4826     s=f_readreg(s);
4827     d=f_writereg(d);
4828     raw_frndint_rr(d,s);
4829     f_unlock(s);
4830     f_unlock(d);
4831     }
4832     MENDFUNC(2,frndint_rr,(FW d, FR s))
4833    
4834     MIDFUNC(2,flog2_rr,(FW d, FR s))
4835     {
4836     s=f_readreg(s);
4837     d=f_writereg(d);
4838     raw_flog2_rr(d,s);
4839     f_unlock(s);
4840     f_unlock(d);
4841     }
4842     MENDFUNC(2,flog2_rr,(FW d, FR s))
4843    
4844     MIDFUNC(2,fneg_rr,(FW d, FR s))
4845     {
4846     s=f_readreg(s);
4847     d=f_writereg(d);
4848     raw_fneg_rr(d,s);
4849     f_unlock(s);
4850     f_unlock(d);
4851     }
4852     MENDFUNC(2,fneg_rr,(FW d, FR s))
4853    
4854     MIDFUNC(2,fadd_rr,(FRW d, FR s))
4855     {
4856     s=f_readreg(s);
4857     d=f_rmw(d);
4858     raw_fadd_rr(d,s);
4859     f_unlock(s);
4860     f_unlock(d);
4861     }
4862     MENDFUNC(2,fadd_rr,(FRW d, FR s))
4863    
4864     MIDFUNC(2,fsub_rr,(FRW d, FR s))
4865     {
4866     s=f_readreg(s);
4867     d=f_rmw(d);
4868     raw_fsub_rr(d,s);
4869     f_unlock(s);
4870     f_unlock(d);
4871     }
4872     MENDFUNC(2,fsub_rr,(FRW d, FR s))
4873    
4874     MIDFUNC(2,fcmp_rr,(FR d, FR s))
4875     {
4876     d=f_readreg(d);
4877     s=f_readreg(s);
4878     raw_fcmp_rr(d,s);
4879     f_unlock(s);
4880     f_unlock(d);
4881     }
4882     MENDFUNC(2,fcmp_rr,(FR d, FR s))
4883    
4884     MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4885     {
4886     s=f_readreg(s);
4887     d=f_rmw(d);
4888     raw_fdiv_rr(d,s);
4889     f_unlock(s);
4890     f_unlock(d);
4891     }
4892     MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4893    
4894     MIDFUNC(2,frem_rr,(FRW d, FR s))
4895     {
4896     s=f_readreg(s);
4897     d=f_rmw(d);
4898     raw_frem_rr(d,s);
4899     f_unlock(s);
4900     f_unlock(d);
4901     }
4902     MENDFUNC(2,frem_rr,(FRW d, FR s))
4903    
4904     MIDFUNC(2,frem1_rr,(FRW d, FR s))
4905     {
4906     s=f_readreg(s);
4907     d=f_rmw(d);
4908     raw_frem1_rr(d,s);
4909     f_unlock(s);
4910     f_unlock(d);
4911     }
4912     MENDFUNC(2,frem1_rr,(FRW d, FR s))
4913    
4914     MIDFUNC(2,fmul_rr,(FRW d, FR s))
4915     {
4916     s=f_readreg(s);
4917     d=f_rmw(d);
4918     raw_fmul_rr(d,s);
4919     f_unlock(s);
4920     f_unlock(d);
4921     }
4922     MENDFUNC(2,fmul_rr,(FRW d, FR s))
4923    
4924     /********************************************************************
4925     * Support functions exposed to gencomp. CREATE time *
4926     ********************************************************************/
4927    
4928 gbeauche 1.26 void set_zero(int r, int tmp)
4929     {
4930     if (setzflg_uses_bsf)
4931     bsf_l_rr(r,r);
4932     else
4933     simulate_bsf(tmp,r);
4934     }
4935    
4936 gbeauche 1.1 int kill_rodent(int r)
4937     {
4938     return KILLTHERAT &&
4939     have_rat_stall &&
4940     (live.state[r].status==INMEM ||
4941     live.state[r].status==CLEAN ||
4942     live.state[r].status==ISCONST ||
4943     live.state[r].dirtysize==4);
4944     }
4945    
4946     uae_u32 get_const(int r)
4947     {
4948     Dif (!isconst(r)) {
4949     write_log("Register %d should be constant, but isn't\n",r);
4950     abort();
4951     }
4952     return live.state[r].val;
4953     }
4954    
4955     void sync_m68k_pc(void)
4956     {
4957     if (m68k_pc_offset) {
4958     add_l_ri(PC_P,m68k_pc_offset);
4959     comp_pc_p+=m68k_pc_offset;
4960     m68k_pc_offset=0;
4961     }
4962     }
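    /* Illustrative example: during translation the emulated PC is kept as
       PC_P plus the lazy byte offset m68k_pc_offset. After translating,
       say, three 2-byte opcodes, m68k_pc_offset == 6 and a single
       add_l_ri(PC_P,6) re-synchronizes the register, rather than three
       separate increments. */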
4963    
4964     /********************************************************************
4965     * Scratch registers management *
4966     ********************************************************************/
4967    
4968     struct scratch_t {
4969     uae_u32 regs[VREGS];
4970     fpu_register fregs[VFREGS];
4971     };
4972    
4973     static scratch_t scratch;
4974    
4975     /********************************************************************
4976     * Support functions exposed to newcpu *
4977     ********************************************************************/
4978    
4979     static inline const char *str_on_off(bool b)
4980     {
4981     return b ? "on" : "off";
4982     }
4983    
4984     void compiler_init(void)
4985     {
4986     static bool initialized = false;
4987     if (initialized)
4988     return;
4989 gbeauche 1.24
4990 gbeauche 1.1 #if JIT_DEBUG
4991     // JIT debug mode ?
4992     JITDebug = PrefsFindBool("jitdebug");
4993     #endif
4994     write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4995    
4996     #ifdef USE_JIT_FPU
4997     // Use JIT compiler for FPU instructions ?
4998     avoid_fpu = !PrefsFindBool("jitfpu");
4999     #else
5000     // JIT FPU is always disabled
5001     avoid_fpu = true;
5002     #endif
5003     write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
5004    
5005     // Get size of the translation cache (in KB)
5006     cache_size = PrefsFindInt32("jitcachesize");
5007     write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5008    
5009     // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5010     raw_init_cpu();
5011 gbeauche 1.15 setzflg_uses_bsf = target_check_bsf();
5012 gbeauche 1.1 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5013     write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5014 gbeauche 1.5 write_log("<JIT compiler> : alignments for loops, jumps are %d, %d\n", align_loops, align_jumps);
5015 gbeauche 1.1
5016     // Translation cache flush mechanism
5017     lazy_flush = PrefsFindBool("jitlazyflush");
5018     write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5019     flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5020    
5021     // Compiler features
5022     write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5023     write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5024     write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5025 gbeauche 1.33 #if USE_INLINING
5026     follow_const_jumps = PrefsFindBool("jitinline");
5027     #endif
5028     write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
5029 gbeauche 1.1 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5030    
5031     // Build compiler tables
5032     build_comp();
5033    
5034     initialized = true;
5035    
5036 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
5037     write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
5038     #endif
5039    
5040 gbeauche 1.1 #if PROFILE_COMPILE_TIME
5041     write_log("<JIT compiler> : gather statistics on translation time\n");
5042     emul_start_time = clock();
5043     #endif
5044     }
5045    
5046     void compiler_exit(void)
5047     {
5048     #if PROFILE_COMPILE_TIME
5049     emul_end_time = clock();
5050     #endif
5051    
5052     // Deallocate translation cache
5053     if (compiled_code) {
5054 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5055 gbeauche 1.1 compiled_code = 0;
5056     }
5057 gbeauche 1.24
5058     // Deallocate popallspace
5059     if (popallspace) {
5060     vm_release(popallspace, POPALLSPACE_SIZE);
5061     popallspace = 0;
5062     }
5063 gbeauche 1.1
5064     #if PROFILE_COMPILE_TIME
5065     write_log("### Compile Block statistics\n");
5066     write_log("Number of calls to compile_block : %d\n", compile_count);
5067     uae_u32 emul_time = emul_end_time - emul_start_time;
5068     write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5069     write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5070     100.0*double(compile_time)/double(emul_time));
5071     write_log("\n");
5072     #endif
5073 gbeauche 1.9
5074     #if PROFILE_UNTRANSLATED_INSNS
5075     uae_u64 untranslated_count = 0;
5076     for (int i = 0; i < 65536; i++) {
5077     opcode_nums[i] = i;
5078     untranslated_count += raw_cputbl_count[i];
5079     }
5080     write_log("Sorting untranslated instruction counts...\n");
5081     qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5082     write_log("\nRank Opc Count Name\n");
5083     for (int i = 0; i < untranslated_top_ten; i++) {
5084     uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5085     struct instr *dp;
5086     struct mnemolookup *lookup;
5087     if (!count)
5088     break;
5089     dp = table68k + opcode_nums[i];
5090     for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5091     ;
5092     write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
5093     }
5094     #endif
5095 gbeauche 1.28
5096     #if RECORD_REGISTER_USAGE
5097     int reg_count_ids[16];
5098     uint64 tot_reg_count = 0;
5099     for (int i = 0; i < 16; i++) {
5100     reg_count_ids[i] = i;
5101     tot_reg_count += reg_count[i];
5102     }
5103     qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5104     uint64 cum_reg_count = 0;
5105     for (int i = 0; i < 16; i++) {
5106     int r = reg_count_ids[i];
5107     cum_reg_count += reg_count[r];
5108     printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5109     reg_count[r],
5110     100.0*double(reg_count[r])/double(tot_reg_count),
5111     100.0*double(cum_reg_count)/double(tot_reg_count));
5112     }
5113     #endif
5114 gbeauche 1.1 }
5115    
5116     bool compiler_use_jit(void)
5117     {
5118     // Check for the "jit" prefs item
5119     if (!PrefsFindBool("jit"))
5120     return false;
5121    
5122     // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5123     if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5124     write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5125     return false;
5126     }
5127    
5128     // FIXME: there are currently problems with JIT compilation and anything below a 68040
5129     if (CPUType < 4) {
5130     write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5131     return false;
5132     }
5133    
5134     return true;
5135     }
5136    
5137     void init_comp(void)
5138     {
5139     int i;
5140     uae_s8* cb=can_byte;
5141     uae_s8* cw=can_word;
5142     uae_s8* au=always_used;
5143    
5144 gbeauche 1.28 #if RECORD_REGISTER_USAGE
5145     for (i=0;i<16;i++)
5146     reg_count_local[i] = 0;
5147     #endif
5148    
5149 gbeauche 1.1 for (i=0;i<VREGS;i++) {
5150     live.state[i].realreg=-1;
5151     live.state[i].needflush=NF_SCRATCH;
5152     live.state[i].val=0;
5153     set_status(i,UNDEF);
5154     }
5155    
5156     for (i=0;i<VFREGS;i++) {
5157     live.fate[i].status=UNDEF;
5158     live.fate[i].realreg=-1;
5159     live.fate[i].needflush=NF_SCRATCH;
5160     }
5161    
5162     for (i=0;i<VREGS;i++) {
5163     if (i<16) { /* First 16 registers map to 68k registers */
5164     live.state[i].mem=((uae_u32*)&regs)+i;
5165     live.state[i].needflush=NF_TOMEM;
5166     set_status(i,INMEM);
5167     }
5168     else
5169     live.state[i].mem=scratch.regs+i;
5170     }
5171     live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5172     live.state[PC_P].needflush=NF_TOMEM;
5173 gbeauche 1.24 set_const(PC_P,(uintptr)comp_pc_p);
5174 gbeauche 1.1
5175 gbeauche 1.24 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5176 gbeauche 1.1 live.state[FLAGX].needflush=NF_TOMEM;
5177     set_status(FLAGX,INMEM);
5178    
5179 gbeauche 1.24 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5180 gbeauche 1.1 live.state[FLAGTMP].needflush=NF_TOMEM;
5181     set_status(FLAGTMP,INMEM);
5182    
5183     live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5184     set_status(NEXT_HANDLER,UNDEF);
5185    
5186     for (i=0;i<VFREGS;i++) {
5187     if (i<8) { /* First 8 registers map to 68k FPU registers */
5188     live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5189     live.fate[i].needflush=NF_TOMEM;
5190     live.fate[i].status=INMEM;
5191     }
5192     else if (i==FP_RESULT) {
5193     live.fate[i].mem=(uae_u32*)(&fpu.result);
5194     live.fate[i].needflush=NF_TOMEM;
5195     live.fate[i].status=INMEM;
5196     }
5197     else
5198 gbeauche 1.25 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5199 gbeauche 1.1 }
5200    
5201    
5202     for (i=0;i<N_REGS;i++) {
5203     live.nat[i].touched=0;
5204     live.nat[i].nholds=0;
5205     live.nat[i].locked=0;
5206     if (*cb==i) {
5207     live.nat[i].canbyte=1; cb++;
5208     } else live.nat[i].canbyte=0;
5209     if (*cw==i) {
5210     live.nat[i].canword=1; cw++;
5211     } else live.nat[i].canword=0;
5212     if (*au==i) {
5213     live.nat[i].locked=1; au++;
5214     }
5215     }
5216    
5217     for (i=0;i<N_FREGS;i++) {
5218     live.fat[i].touched=0;
5219     live.fat[i].nholds=0;
5220     live.fat[i].locked=0;
5221     }
5222    
5223     touchcnt=1;
5224     m68k_pc_offset=0;
5225     live.flags_in_flags=TRASH;
5226     live.flags_on_stack=VALID;
5227     live.flags_are_important=1;
5228    
5229     raw_fp_init();
5230     }
5231    
5232     /* Only do this if you really mean it! The next call should be to init!*/
5233     void flush(int save_regs)
5234     {
5235     int i;
5236    
5237     log_flush();
5238     flush_flags(); /* low level */
5239     sync_m68k_pc(); /* mid level */
5240    
5241     if (save_regs) {
5242     for (i=0;i<VFREGS;i++) {
5243     if (live.fate[i].needflush==NF_SCRATCH ||
5244     live.fate[i].status==CLEAN) {
5245     f_disassociate(i);
5246     }
5247     }
5248     for (i=0;i<VREGS;i++) {
5249     if (live.state[i].needflush==NF_TOMEM) {
5250     switch(live.state[i].status) {
5251     case INMEM:
5252     if (live.state[i].val) {
5253 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5254 gbeauche 1.1 log_vwrite(i);
5255     live.state[i].val=0;
5256     }
5257     break;
5258     case CLEAN:
5259     case DIRTY:
5260     remove_offset(i,-1); tomem(i); break;
5261     case ISCONST:
5262     if (i!=PC_P)
5263     writeback_const(i);
5264     break;
5265     default: break;
5266     }
5267     Dif (live.state[i].val && i!=PC_P) {
5268     write_log("Register %d still has val %x\n",
5269     i,live.state[i].val);
5270     }
5271     }
5272     }
5273     for (i=0;i<VFREGS;i++) {
5274     if (live.fate[i].needflush==NF_TOMEM &&
5275     live.fate[i].status==DIRTY) {
5276     f_evict(i);
5277     }
5278     }
5279     raw_fp_cleanup_drop();
5280     }
5281     if (needflags) {
5282     write_log("Warning! flush with needflags=1!\n");
5283     }
5284     }
5285    
5286     static void flush_keepflags(void)
5287     {
5288     int i;
5289    
5290     for (i=0;i<VFREGS;i++) {
5291     if (live.fate[i].needflush==NF_SCRATCH ||
5292     live.fate[i].status==CLEAN) {
5293     f_disassociate(i);
5294     }
5295     }
5296     for (i=0;i<VREGS;i++) {
5297     if (live.state[i].needflush==NF_TOMEM) {
5298     switch(live.state[i].status) {
5299     case INMEM:
5300     /* Can't adjust the offset here --- that needs "add" */
5301     break;
5302     case CLEAN:
5303     case DIRTY:
5304     remove_offset(i,-1); tomem(i); break;
5305     case ISCONST:
5306     if (i!=PC_P)
5307     writeback_const(i);
5308     break;
5309     default: break;
5310     }
5311     }
5312     }
5313     for (i=0;i<VFREGS;i++) {
5314     if (live.fate[i].needflush==NF_TOMEM &&
5315     live.fate[i].status==DIRTY) {
5316     f_evict(i);
5317     }
5318     }
5319     raw_fp_cleanup_drop();
5320     }
5321    
5322     void freescratch(void)
5323     {
5324     int i;
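    /* Native register 4 is the stack pointer (assuming the usual x86/x86-64
       register numbering where ESP/RSP is register 4) and stays locked by
       design, hence the i!=4 exemption below. */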
5325     for (i=0;i<N_REGS;i++)
5326     if (live.nat[i].locked && i!=4)
5327     write_log("Warning! %d is locked\n",i);
5328    
5329     for (i=0;i<VREGS;i++)
5330     if (live.state[i].needflush==NF_SCRATCH) {
5331     forget_about(i);
5332     }
5333    
5334     for (i=0;i<VFREGS;i++)
5335     if (live.fate[i].needflush==NF_SCRATCH) {
5336     f_forget_about(i);
5337     }
5338     }
5339    
5340     /********************************************************************
5341     * Support functions, internal *
5342     ********************************************************************/
5343    
5344    
5345     static void align_target(uae_u32 a)
5346     {
5347 gbeauche 1.14 if (!a)
5348     return;
5349    
5350 gbeauche 1.12 if (tune_nop_fillers)
5351 gbeauche 1.24 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5352 gbeauche 1.12 else {
5353     /* Fill with NOPs --- makes debugging with gdb easier */
5354 gbeauche 1.24 while ((uintptr)target&(a-1))
5355 gbeauche 1.12 *target++=0x90;
5356     }
5357 gbeauche 1.1 }
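    /* Worked example, assuming a == 16 and target == ...4005:
       (uintptr)target & (a-1) == 5, so 11 filler bytes (16-5) are emitted
       and the next instruction starts on a 16-byte boundary. */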
5358    
5359     static __inline__ int isinrom(uintptr addr)
5360     {
5361     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5362     }
5363    
5364     static void flush_all(void)
5365     {
5366     int i;
5367    
5368     log_flush();
5369     for (i=0;i<VREGS;i++)
5370     if (live.state[i].status==DIRTY) {
5371     if (!call_saved[live.state[i].realreg]) {
5372     tomem(i);
5373     }
5374     }
5375     for (i=0;i<VFREGS;i++)
5376     if (f_isinreg(i))
5377     f_evict(i);
5378     raw_fp_cleanup_drop();
5379     }
5380    
5381     /* Make sure all registers that will get clobbered by a call are
5382     safe and sound in memory */
5383     static void prepare_for_call_1(void)
5384     {
5385     flush_all(); /* If there are registers that don't get clobbered,
5386     * we should be a bit more selective here */
5387     }
5388    
5389     /* We will call a C routine in a moment. That will clobber all registers,
5390     so we need to disassociate everything */
5391     static void prepare_for_call_2(void)
5392     {
5393     int i;
5394     for (i=0;i<N_REGS;i++)
5395     if (!call_saved[i] && live.nat[i].nholds>0)
5396     free_nreg(i);
5397    
5398     for (i=0;i<N_FREGS;i++)
5399     if (live.fat[i].nholds>0)
5400     f_free_nreg(i);
5401    
5402     live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5403     flags at the very start of the call_r
5404     functions! */
5405     }
5406    
5407     /********************************************************************
5408     * Memory access and related functions, CREATE time *
5409     ********************************************************************/
5410    
5411     void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5412     {
5413     next_pc_p=not_taken;
5414     taken_pc_p=taken;
5415     branch_cc=cond;
5416     }
5417    
5418    
5419     static uae_u32 get_handler_address(uae_u32 addr)
5420     {
5421     uae_u32 cl=cacheline(addr);
5422 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5423     return (uintptr)&(bi->direct_handler_to_use);
5424 gbeauche 1.1 }
5425    
5426     static uae_u32 get_handler(uae_u32 addr)
5427     {
5428     uae_u32 cl=cacheline(addr);
5429 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5430     return (uintptr)bi->direct_handler_to_use;
5431 gbeauche 1.1 }
5432    
5433     static void load_handler(int reg, uae_u32 addr)
5434     {
5435     mov_l_rm(reg,get_handler_address(addr));
5436     }
5437    
5438     /* This version assumes that it is writing *real* memory, and *will* fail
5439     * if that assumption is wrong! No branches, no second chances, just
5440     * straight go-for-it attitude */
5441    
5442 gbeauche 1.24 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5443 gbeauche 1.1 {
5444     int f=tmp;
5445    
5446     if (clobber)
5447     f=source;
5448 gbeauche 1.24
5449     #if SIZEOF_VOID_P == 8
5450 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5451     sign_extend_32_rr(address, address);
5452 gbeauche 1.24 #endif
5453    
5454 gbeauche 1.1 switch(size) {
5455     case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5456     case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5457     case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5458     }
5459     forget_about(tmp);
5460     forget_about(f);
5461     }
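    /* Illustrative example of the long store above under direct addressing:
       a 68k value 0x12345678 is copied into the scratch register f,
       bswap_32 turns it into 0x78563412, and the little-endian x86 store
       then leaves the bytes 12 34 56 78 at ascending host addresses --
       i.e. proper 68k big-endian layout at host address MEMBaseDiff+address. */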
5462    
5463     void writebyte(int address, int source, int tmp)
5464     {
5465 gbeauche 1.24 writemem_real(address,source,1,tmp,0);
5466 gbeauche 1.1 }
5467    
5468     static __inline__ void writeword_general(int address, int source, int tmp,
5469     int clobber)
5470     {
5471 gbeauche 1.24 writemem_real(address,source,2,tmp,clobber);
5472 gbeauche 1.1 }
5473    
5474     void writeword_clobber(int address, int source, int tmp)
5475     {
5476     writeword_general(address,source,tmp,1);
5477     }
5478    
5479     void writeword(int address, int source, int tmp)
5480     {
5481     writeword_general(address,source,tmp,0);
5482     }
5483    
5484     static __inline__ void writelong_general(int address, int source, int tmp,
5485     int clobber)
5486     {
5487 gbeauche 1.24 writemem_real(address,source,4,tmp,clobber);
5488 gbeauche 1.1 }
5489    
5490     void writelong_clobber(int address, int source, int tmp)
5491     {
5492     writelong_general(address,source,tmp,1);
5493     }
5494    
5495     void writelong(int address, int source, int tmp)
5496     {
5497     writelong_general(address,source,tmp,0);
5498     }
5499    
5500    
5501    
5502     /* This version assumes that it is reading *real* memory, and *will* fail
5503     * if that assumption is wrong! No branches, no second chances, just
5504     * straight go-for-it attitude */
5505    
5506 gbeauche 1.24 static void readmem_real(int address, int dest, int size, int tmp)
5507 gbeauche 1.1 {
5508     int f=tmp;
5509    
5510     if (size==4 && address!=dest)
5511     f=dest;
5512    
5513 gbeauche 1.24 #if SIZEOF_VOID_P == 8
5514 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5515     sign_extend_32_rr(address, address);
5516 gbeauche 1.24 #endif
5517    
5518 gbeauche 1.1 switch(size) {
5519     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5520     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5521     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5522     }
5523     forget_about(tmp);
5524     }
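    /* Illustrative example of the word read above: a 68k word 0x1234 lives
       in guest memory as the bytes 12 34. The little-endian x86 load
       yields 0x3412 in dest, and bswap_16 restores the value 0x1234. */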
5525    
5526     void readbyte(int address, int dest, int tmp)
5527     {
5528 gbeauche 1.24 readmem_real(address,dest,1,tmp);
5529 gbeauche 1.1 }
5530    
5531     void readword(int address, int dest, int tmp)
5532     {
5533 gbeauche 1.24 readmem_real(address,dest,2,tmp);
5534 gbeauche 1.1 }
5535    
5536     void readlong(int address, int dest, int tmp)
5537     {
5538 gbeauche 1.24 readmem_real(address,dest,4,tmp);
5539 gbeauche 1.1 }
5540    
5541     void get_n_addr(int address, int dest, int tmp)
5542     {
5543     // a is the register containing the virtual address
5544     // after the offset has been fetched
5545     int a=tmp;
5546    
5547     // f is the register that will contain the offset
5548     int f=tmp;
5549    
5550     // a == f == tmp if (address == dest)
5551     if (address!=dest) {
5552     a=address;
5553     f=dest;
5554     }
5555    
5556     #if REAL_ADDRESSING
5557     mov_l_rr(dest, address);
5558     #elif DIRECT_ADDRESSING
5559     lea_l_brr(dest,address,MEMBaseDiff);
5560     #endif
5561     forget_about(tmp);
5562     }
5563    
5564     void get_n_addr_jmp(int address, int dest, int tmp)
5565     {
5566     /* For this, we need to get the same address as the rest of UAE
5567     would --- otherwise we end up translating everything twice */
5568     get_n_addr(address,dest,tmp);
5569     }
5570    
5571    
5572     /* base is a register, but dp is an actual value.
5573     target is a register, as is tmp */
5574     void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5575     {
5576     int reg = (dp >> 12) & 15;
5577     int regd_shift=(dp >> 9) & 3;
5578    
5579     if (dp & 0x100) {
5580     int ignorebase=(dp&0x80);
5581     int ignorereg=(dp&0x40);
5582     int addbase=0;
5583     int outer=0;
5584    
5585     if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5586     if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5587    
5588     if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5589     if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5590    
5591     if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5592     if (!ignorereg) {
5593     if ((dp & 0x800) == 0)
5594     sign_extend_16_rr(target,reg);
5595     else
5596     mov_l_rr(target,reg);
5597     shll_l_ri(target,regd_shift);
5598     }
5599     else
5600     mov_l_ri(target,0);
5601    
5602     /* target is now regd */
5603     if (!ignorebase)
5604     add_l(target,base);
5605     add_l_ri(target,addbase);
5606     if (dp&0x03) readlong(target,target,tmp);
5607     } else { /* do the getlong first, then add regd */
5608     if (!ignorebase) {
5609     mov_l_rr(target,base);
5610     add_l_ri(target,addbase);
5611     }
5612     else
5613     mov_l_ri(target,addbase);
5614     if (dp&0x03) readlong(target,target,tmp);
5615    
5616     if (!ignorereg) {
5617     if ((dp & 0x800) == 0)
5618     sign_extend_16_rr(tmp,reg);
5619     else
5620     mov_l_rr(tmp,reg);
5621     shll_l_ri(tmp,regd_shift);
5622     /* tmp is now regd */
5623     add_l(target,tmp);
5624     }
5625     }
5626     add_l_ri(target,outer);
5627     }
5628     else { /* 68000 version */
5629     if ((dp & 0x800) == 0) { /* Sign extend */
5630     sign_extend_16_rr(target,reg);
5631     lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5632     }
5633     else {
5634     lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5635     }
5636     }
5637     forget_about(tmp);
5638     }
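    /* Worked example for the brief (68000-style) format path, using the
       illustrative extension word dp == 0x5826: bit 8 is clear, so this is
       the brief format; reg == 5, bit 0x800 is set so the full 32-bit
       index register is used, regd_shift == 0 (scale 1<<0 == 1), and the
       8-bit displacement is (uae_s8)0x26 == +38. The whole mode collapses
       into a single lea: target = base + regs[5]*1 + 38. */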
5639    
5640    
5641    
5642    
5643    
5644     void set_cache_state(int enabled)
5645     {
5646     if (enabled!=letit)
5647     flush_icache_hard(77);
5648     letit=enabled;
5649     }
5650    
5651     int get_cache_state(void)
5652     {
5653     return letit;
5654     }
5655    
5656     uae_u32 get_jitted_size(void)
5657     {
5658     if (compiled_code)
5659     return current_compile_p-compiled_code;
5660     return 0;
5661     }
5662    
5663 gbeauche 1.20 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5664     const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5665    
5666     static uint8 *do_alloc_code(uint32 size, int depth)
5667     {
5668     #if defined(__linux__) && 0
5669     /*
5670     This is a really awful hack that is known to work on Linux at
5671     least.
5672    
5673     The trick here is to make sure the allocated cache is nearby
5674     code segment, and more precisely in the positive half of a
5675     32-bit address space. i.e. addr < 0x80000000. Actually, it
5676     turned out that a 32-bit binary run on AMD64 yields a cache
5677     allocated around 0xa0000000, thus causing some troubles when
5678     translating addresses from m68k to x86.
5679     */
5680     static uint8 * code_base = NULL;
5681     if (code_base == NULL) {
5682     uintptr page_size = getpagesize();
5683     uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5684     if (boundaries < page_size)
5685     boundaries = page_size;
5686     code_base = (uint8 *)sbrk(0);
5687     for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5688     if (vm_acquire_fixed(code_base, size) == 0) {
5689     uint8 *code = code_base;
5690     code_base += size;
5691     return code;
5692     }
5693     code_base += boundaries;
5694     }
5695     return NULL;
5696     }
5697    
5698     if (vm_acquire_fixed(code_base, size) == 0) {
5699     uint8 *code = code_base;
5700     code_base += size;
5701     return code;
5702     }
5703    
5704     if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5705     return NULL;
5706    
5707     return do_alloc_code(size, depth + 1);
5708     #else
5709     uint8 *code = (uint8 *)vm_acquire(size);
5710     return code == VM_MAP_FAILED ? NULL : code;
5711     #endif
5712     }
5713    
5714     static inline uint8 *alloc_code(uint32 size)
5715     {
5716 gbeauche 1.31 uint8 *ptr = do_alloc_code(size, 0);
5717     /* allocated code must fit in 32-bit boundaries */
5718     assert((uintptr)ptr <= 0xffffffff);
5719     return ptr;
5720 gbeauche 1.20 }
5721    
5722 gbeauche 1.1 void alloc_cache(void)
5723     {
5724     if (compiled_code) {
5725     flush_icache_hard(6);
5726 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5727 gbeauche 1.1 compiled_code = 0;
5728     }
5729    
5730     if (cache_size == 0)
5731     return;
5732    
5733     while (!compiled_code && cache_size) {
5734 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5735 gbeauche 1.1 compiled_code = 0;
5736     cache_size /= 2;
5737     }
5738     }
5739 gbeauche 1.25 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5740 gbeauche 1.1
5741     if (compiled_code) {
5742     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5743     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5744     current_compile_p = compiled_code;
5745     current_cache_size = 0;
5746     }
5747     }
5748    
5749    
5750    
5751 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5752 gbeauche 1.1
5753 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5754 gbeauche 1.1 {
5755 gbeauche 1.8 uae_u32 k1 = 0;
5756     uae_u32 k2 = 0;
5757    
5758     #if USE_CHECKSUM_INFO
5759     checksum_info *csi = bi->csi;
5760     Dif(!csi) abort();
5761     while (csi) {
5762     uae_s32 len = csi->length;
5763 gbeauche 1.24 uintptr tmp = (uintptr)csi->start_p;
5764 gbeauche 1.8 #else
5765     uae_s32 len = bi->len;
5766 gbeauche 1.24 uintptr tmp = (uintptr)bi->min_pcp;
5767 gbeauche 1.8 #endif
5768     uae_u32*pos;
5769 gbeauche 1.1
5770 gbeauche 1.8 len += (tmp & 3);
5771 gbeauche 1.24 tmp &= ~((uintptr)3);
5772 gbeauche 1.8 pos = (uae_u32 *)tmp;
5773    
5774     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5775     while (len > 0) {
5776     k1 += *pos;
5777     k2 ^= *pos;
5778     pos++;
5779     len -= 4;
5780     }
5781     }
5782 gbeauche 1.1
5783 gbeauche 1.8 #if USE_CHECKSUM_INFO
5784     csi = csi->next;
5785 gbeauche 1.1 }
5786 gbeauche 1.8 #endif
5787    
5788     *c1 = k1;
5789     *c2 = k2;
5790 gbeauche 1.1 }
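    /* Illustrative sketch of the checksum scheme above: the translated 68k
       source range is scanned as 32-bit words, k1 accumulating their sum
       and k2 their xor. E.g. for the words {0x00004e75, 0x0000ffff}:
         k1 = 0x4e75 + 0xffff = 0x00014e74
         k2 = 0x4e75 ^ 0xffff = 0x0000b18a
       Comparing both against the stored (c1,c2) later tells us whether the
       block was modified, without keeping a copy of the source bytes. */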
5791    
5792 gbeauche 1.8 #if 0
5793 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5794 gbeauche 1.1 {
5795     uae_u32 k1=0;
5796     uae_u32 k2=0;
5797 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5798 gbeauche 1.24 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5799 gbeauche 1.1 uae_u32* pos;
5800    
5801     len+=(tmp&3);
5802     tmp&=(~3);
5803     pos=(uae_u32*)tmp;
5804    
5805     if (len<0 || len>MAX_CHECKSUM_LEN) {
5806     return;
5807     }
5808     else {
5809     while (len>0) {
5810     write_log("%08x ",*pos);
5811     pos++;
5812     len-=4;
5813     }
5814     write_log(" bla\n");
5815     }
5816     }
5817 gbeauche 1.8 #endif
5818 gbeauche 1.1
5819    
5820     int check_for_cache_miss(void)
5821     {
5822     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5823    
5824     if (bi) {
5825     int cl=cacheline(regs.pc_p);
5826     if (bi!=cache_tags[cl+1].bi) {
5827     raise_in_cl_list(bi);
5828     return 1;
5829     }
5830     }
5831     return 0;
5832     }
5833    
5834    
5835     static void recompile_block(void)
5836     {
5837     /* An existing block's countdown code has expired. We need to make
5838     sure that execute_normal doesn't refuse to recompile due to a
5839     perceived cache miss... */
5840     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5841    
5842     Dif (!bi)
5843     abort();
5844     raise_in_cl_list(bi);
5845     execute_normal();
5846     return;
5847     }
5848     static void cache_miss(void)
5849     {
5850     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5851     uae_u32 cl=cacheline(regs.pc_p);
5852     blockinfo* bi2=get_blockinfo(cl);
5853    
5854     if (!bi) {
5855     execute_normal(); /* Compile this block now */
5856     return;
5857     }
5858     Dif (!bi2 || bi==bi2) {
5859     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5860     abort();
5861     }
5862     raise_in_cl_list(bi);
5863     return;
5864     }
5865    
5866     static int called_check_checksum(blockinfo* bi);
5867    
5868     static inline int block_check_checksum(blockinfo* bi)
5869     {
5870     uae_u32 c1,c2;
5871 gbeauche 1.7 bool isgood;
5872 gbeauche 1.1
5873     if (bi->status!=BI_NEED_CHECK)
5874     return 1; /* This block is in a checked state */
5875    
5876     checksum_count++;
5877 gbeauche 1.7
5878 gbeauche 1.1 if (bi->c1 || bi->c2)
5879     calc_checksum(bi,&c1,&c2);
5880     else {
5881     c1=c2=1; /* Make sure it doesn't match */
5882 gbeauche 1.7 }
5883 gbeauche 1.1
5884     isgood=(c1==bi->c1 && c2==bi->c2);
5885 gbeauche 1.7
5886 gbeauche 1.1 if (isgood) {
5887     /* This block is still OK. So we reactivate. Of course, that
5888     means we have to move it into the needs-to-be-flushed list */
5889     bi->handler_to_use=bi->handler;
5890     set_dhtu(bi,bi->direct_handler);
5891     bi->status=BI_CHECKING;
5892     isgood=called_check_checksum(bi);
5893     }
5894     if (isgood) {
5895     /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5896     c1,c2,bi->c1,bi->c2);*/
5897     remove_from_list(bi);
5898     add_to_active(bi);
5899     raise_in_cl_list(bi);
5900     bi->status=BI_ACTIVE;
5901     }
5902     else {
5903     /* This block actually changed. We need to invalidate it,
5904     and set it up to be recompiled */
5905     /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5906     c1,c2,bi->c1,bi->c2); */
5907     invalidate_block(bi);
5908     raise_in_cl_list(bi);
5909     }
5910     return isgood;
5911     }
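    /* Under the lazy-flush scheme, a cache invalidation merely marks blocks
       BI_NEED_CHECK; the code above re-validates a block on first use by
       recomputing (c1,c2), reactivating it on a match and invalidating it
       for recompilation otherwise -- so unchanged code survives an icache
       flush without being retranslated. */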
5912    
5913     static int called_check_checksum(blockinfo* bi)
5914     {
5915     dependency* x=bi->deplist;
5916     int isgood=1;
5917     int i;
5918    
5919     for (i=0;i<2 && isgood;i++) {
5920     if (bi->dep[i].jmp_off) {
5921     isgood=block_check_checksum(bi->dep[i].target);
5922     }
5923     }
5924     return isgood;
5925     }
5926    
5927     static void check_checksum(void)
5928     {
5929     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5930     uae_u32 cl=cacheline(regs.pc_p);
5931     blockinfo* bi2=get_blockinfo(cl);
5932    
5933     /* These are not the droids you are looking for... */
5934     if (!bi) {
5935     /* Whoever is the primary target is in a dormant state, but
5936     calling it was accidental, and we should just compile this
5937     new block */
5938     execute_normal();
5939     return;
5940     }
5941     if (bi!=bi2) {
5942     /* The block was hit accidentally, but it does exist. Cache miss */
5943     cache_miss();
5944     return;
5945     }
5946    
5947     if (!block_check_checksum(bi))
5948     execute_normal();
5949     }
5950    
5951     static __inline__ void match_states(blockinfo* bi)
5952     {
5953     int i;
5954     smallstate* s=&(bi->env);
5955    
5956     if (bi->status==BI_NEED_CHECK) {
5957     block_check_checksum(bi);
5958     }
5959     if (bi->status==BI_ACTIVE ||
5960     bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5961     block makes (about not using
5962     certain vregs) */
5963     for (i=0;i<16;i++) {
5964     if (s->virt[i]==L_UNNEEDED) {
5965     // write_log("unneeded reg %d at %p\n",i,target);
5966     COMPCALL(forget_about)(i); // FIXME
5967     }
5968     }
5969     }
5970     flush(1);
5971    
5972     /* And now deal with the *demands* the block makes */
5973     for (i=0;i<N_REGS;i++) {
5974     int v=s->nat[i];
5975     if (v>=0) {
5976     // printf("Loading reg %d into %d at %p\n",v,i,target);
5977     readreg_specific(v,4,i);
5978     // do_load_reg(i,v);
5979     // setlock(i);
5980     }
5981     }
5982     for (i=0;i<N_REGS;i++) {
5983     int v=s->nat[i];
5984     if (v>=0) {
5985     unlock2(i);
5986     }
5987     }
5988     }
5989    
5990     static __inline__ void create_popalls(void)
5991     {
5992     int i,r;
5993    
5994 gbeauche 1.24 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5995     write_log("FATAL: Could not allocate popallspace!\n");
5996     abort();
5997     }
5998     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
5999    
6000 gbeauche 1.35 int stack_space = STACK_OFFSET;
6001     for (i=0;i<N_REGS;i++) {
6002     if (need_to_preserve[i])
6003     stack_space += sizeof(void *);
6004     }
6005     stack_space %= STACK_ALIGN;
6006     if (stack_space)
6007     stack_space = STACK_ALIGN - stack_space;
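         /* stack_space ends up holding only the padding needed to make
            STACK_OFFSET plus the preserved registers a multiple of
            STACK_ALIGN. Worked example, assuming STACK_ALIGN == 16 and
            STACK_OFFSET == 0 on ia32: three preserved 4-byte registers
            give 12, 12 % 16 == 12, so 16 - 12 == 4 padding bytes are
            reserved; with four such registers the total is already a
            multiple of 16 and no padding is added. */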
6008    
6009 gbeauche 1.1 current_compile_p=popallspace;
6010     set_target(current_compile_p);
6011 gbeauche 1.36
6012     /* We need to guarantee 16-byte stack alignment on x86 at any point
6013     within the JIT generated code. We have multiple possible exit
6014     points but a single entry point. A "jmp" is used so that we don't
6015     have to generate stack alignment in generated code that has to
6016     call external functions (e.g. a generic instruction handler).
6017    
6018     In summary, JIT generated code is not a leaf function, so we have
6019     to deal with stack alignment here. */
6020     align_target(align_jumps);
6021     current_compile_p=get_target();
6022     pushall_call_handler=get_target();
6023     for (i=N_REGS;i--;) {
6024     if (need_to_preserve[i])
6025     raw_push_l_r(i);
6026     }
6027     raw_dec_sp(stack_space);
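         /* Dispatch: mask regs.pc_p with TAGMASK and jump through the
            cache_tags table; tags with no translated block installed
            point at popall_execute_normal (set up in build_comp below). */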
6028     r=REG_PC_TMP;
6029     raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6030     raw_and_l_ri(r,TAGMASK);
6031     raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6032    
6033     /* now the exit points */
6034 gbeauche 1.5 align_target(align_jumps);
6035     popall_do_nothing=get_target();
6036 gbeauche 1.35 raw_inc_sp(stack_space);
6037 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6038     if (need_to_preserve[i])
6039     raw_pop_l_r(i);
6040     }
6041 gbeauche 1.24 raw_jmp((uintptr)do_nothing);
6042 gbeauche 1.1
6043 gbeauche 1.5 align_target(align_jumps);
6044 gbeauche 1.1 popall_execute_normal=get_target();
6045 gbeauche 1.35 raw_inc_sp(stack_space);
6046 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6047     if (need_to_preserve[i])
6048     raw_pop_l_r(i);
6049     }
6050 gbeauche 1.24 raw_jmp((uintptr)execute_normal);
6051 gbeauche 1.1
6052 gbeauche 1.5 align_target(align_jumps);
6053 gbeauche 1.1 popall_cache_miss=get_target();
6054 gbeauche 1.35 raw_inc_sp(stack_space);
6055 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6056     if (need_to_preserve[i])
6057     raw_pop_l_r(i);
6058     }
6059 gbeauche 1.24 raw_jmp((uintptr)cache_miss);
6060 gbeauche 1.1
6061 gbeauche 1.5 align_target(align_jumps);
6062 gbeauche 1.1 popall_recompile_block=get_target();
6063 gbeauche 1.35 raw_inc_sp(stack_space);
6064 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6065     if (need_to_preserve[i])
6066     raw_pop_l_r(i);
6067     }
6068 gbeauche 1.24 raw_jmp((uintptr)recompile_block);
6069 gbeauche 1.5
6070     align_target(align_jumps);
6071 gbeauche 1.1 popall_exec_nostats=get_target();
6072 gbeauche 1.35 raw_inc_sp(stack_space);
6073 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6074     if (need_to_preserve[i])
6075     raw_pop_l_r(i);
6076     }
6077 gbeauche 1.24 raw_jmp((uintptr)exec_nostats);
6078 gbeauche 1.5
6079     align_target(align_jumps);
6080 gbeauche 1.1 popall_check_checksum=get_target();
6081 gbeauche 1.35 raw_inc_sp(stack_space);
6082 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6083     if (need_to_preserve[i])
6084     raw_pop_l_r(i);
6085     }
6086 gbeauche 1.24 raw_jmp((uintptr)check_checksum);
6087 gbeauche 1.5
6088 gbeauche 1.24 // no need to further write into popallspace
6089     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6090 gbeauche 1.1 }
6091    
6092     static __inline__ void reset_lists(void)
6093     {
6094     int i;
6095    
6096     for (i=0;i<MAX_HOLD_BI;i++)
6097     hold_bi[i]=NULL;
6098     active=NULL;
6099     dormant=NULL;
6100     }
6101    
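         /* Emit the two per-block trampolines: direct_pen stores the
            block's pc_p into regs.pc_p and exits through
            popall_execute_normal (used once a block has been
            invalidated), while direct_pcc does the same through
            popall_check_checksum (used when the block merely needs
            re-validation). */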
6102     static void prepare_block(blockinfo* bi)
6103     {
6104     int i;
6105    
6106     set_target(current_compile_p);
6107 gbeauche 1.5 align_target(align_jumps);
6108 gbeauche 1.1 bi->direct_pen=(cpuop_func *)get_target();
6109 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6110     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6111     raw_jmp((uintptr)popall_execute_normal);
6112 gbeauche 1.1
6113 gbeauche 1.5 align_target(align_jumps);
6114 gbeauche 1.1 bi->direct_pcc=(cpuop_func *)get_target();
6115 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6116     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6117     raw_jmp((uintptr)popall_check_checksum);
6118 gbeauche 1.1 current_compile_p=get_target();
6119    
6120     bi->deplist=NULL;
6121     for (i=0;i<2;i++) {
6122     bi->dep[i].prev_p=NULL;
6123     bi->dep[i].next=NULL;
6124     }
6125     bi->env=default_ss;
6126     bi->status=BI_INVALID;
6127     bi->havestate=0;
6128     //bi->env=empty_ss;
6129     }
6130    
6131 gbeauche 1.21 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6132     static inline void reset_compop(int opcode)
6133 gbeauche 1.17 {
6134 gbeauche 1.21 compfunctbl[opcode] = NULL;
6135     nfcompfunctbl[opcode] = NULL;
6136     }
6137    
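         /* Parse exactly four hex digits into an opcode value; returns -1
            on any malformed input. E.g. read_opcode("f210") == 0xf210,
            while read_opcode("f2xx") == -1. */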
6138     static int read_opcode(const char *p)
6139     {
6140     int opcode = 0;
6141     for (int i = 0; i < 4; i++) {
6142     int op = p[i];
6143     switch (op) {
6144     case '0': case '1': case '2': case '3': case '4':
6145     case '5': case '6': case '7': case '8': case '9':
6146     opcode = (opcode << 4) | (op - '0');
6147     break;
6148     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6149     opcode = (opcode << 4) | ((op - 'a') + 10);
6150     break;
6151     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6152     opcode = (opcode << 4) | ((op - 'A') + 10);
6153     break;
6154     default:
6155     return -1;
6156     }
6157     }
6158     return opcode;
6159     }
6160    
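         /* The "jitblacklist" preference string is a ';'-separated list
            of 4-digit hex opcodes or '-'-delimited opcode ranges whose
            compile handlers get disabled, e.g. "f200-f2ff;a800" (the
            values here are purely illustrative). */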
6161     static bool merge_blacklist()
6162     {
6163     const char *blacklist = PrefsFindString("jitblacklist");
6164     if (blacklist) {
6165     const char *p = blacklist;
6166     for (;;) {
6167     if (*p == 0)
6168     return true;
6169    
6170     int opcode1 = read_opcode(p);
6171     if (opcode1 < 0)
6172     return false;
6173     p += 4;
6174    
6175     int opcode2 = opcode1;
6176     if (*p == '-') {
6177     p++;
6178     opcode2 = read_opcode(p);
6179     if (opcode2 < 0)
6180     return false;
6181     p += 4;
6182     }
6183    
6184     if (*p == 0 || *p == ';') {
6185     write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6186     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6187     reset_compop(cft_map(opcode));
6188    
6189     if (*p++ == ';')
6190     continue;
6191    
6192     return true;
6193     }
6194    
6195     return false;
6196     }
6197     }
6198     return true;
6199 gbeauche 1.17 }
6200    
6201 gbeauche 1.1 void build_comp(void)
6202     {
6203     int i;
6205     unsigned long opcode;
6206     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6207     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6208     int count;
6209     int cpu_level = 0; // 68000 (default)
6210     if (CPUType == 4)
6211     cpu_level = 4; // 68040 with FPU
6212     else {
6213     if (FPUType)
6214     cpu_level = 3; // 68020 with FPU
6215     else if (CPUType >= 2)
6216     cpu_level = 2; // 68020
6217     else if (CPUType == 1)
6218     cpu_level = 1;
6219     }
6220     struct cputbl *nfctbl = (
6221     cpu_level == 4 ? op_smalltbl_0_nf
6222     : cpu_level == 3 ? op_smalltbl_1_nf
6223     : cpu_level == 2 ? op_smalltbl_2_nf
6224     : cpu_level == 1 ? op_smalltbl_3_nf
6225     : op_smalltbl_4_nf);
6226    
6227     write_log ("<JIT compiler> : building compiler function tables\n");
6228    
6229     for (opcode = 0; opcode < 65536; opcode++) {
6230 gbeauche 1.21 reset_compop(opcode);
6231 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6232     prop[opcode].use_flags = 0x1f;
6233     prop[opcode].set_flags = 0x1f;
6234     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6235     }
6236    
6237     for (i = 0; tbl[i].opcode < 65536; i++) {
6238     int cflow = table68k[tbl[i].opcode].cflow;
6239 gbeauche 1.33 if (follow_const_jumps && (tbl[i].specific & 16))
6240 gbeauche 1.10 cflow = fl_const_jump;
6241 gbeauche 1.8 else
6242 gbeauche 1.10 cflow &= ~fl_const_jump;
6243     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6244 gbeauche 1.1
6245     int uses_fpu = tbl[i].specific & 32;
6246 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6247 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6248     else
6249     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6250     }
6251 gbeauche 1.8
6252 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6253     int uses_fpu = nftbl[i].specific & 32;
6254 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6255 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6256     else
6257     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6258    
6259     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6260     }
6261    
6262     for (i = 0; nfctbl[i].handler; i++) {
6263     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6264     }
6265    
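         /* Second pass: table68k maps each opcode variant to the
            canonical opcode that handles it; copy that handler's compile
            functions, control-flow and flag properties over to every
            variant. */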
6266     for (opcode = 0; opcode < 65536; opcode++) {
6267     compop_func *f;
6268     compop_func *nff;
6269     cpuop_func *nfcf;
6270     int isaddx,cflow;
6271    
6272     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6273     continue;
6274    
6275     if (table68k[opcode].handler != -1) {
6276     f = compfunctbl[cft_map(table68k[opcode].handler)];
6277     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6278     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6279     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6280     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6281     prop[cft_map(opcode)].cflow = cflow;
6282     prop[cft_map(opcode)].is_addx = isaddx;
6283     compfunctbl[cft_map(opcode)] = f;
6284     nfcompfunctbl[cft_map(opcode)] = nff;
6285     Dif (nfcf == op_illg_1)
6286     abort();
6287     nfcpufunctbl[cft_map(opcode)] = nfcf;
6288     }
6289     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6290     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6291 gbeauche 1.33 /* Unconditional jumps don't evaluate condition codes, so they
6292     * don't actually use any flags themselves */
6293     if (prop[cft_map(opcode)].cflow & fl_const_jump)
6294     prop[cft_map(opcode)].use_flags = 0;
6295 gbeauche 1.1 }
6296     for (i = 0; nfctbl[i].handler != NULL; i++) {
6297     if (nfctbl[i].specific)
6298     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6299     }
6300 gbeauche 1.21
6301     /* Merge in blacklist */
6302     if (!merge_blacklist())
6303     write_log("<JIT compiler> : blacklist merge failure!\n");
6304 gbeauche 1.1
6305     count=0;
6306     for (opcode = 0; opcode < 65536; opcode++) {
6307     if (compfunctbl[cft_map(opcode)])
6308     count++;
6309     }
6310     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6311    
6312     /* Initialise state */
6313     create_popalls();
6314     alloc_cache();
6315     reset_lists();
6316    
6317     for (i=0;i<TAGSIZE;i+=2) {
6318     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6319     cache_tags[i+1].bi=NULL;
6320     }
6321    
6322     #if 0
6323     for (i=0;i<N_REGS;i++) {
6324     empty_ss.nat[i].holds=-1;
6325     empty_ss.nat[i].validsize=0;
6326     empty_ss.nat[i].dirtysize=0;
6327     }
6328     #endif
6329     for (i=0;i<VREGS;i++) {
6330     empty_ss.virt[i]=L_NEEDED;
6331     }
6332     for (i=0;i<N_REGS;i++) {
6333     empty_ss.nat[i]=L_UNKNOWN;
6334     }
6335     default_ss=empty_ss;
6336     }
6337    
6338    
6339     static void flush_icache_none(int n)
6340     {
6341     /* Nothing to do. */
6342     }
6343    
6344     static void flush_icache_hard(int n)
6345     {
6347     blockinfo* bi, *dbi;
6348    
6349     hard_flush_count++;
6350     #if 0
6351     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6352     n,regs.pc,regs.pc_p,current_cache_size/1024);
6353     current_cache_size = 0;
6354     #endif
6355     bi=active;
6356     while(bi) {
6357     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6358     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6359     dbi=bi; bi=bi->next;
6360     free_blockinfo(dbi);
6361     }
6362     bi=dormant;
6363     while(bi) {
6364     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6365     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6366     dbi=bi; bi=bi->next;
6367     free_blockinfo(dbi);
6368     }
6369    
6370     reset_lists();
6371     if (!compiled_code)
6372     return;
6373     current_compile_p=compiled_code;
6374     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6375     }
6376    
6377    
6378     /* "Soft flushing" --- instead of actually throwing everything away,
6379     we simply mark everything as "needs to be checked".
6380     */
6381    
6382     static inline void flush_icache_lazy(int n)
6383     {
6385     blockinfo* bi;
6386     blockinfo* bi2;
6387    
6388     soft_flush_count++;
6389     if (!active)
6390     return;
6391    
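         /* Demote every active block: blocks already marked invalid will
            be retranslated outright on next entry, all others get routed
            through the checksum check so they can be reactivated cheaply
            if their underlying 68k code is unchanged. */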
6392     bi=active;
6393     while (bi) {
6394     uae_u32 cl=cacheline(bi->pc_p);
6395     if (bi->status==BI_INVALID ||
6396     bi->status==BI_NEED_RECOMP) {
6397     if (bi==cache_tags[cl+1].bi)
6398     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6399     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6400     set_dhtu(bi,bi->direct_pen);
6401     bi->status=BI_INVALID;
6402     }
6403     else {
6404     if (bi==cache_tags[cl+1].bi)
6405     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6406     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6407     set_dhtu(bi,bi->direct_pcc);
6408     bi->status=BI_NEED_CHECK;
6409     }
6410     bi2=bi;
6411     bi=bi->next;
6412     }
6413     /* bi2 is now the last entry in the active list */
6414     bi2->next=dormant;
6415     if (dormant)
6416     dormant->prev_p=&(bi2->next);
6417    
6418     dormant=active;
6419     active->prev_p=&dormant;
6420     active=NULL;
6421 gbeauche 1.22 }
6422    
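         /* Invalidate only those blocks whose source 68k code may
            intersect [start, start + length); without per-range checksum
            info this degenerates to a full lazy flush. */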
6423     void flush_icache_range(uae_u32 start, uae_u32 length)
6424     {
6425     if (!active)
6426     return;
6427    
6428     #if LAZY_FLUSH_ICACHE_RANGE
6429     uae_u8 *start_p = get_real_address(start);
6430     blockinfo *bi = active;
6431     while (bi) {
6432     #if USE_CHECKSUM_INFO
6433     bool invalidate = false;
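         /* One-sided distance checks in both directions: together they
            detect any overlap between the flushed range and this block's
            checksummed source range. */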
6434     for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
6435     invalidate = (((start_p - csi->start_p) < csi->length) ||
6436     ((csi->start_p - start_p) < length));
6437     #else
6438     // Assume system is consistent and would invalidate the right range
6439     const bool invalidate = (bi->pc_p - start_p) < length;
6440     #endif
6441     if (invalidate) {
6442     uae_u32 cl = cacheline(bi->pc_p);
6443     if (bi == cache_tags[cl + 1].bi)
6444     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6445     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
6446     set_dhtu(bi, bi->direct_pen);
6447     bi->status = BI_NEED_RECOMP;
6448     }
6449     bi = bi->next;
6450     }
6451     return;
6452     #endif
6453     flush_icache(-1);
6454 gbeauche 1.1 }
6455    
6456     static void catastrophe(void)
6457     {
6458     abort();
6459     }
6460    
6461     int failure;
6462    
6463     #define TARGET_M68K 0
6464     #define TARGET_POWERPC 1
6465     #define TARGET_X86 2
6466 gbeauche 1.24 #define TARGET_X86_64 3
6467 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6468     #define TARGET_NATIVE TARGET_X86
6469     #endif
6470     #if defined(powerpc) || defined(__powerpc__)
6471     #define TARGET_NATIVE TARGET_POWERPC
6472     #endif
6473 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6474     #define TARGET_NATIVE TARGET_X86_64
6475     #endif
6476 gbeauche 1.1
6477     #ifdef ENABLE_MON
6478 gbeauche 1.24 static uae_u32 mon_read_byte_jit(uintptr addr)
6479 gbeauche 1.1 {
6480     uae_u8 *m = (uae_u8 *)addr;
6481 gbeauche 1.24 return (uintptr)(*m);
6482 gbeauche 1.1 }
6483    
6484 gbeauche 1.24 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6485 gbeauche 1.1 {
6486     uae_u8 *m = (uae_u8 *)addr;
6487     *m = b;
6488     }
6489     #endif
6490    
6491     void disasm_block(int target, uint8 * start, size_t length)
6492     {
6493     if (!JITDebug)
6494     return;
6495    
6496     #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6497     char disasm_str[200];
6498     sprintf(disasm_str, "%s $%lx $%lx",
6499     target == TARGET_M68K ? "d68" :
6500     target == TARGET_X86 ? "d86" :
6501 gbeauche 1.24 target == TARGET_X86_64 ? "d8664" :
6502 gbeauche 1.1 target == TARGET_POWERPC ? "d" : "x",
6503     (unsigned long)start, (unsigned long)(start + length - 1));
6504    
6505 gbeauche 1.24 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6506     void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6507 gbeauche 1.1
6508     mon_read_byte = mon_read_byte_jit;
6509     mon_write_byte = mon_write_byte_jit;
6510    
6511     char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6512     mon(4, arg);
6513    
6514     mon_read_byte = old_mon_read_byte;
6515     mon_write_byte = old_mon_write_byte;
6516     #endif
6517     }
6518    
6519 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6520 gbeauche 1.1 {
6521     disasm_block(TARGET_NATIVE, start, length);
6522     }
6523    
6524 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6525 gbeauche 1.1 {
6526     disasm_block(TARGET_M68K, start, length);
6527     }
6528    
6529     #ifdef HAVE_GET_WORD_UNSWAPPED
6530     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6531     #else
6532     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6533     #endif
6534    
6535     #if JIT_DEBUG
6536     static uae_u8 *last_regs_pc_p = 0;
6537     static uae_u8 *last_compiled_block_addr = 0;
6538    
6539     void compiler_dumpstate(void)
6540     {
6541     if (!JITDebug)
6542     return;
6543    
6544     write_log("### Host addresses\n");
6545     write_log("MEM_BASE : %x\n", MEMBaseDiff);
6546     write_log("PC_P : %p\n", &regs.pc_p);
6547     write_log("SPCFLAGS : %p\n", &regs.spcflags);
6548     write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6549     write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6550     write_log("\n");
6551    
6552     write_log("### M68k processor state\n");
6553     m68k_dumpstate(0);
6554     write_log("\n");
6555    
6556     write_log("### Block in Mac address space\n");
6557     write_log("M68K block : %p\n",
6558 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6559 gbeauche 1.1 write_log("Native block : %p (%d bytes)\n",
6560 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6561 gbeauche 1.1 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6562     write_log("\n");
6563     }
6564     #endif
6565    
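         /* Translate one recorded trace (blocklen instructions, locations
            in pc_hist[]) into native code, install it in the cache tags,
            link its branch dependencies, and checksum it unless it lies
            in ROM. */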
6566     static void compile_block(cpu_history* pc_hist, int blocklen)
6567     {
6568     if (letit && compiled_code) {
6569     #if PROFILE_COMPILE_TIME
6570     compile_count++;
6571     clock_t start_time = clock();
6572     #endif
6573     #if JIT_DEBUG
6574     bool disasm_block = false;
6575     #endif
6576    
6577     /* OK, here we need to 'compile' a block */
6578     int i;
6579     int r;
6580     int was_comp=0;
6581     uae_u8 liveflags[MAXRUN+1];
6582 gbeauche 1.8 #if USE_CHECKSUM_INFO
6583     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6584 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6585     uintptr min_pcp=max_pcp;
6586 gbeauche 1.8 #else
6587 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6588     uintptr min_pcp=max_pcp;
6589 gbeauche 1.8 #endif
6590 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6591     void* specflags=(void*)&regs.spcflags;
6592     blockinfo* bi=NULL;
6593     blockinfo* bi2;
6594     int extra_len=0;
6595    
6596     redo_current_block=0;
6597     if (current_compile_p>=max_compile_start)
6598     flush_icache_hard(7);
6599    
6600     alloc_blockinfos();
6601    
6602     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6603     bi2=get_blockinfo(cl);
6604    
6605     optlev=bi->optlevel;
6606     if (bi->status!=BI_INVALID) {
6607     Dif (bi!=bi2) {
6608     /* I don't think it can happen anymore. Shouldn't, in
6609     any case. So let's make sure... */
6610     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6611     bi->count,bi->optlevel,bi->handler_to_use,
6612     cache_tags[cl].handler);
6613     abort();
6614     }
6615    
6616     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6617     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6618     /* What the heck? We are not supposed to be here! */
6619     abort();
6620     }
6621     }
6622     if (bi->count==-1) {
6623     optlev++;
6624     while (!optcount[optlev])
6625     optlev++;
6626     bi->count=optcount[optlev]-1;
6627     }
6628 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6629 gbeauche 1.1
6630     remove_deps(bi); /* We are about to create new code */
6631     bi->optlevel=optlev;
6632     bi->pc_p=(uae_u8*)pc_hist[0].location;
6633 gbeauche 1.8 #if USE_CHECKSUM_INFO
6634     free_checksum_info_chain(bi->csi);
6635     bi->csi = NULL;
6636     #endif
6637 gbeauche 1.1
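         /* Backward liveness pass over the trace: the flags live before
            an instruction are those live after it that it does not set
            itself, plus those it reads. ADDX-style instructions need Z
            valid on entry only if Z is still live afterwards (they clear
            Z but never set it, so a dead Z downstream makes the incoming
            Z irrelevant). */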
6638     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6639     i=blocklen;
6640     while (i--) {
6641     uae_u16* currpcp=pc_hist[i].location;
6642     uae_u32 op=DO_GET_OPCODE(currpcp);
6643    
6644 gbeauche 1.8 #if USE_CHECKSUM_INFO
6645     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6646 gbeauche 1.33 if (follow_const_jumps && is_const_jump(op)) {
6647 gbeauche 1.8 checksum_info *csi = alloc_checksum_info();
6648     csi->start_p = (uae_u8 *)min_pcp;
6649     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6650     csi->next = bi->csi;
6651     bi->csi = csi;
6652 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6653 gbeauche 1.8 }
6654 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6655 gbeauche 1.8 #else
6656 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6657     min_pcp=(uintptr)currpcp;
6658     if ((uintptr)currpcp>max_pcp)
6659     max_pcp=(uintptr)currpcp;
6660 gbeauche 1.8 #endif
6661 gbeauche 1.1
6662     liveflags[i]=((liveflags[i+1]&
6663     (~prop[op].set_flags))|
6664     prop[op].use_flags);
6665     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6666     liveflags[i]&= ~FLAG_Z;
6667     }
6668    
6669 gbeauche 1.8 #if USE_CHECKSUM_INFO
6670     checksum_info *csi = alloc_checksum_info();
6671     csi->start_p = (uae_u8 *)min_pcp;
6672     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6673     csi->next = bi->csi;
6674     bi->csi = csi;
6675     #endif
6676    
6677 gbeauche 1.1 bi->needed_flags=liveflags[0];
6678    
6679 gbeauche 1.5 align_target(align_loops);
6680 gbeauche 1.1 was_comp=0;
6681    
6682     bi->direct_handler=(cpuop_func *)get_target();
6683     set_dhtu(bi,bi->direct_handler);
6684     bi->status=BI_COMPILING;
6685 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6686 gbeauche 1.1
6687     log_startblock();
6688    
6689     if (bi->count>=0) { /* Need to generate countdown code */
6690 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6691     raw_sub_l_mi((uintptr)&(bi->count),1);
6692     raw_jl((uintptr)popall_recompile_block);
6693 gbeauche 1.1 }
6694     if (optlev==0) { /* No need to actually translate */
6695     /* Execute normally without keeping stats */
6696 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6697     raw_jmp((uintptr)popall_exec_nostats);
6698 gbeauche 1.1 }
6699     else {
6700     reg_alloc_run=0;
6701     next_pc_p=0;
6702     taken_pc_p=0;
6703     branch_cc=0;
6704    
6705     comp_pc_p=(uae_u8*)pc_hist[0].location;
6706     init_comp();
6707     was_comp=1;
6708    
6709 gbeauche 1.34 #ifdef USE_CPU_EMUL_SERVICES
6710     raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
6711     raw_jcc_b_oponly(NATIVE_CC_GT);
6712     uae_s8 *branchadd=(uae_s8*)get_target();
6713     emit_byte(0);
6714     raw_call((uintptr)cpu_do_check_ticks);
6715     *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
6716     #endif
6717    
6718 gbeauche 1.1 #if JIT_DEBUG
6719     if (JITDebug) {
6720 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6721     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6722 gbeauche 1.1 }
6723     #endif
6724    
6725     for (i=0;i<blocklen &&
6726     get_target_noopt()<max_compile_start;i++) {
6727     cpuop_func **cputbl;
6728     compop_func **comptbl;
6729     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6730     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6731     if (!needed_flags) {
6732     cputbl=nfcpufunctbl;
6733     comptbl=nfcompfunctbl;
6734     }
6735     else {
6736     cputbl=cpufunctbl;
6737     comptbl=compfunctbl;
6738     }
6739 gbeauche 1.32
6740     #if FLIGHT_RECORDER
6741     {
6742     mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
6743     clobber_flags();
6744     remove_all_offsets();
6745     int arg = readreg_specific(S1,4,REG_PAR1);
6746     prepare_for_call_1();
6747     unlock2(arg);
6748     prepare_for_call_2();
6749     raw_call((uintptr)m68k_record_step);
6750     }
6751     #endif
6752 gbeauche 1.1
6753     failure = 1; // gb-- defaults to failure state
6754     if (comptbl[opcode] && optlev>1) {
6755     failure=0;
6756     if (!was_comp) {
6757     comp_pc_p=(uae_u8*)pc_hist[i].location;
6758     init_comp();
6759     }
6760 gbeauche 1.18 was_comp=1;
6761 gbeauche 1.1
6762     comptbl[opcode](opcode);
6763     freescratch();
6764     if (!(liveflags[i+1] & FLAG_CZNV)) {
6765     /* We can forget about flags */
6766     dont_care_flags();
6767     }
6768     #if INDIVIDUAL_INST
6769     flush(1);
6770     nop();
6771     flush(1);
6772     was_comp=0;
6773     #endif
6774     }
6775    
6776     if (failure) {
6777     if (was_comp) {
6778     flush(1);
6779     was_comp=0;
6780     }
6781     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6782     #if USE_NORMAL_CALLING_CONVENTION
6783     raw_push_l_r(REG_PAR1);
6784     #endif
6785 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6786     (uintptr)pc_hist[i].location);
6787     raw_call((uintptr)cputbl[opcode]);
6788 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6789     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6790 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6791 gbeauche 1.9 #endif
6792 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6793     raw_inc_sp(4);
6794     #endif
6795    
6796     if (i < blocklen - 1) {
6797     uae_s8* branchadd;
6798    
6799 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6800 gbeauche 1.1 raw_test_l_rr(0,0);
6801     raw_jz_b_oponly();
6802     branchadd=(uae_s8 *)get_target();
6803     emit_byte(0);
6804 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6805     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6806 gbeauche 1.1 }
6807     }
6808     }
6809     #if 1 /* This isn't completely kosher yet; it really needs to
6810     be integrated into a general inter-block-dependency scheme */
6811     if (next_pc_p && taken_pc_p &&
6812     was_comp && taken_pc_p==current_block_pc_p) {
6813     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6814     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6815     uae_u8 x=bi1->needed_flags;
6816    
6817     if (x==0xff || 1) { /* To be on the safe side */
6818     uae_u16* next=(uae_u16*)next_pc_p;
6819     uae_u32 op=DO_GET_OPCODE(next);
6820    
6821     x=0x1f;
6822     x&=(~prop[op].set_flags);
6823     x|=prop[op].use_flags;
6824     }
6825    
6826     x|=bi2->needed_flags;
6827     if (!(x & FLAG_CZNV)) {
6828     /* We can forget about flags */
6829     dont_care_flags();
6830     extra_len+=2; /* The next instruction now is part of this
6831     block */
6832     }
6833    
6834     }
6835     #endif
6836     log_flush();
6837    
6838     if (next_pc_p) { /* A branch was registered */
6839 gbeauche 1.24 uintptr t1=next_pc_p;
6840     uintptr t2=taken_pc_p;
6841 gbeauche 1.1 int cc=branch_cc;
6842    
6843     uae_u32* branchadd;
6844     uae_u32* tba;
6845     bigstate tmp;
6846     blockinfo* tbi;
6847    
6848     if (taken_pc_p<next_pc_p) {
6849     /* backward branch. Optimize for the "taken" case ---
6850     which means the raw_jcc should fall through when
6851     the 68k branch is taken. */
6852     t1=taken_pc_p;
6853     t2=next_pc_p;
6854     cc=branch_cc^1;
6855     }
6856    
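         /* Emit a conditional jump to the not-predicted target, then fall
            through into the predicted one: if no spcflags are pending,
            jump straight into the target block's handler. The 32-bit
            displacement written at 'tba' is registered via
            create_jmpdep() so it gets re-patched whenever the target
            block is recompiled or moves. */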
6857     tmp=live; /* ouch! This is big... */
6858     raw_jcc_l_oponly(cc);
6859     branchadd=(uae_u32*)get_target();
6860     emit_long(0);
6861    
6862     /* predicted outcome */
6863     tbi=get_blockinfo_addr_new((void*)t1,1);
6864     match_states(tbi);
6865 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6866 gbeauche 1.1 raw_jcc_l_oponly(4);
6867     tba=(uae_u32*)get_target();
6868 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6869     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6870 gbeauche 1.28 flush_reg_count();
6871 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6872 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6873    
6874 gbeauche 1.5 align_target(align_jumps);
6875 gbeauche 1.1 /* not-predicted outcome */
6876 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6877 gbeauche 1.1 live=tmp; /* Ouch again */
6878     tbi=get_blockinfo_addr_new((void*)t2,1);
6879     match_states(tbi);
6880    
6881     //flush(1); /* Can only get here if was_comp==1 */
6882 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6883 gbeauche 1.1 raw_jcc_l_oponly(4);
6884     tba=(uae_u32*)get_target();
6885 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6886     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6887 gbeauche 1.28 flush_reg_count();
6888 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6889 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
6890     }
6891     else
6892     {
6893     if (was_comp) {
6894     flush(1);
6895     }
6896 gbeauche 1.28 flush_reg_count();
6897 gbeauche 1.1
6898     /* Let's find out where next_handler is... */
6899     if (was_comp && isinreg(PC_P)) {
6900     r=live.state[PC_P].realreg;
6901     raw_and_l_ri(r,TAGMASK);
6902     int r2 = (r==0) ? 1 : 0;
6903 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6904     raw_cmp_l_mi((uintptr)specflags,0);
6905 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6906 gbeauche 1.1 raw_jmp_r(r2);
6907     }
6908     else if (was_comp && isconst(PC_P)) {
6909     uae_u32 v=live.state[PC_P].val;
6910     uae_u32* tba;
6911     blockinfo* tbi;
6912    
6913 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6914 gbeauche 1.1 match_states(tbi);
6915    
6916 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6917 gbeauche 1.1 raw_jcc_l_oponly(4);
6918     tba=(uae_u32*)get_target();
6919 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6920     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6921     raw_jmp((uintptr)popall_do_nothing);
6922 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6923     }
6924     else {
6925     r=REG_PC_TMP;
6926 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6927 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6928     int r2 = (r==0) ? 1 : 0;
6929 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6930     raw_cmp_l_mi((uintptr)specflags,0);
6931 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6932 gbeauche 1.1 raw_jmp_r(r2);
6933     }
6934     }
6935     }
6936    
6937     #if USE_MATCH
6938     if (callers_need_recompile(&live,&(bi->env))) {
6939     mark_callers_recompile(bi);
6940     }
6941    
6942     big_to_small_state(&live,&(bi->env));
6943     #endif
6944    
6945 gbeauche 1.8 #if USE_CHECKSUM_INFO
6946     remove_from_list(bi);
6947     if (trace_in_rom) {
6948     // No need to checksum that block trace on cache invalidation
6949     free_checksum_info_chain(bi->csi);
6950     bi->csi = NULL;
6951     add_to_dormant(bi);
6952     }
6953     else {
6954     calc_checksum(bi,&(bi->c1),&(bi->c2));
6955     add_to_active(bi);
6956     }
6957     #else
6958 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6959     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6960     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6961     else
6962     max_pcp+=LONGEST_68K_INST;
6963 gbeauche 1.7
6964 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6965     bi->min_pcp=min_pcp;
6966 gbeauche 1.7
6967 gbeauche 1.1 remove_from_list(bi);
6968     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6969     add_to_dormant(bi); /* No need to checksum it on cache flush.
6970     Please don't start changing ROMs in
6971     flight! */
6972     }
6973     else {
6974     calc_checksum(bi,&(bi->c1),&(bi->c2));
6975     add_to_active(bi);
6976     }
6977 gbeauche 1.8 #endif
6978 gbeauche 1.1
6979     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6980    
6981     #if JIT_DEBUG
6982     if (JITDebug)
6983     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6984    
6985     if (JITDebug && disasm_block) {
6986     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6987     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6988     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6989     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6990     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6991     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6992     getchar();
6993     }
6994     #endif
6995    
6996     log_dump();
6997 gbeauche 1.5 align_target(align_jumps);
6998 gbeauche 1.1
6999     /* This is the non-direct handler */
7000     bi->handler=
7001     bi->handler_to_use=(cpuop_func *)get_target();
7002 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
7003     raw_jnz((uintptr)popall_cache_miss);
7004 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
7005    
7006     bi->status=BI_FINALIZING;
7007     init_comp();
7008     match_states(bi);
7009     flush(1);
7010    
7011 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
7012 gbeauche 1.1
7013     current_compile_p=get_target();
7014     raise_in_cl_list(bi);
7015    
7016     /* We will flush soon, anyway, so let's do it now */
7017     if (current_compile_p>=max_compile_start)
7018     flush_icache_hard(7);
7019    
7020     bi->status=BI_ACTIVE;
7021     if (redo_current_block)
7022     block_need_recompile(bi);
7023    
7024     #if PROFILE_COMPILE_TIME
7025     compile_time += (clock() - start_time);
7026     #endif
7027     }
7028 gbeauche 1.34
7029     /* Account for compilation time */
7030     cpu_do_check_ticks();
7031 gbeauche 1.1 }
7032    
7033     void do_nothing(void)
7034     {
7035     /* What did you expect this to do? */
7036     }
7037    
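         /* Interpret one block without gathering compilation history;
            blocks still at optimization level 0 are routed here via
            popall_exec_nostats instead of being translated. */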
7038     void exec_nostats(void)
7039     {
7040     for (;;) {
7041     uae_u32 opcode = GET_OPCODE;
7042 gbeauche 1.32 #if FLIGHT_RECORDER
7043     m68k_record_step(m68k_getpc());
7044     #endif
7045 gbeauche 1.1 (*cpufunctbl[opcode])(opcode);
7046 gbeauche 1.34 cpu_check_ticks();
7047 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7048     return; /* We will deal with the spcflags in the caller */
7049     }
7050     }
7051     }
7052    
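         /* Interpreter loop that records the location of every executed
            instruction into pc_hist; when the block ends (or MAXRUN
            instructions have been gathered) the recorded trace is handed
            to compile_block() so subsequent executions can run
            translated code. */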
7053     void execute_normal(void)
7054     {
7055     if (!check_for_cache_miss()) {
7056     cpu_history pc_hist[MAXRUN];
7057     int blocklen = 0;
7058     #if REAL_ADDRESSING || DIRECT_ADDRESSING
7059     start_pc_p = regs.pc_p;
7060     start_pc = get_virtual_address(regs.pc_p);
7061     #else
7062     start_pc_p = regs.pc_oldp;
7063     start_pc = regs.pc;
7064     #endif
7065     for (;;) { /* Take note: This is the do-it-normal loop */
7066     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7067     uae_u32 opcode = GET_OPCODE;
7068     #if FLIGHT_RECORDER
7069     m68k_record_step(m68k_getpc());
7070     #endif
7071     (*cpufunctbl[opcode])(opcode);
7072 gbeauche 1.34 cpu_check_ticks();
7073 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7074     compile_block(pc_hist, blocklen);
7075     return; /* We will deal with the spcflags in the caller */
7076     }
7077     /* No need to check regs.spcflags, because if they were set,
7078     we'd have ended up inside that "if" */
7079     }
7080     }
7081     }
7082    
7083     typedef void (*compiled_handler)(void);
7084    
7085 gbeauche 1.36 static void m68k_do_compile_execute(void)
7086 gbeauche 1.1 {
7087     for (;;) {
7088     ((compiled_handler)(pushall_call_handler))();
7089     /* Whenever we return from that, we should check spcflags */
7090     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7091     if (m68k_do_specialties ())
7092     return;
7093     }
7094     }
7095     }
7096 gbeauche 1.35
7097     void m68k_compile_execute (void)
7098     {
7099     for (;;) {
7100     if (quit_program)
7101     break;
7102     m68k_do_compile_execute();
7103     }
7104     }